def train_model(graph, seq_params):
    """Fit ``graph`` on a freshly generated batch of training sequences.

    Args:
        graph: model exposing a Keras-Graph style ``fit`` that accepts a dict
            with ``'input'`` and ``'out1'`` entries.
        seq_params: passed through unchanged to ``get_random_batch``.

    The best model (by validation loss) is checkpointed to the module-level
    ``save_model_path``; training stops early after 25 epochs without
    improvement of ``val_loss``.
    """
    nb_iter = 1
    nb_epochs = 10000
    len_seq = 20
    num_seqs = 100000

    # Builtin ``bool`` instead of ``np.bool``: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24; the resulting dtype is identical.
    X_train = np.zeros((num_seqs, len_seq, 60), dtype=bool)
    y_train = np.zeros((num_seqs, 60), dtype=bool)

    from monitoring import LossHistory
    history = LossHistory()
    checkpointer = ModelCheckpoint(filepath=save_model_path,
                                   verbose=1,
                                   save_best_only=True)

    for e in range(nb_iter):
        print('-' * 40)
        print('Iteration', e)
        print('-' * 40)

        print("Generating training data...")
        # Return value is ignored; presumably the arrays are filled in place
        # -- TODO confirm against get_random_batch.
        get_random_batch(X_train, y_train, seq_params)

        print("Fitting data...")
        earlystopper = EarlyStopping(monitor='val_loss', patience=25, verbose=2)
        graph.fit({'input': X_train, 'out1': y_train[:, :60]},
                  validation_split=0.3,
                  batch_size=128,
                  nb_epoch=nb_epochs,
                  callbacks=[checkpointer, earlystopper, history])
def train_model(graph):
    """Fit ``graph`` on generated second-frequency sequences (120-wide labels).

    Args:
        graph: model exposing a Keras-Graph style ``fit`` that accepts a dict
            with ``'input'`` and ``'out1'`` entries.

    Reads the module-level ``time_representation``, ``label_representation``,
    ``len_circ_repr`` and ``save_model_path``. The best model by validation
    loss is checkpointed; training stops early after 100 epochs without
    improvement of ``val_loss``.
    """
    nb_iter = 1
    nb_epochs = 10000
    len_seq = 20
    num_seqs = 100000

    seq_params = {
        "freqs": {'S': 1},
        "add_noise": False,
        "mult": 59,
        "time_repr": time_representation,
        "label_repr": label_representation,
    }

    # Builtin ``float``/``bool`` instead of ``np.float``/``np.bool``: the
    # aliases were removed in NumPy 1.24; the resulting dtypes are identical.
    X_train = np.zeros((num_seqs, len_seq, len_circ_repr), dtype=float)
    y_train = np.zeros((num_seqs, 120), dtype=bool)

    from monitoring import LossHistory
    history = LossHistory()
    checkpointer = ModelCheckpoint(filepath=save_model_path,
                                   verbose=1,
                                   save_best_only=True)

    for e in range(nb_iter):
        print('-' * 40)
        print('Iteration', e)
        print('-' * 40)

        print("Generating training data...")
        # Return value is ignored; presumably the arrays are filled in place
        # -- TODO confirm against get_random_batch.
        get_random_batch(X_train, y_train, seq_params)

        print("Fitting data...")
        earlystopper = EarlyStopping(monitor='val_loss', patience=100, verbose=2)
        graph.fit({'input': X_train, 'out1': y_train[:, :120]},
                  validation_split=0.3,
                  batch_size=128,
                  nb_epoch=nb_epochs,
                  callbacks=[checkpointer, earlystopper, history])
def train_model(graph):
    """Fit ``graph`` on generated second-frequency sequences (60-wide labels).

    Args:
        graph: model exposing a Keras-Graph style ``fit`` that accepts a dict
            with ``'input'`` and ``'out1'`` entries.

    Reads the module-level ``time_representation``, ``label_representation``,
    ``len_circ_repr`` and ``save_model_path``. The best model by validation
    loss is checkpointed; training stops early after 25 epochs without
    improvement of ``val_loss``.
    """
    nb_iter = 1
    nb_epochs = 10000
    len_seq = 20
    num_seqs = 100000

    seq_params = {
        "freqs": {'S': 1},
        "add_noise": False,
        "mult": 30,
        "time_repr": time_representation,
        "label_repr": label_representation,
    }

    # Builtin ``float``/``bool`` instead of ``np.float``/``np.bool``: the
    # aliases were removed in NumPy 1.24; the resulting dtypes are identical.
    X_train = np.zeros((num_seqs, len_seq, len_circ_repr), dtype=float)
    y_train = np.zeros((num_seqs, 60), dtype=bool)

    from monitoring import LossHistory
    history = LossHistory()
    checkpointer = ModelCheckpoint(filepath=save_model_path,
                                   verbose=1,
                                   save_best_only=True)

    for e in range(nb_iter):
        print('-' * 40)
        print('Iteration', e)
        print('-' * 40)

        print("Generating training data...")
        # Return value is ignored; presumably the arrays are filled in place
        # -- TODO confirm against get_random_batch.
        get_random_batch(X_train, y_train, seq_params)

        print("Fitting data...")
        earlystopper = EarlyStopping(monitor='val_loss', patience=25, verbose=2)
        graph.fit({'input': X_train, 'out1': y_train[:, :60]},
                  validation_split=0.3,
                  batch_size=128,
                  nb_epoch=nb_epochs,
                  callbacks=[checkpointer, earlystopper, history])
def train_aenet_model(config):
    """Train an ``AENet`` on the labels in ``config['train_lbs_file']``.

    Args:
        config: mapping read for ``'train_lbs_file'``, ``'iterations'``,
            ``'batch_size'`` and ``'ckpt_dir'``; it is also handed to
            ``AENet`` unchanged.

    The model is saved to ``config['ckpt_dir']`` once, after the last
    iteration.
    NOTE(review): the save is assumed to sit after the loop rather than inside
    it; the original layout was not recoverable -- confirm.
    """
    tf.reset_default_graph()

    tf_config = tf.ConfigProto()
    if RUN_IN_GPU:
        tf_config.gpu_options.allow_growth = True
    sess = tf.Session(config=tf_config)

    try:
        train_lbs, _ = DataHandler.load_labels(config['train_lbs_file'])
        print('Loading training data...done')

        aenet = AENet(sess, config, 'AENet', is_train=True)
        print('Building AENet model...done')

        print('Training...')
        for i in range(config['iterations']):
            batch_lbs, _ = get_random_batch(train_lbs, config['batch_size'])
            cur_loss = aenet.fit(batch_lbs)
            print('Iteration {:>8d}/{}: Loss: {}'.format(
                i + 1, config['iterations'], cur_loss))

        aenet.save(config['ckpt_dir'])
        print('Saving current AENet model...done')
        print('Training...done')
    finally:
        # Close the session first, and do so even when training fails:
        # resetting the default graph while a session is still active is
        # documented as undefined behaviour.
        sess.close()
        tf.reset_default_graph()
def demo(graph):
    """Animate predicted vs. true 'seconds' distributions for random sequences.

    Args:
        graph: trained model, forwarded to ``next_prediction``.

    For each of 200 generated sequences the predicted probabilities are drawn
    as a line and the true class (argmax of the label row) as a single bar of
    height 1; the figure is redrawn and held for two seconds per sequence.
    Reads the module-level ``len_circ_repr``, ``time_representation`` and
    ``label_representation``.
    """
    plt.ion()
    plt.rcParams["figure.figsize"] = [16, 12]

    f, ax3 = plt.subplots(1, 1, sharey=True)
    ax3.set_title('S')
    ax3.set_ylim(ymax=1, ymin=0)
    ax3.set_xlim(xmax=59, xmin=0)
    plt.xticks(np.arange(0, 60, 1.0))
    prob_seconds, = ax3.plot([], [], linewidth=2.0)
    true_seconds = ax3.bar(range(60), np.zeros(60), width=0.5,
                           color='lightpink', align='center')

    num_seqs = 200
    len_seq = 10
    # Builtin ``float``/``bool`` instead of ``np.float``/``np.bool``: the
    # aliases were removed in NumPy 1.24; the resulting dtypes are identical.
    X = np.zeros((num_seqs, len_seq, len_circ_repr), dtype=float)
    y = np.zeros((num_seqs, 60), dtype=bool)
    seq_params = {
        # "freqs": {'D': 86400, 'H': 3600, 'T': 60, 'S': 1, 'B': 86400, 'W-SUN': 86400*7},
        "freqs": {'S': 1},
        "add_noise": False,
        "mult": 59,
        "time_repr": time_representation,
        "label_repr": label_representation,
    }
    # Second return value is not used here.
    freqs, _ = get_random_batch(X, y, seq_params)

    for s in range(num_seqs):
        # NOTE(review): freqs is indexed at s + len_seq, presumably one entry
        # per generated time step -- confirm against get_random_batch.
        f.suptitle("freqency: " + str(freqs[s + len_seq]), fontsize=20)
        secs = np.argmax(y[s])
        pred_probs = next_prediction(graph, X[s:s + 1, :, :])
        prob_seconds.set_ydata(pred_probs)
        prob_seconds.set_xdata(range(60))
        for i, b in enumerate(true_seconds):
            b.set_height(1 if i == secs else 0)
        f.canvas.draw()
        sleep(2)
def demo(graph):
    """Animate predicted vs. true 'seconds' for a model with 120-wide output.

    Args:
        graph: trained model, forwarded to ``next_prediction``.

    The 120-wide prediction is folded to 60 values by taking the element-wise
    maximum of its two halves, and the true class is the label argmax modulo
    60. Each of the 100 sequences is shown for two seconds. Reads the
    module-level ``len_circ_repr``, ``time_representation`` and
    ``label_representation``.
    """
    plt.ion()
    plt.rcParams["figure.figsize"] = [16, 12]

    f, ax3 = plt.subplots(1, 1, sharey=True)
    ax3.set_title('S')
    ax3.set_ylim(ymax=1, ymin=0)
    ax3.set_xlim(xmax=59, xmin=0)
    plt.xticks(np.arange(0, 60, 1.0))
    prob_seconds, = ax3.plot([], [], linewidth=2.0)
    true_seconds = ax3.bar(range(60), np.zeros(60), width=0.5,
                           color='lightpink', align='center')

    num_seqs = 100
    len_seq = 10
    # Builtin ``float``/``bool`` instead of ``np.float``/``np.bool``: the
    # aliases were removed in NumPy 1.24; the resulting dtypes are identical.
    X = np.zeros((num_seqs, len_seq, len_circ_repr), dtype=float)
    y = np.zeros((num_seqs, 120), dtype=bool)
    seq_params = {
        # "freqs": {'D': 86400, 'H': 3600, 'T': 60, 'S': 1, 'B': 86400, 'W-SUN': 86400*7},
        "freqs": {'S': 1},
        "add_noise": False,
        "mult": 59,
        "time_repr": time_representation,
        "label_repr": label_representation,
    }
    # Second return value is not used here.
    freqs, _ = get_random_batch(X, y, seq_params)

    for s in range(num_seqs):
        # NOTE(review): freqs is indexed at s + len_seq, presumably one entry
        # per generated time step -- confirm against get_random_batch.
        f.suptitle("freqency: " + str(freqs[s + len_seq]), fontsize=20)
        secs = np.argmax(y[s]) % 60
        preds = next_prediction(graph, X[s:s + 1, :, :])
        # Collapse the two 60-wide halves into one curve over seconds.
        pred_probs = np.maximum(preds[:60], preds[60:])
        prob_seconds.set_ydata(pred_probs)
        prob_seconds.set_xdata(range(60))
        for i, b in enumerate(true_seconds):
            b.set_height(1 if i == secs else 0)
        f.canvas.draw()
        sleep(2)
def demo(graph, seq_params):
    """Animate predicted vs. true 'seconds' distributions for random sequences.

    Args:
        graph: trained model, forwarded to ``next_prediction``.
        seq_params: passed through unchanged to ``get_random_batch``.

    For each of 200 generated sequences the predicted probabilities are drawn
    as a line and the true class (argmax of the label row) as a single bar of
    height 1; the figure is redrawn and held for two seconds per sequence.
    """
    plt.ion()
    plt.rcParams["figure.figsize"] = [16, 12]

    f, ax3 = plt.subplots(1, 1, sharey=True)
    ax3.set_title('S')
    ax3.set_ylim(ymax=1, ymin=0)
    ax3.set_xlim(xmax=59, xmin=0)
    plt.xticks(np.arange(0, 60, 1.0))
    prob_seconds, = ax3.plot([], [], linewidth=2.0)
    true_seconds = ax3.bar(range(60), np.zeros(60), width=0.5,
                           color='lightpink', align='center')

    num_seqs = 200
    len_seq = 10
    # Builtin ``bool`` instead of ``np.bool``: the alias was removed in
    # NumPy 1.24; the resulting dtype is identical.
    X = np.zeros((num_seqs, len_seq, 60), dtype=bool)
    y = np.zeros((num_seqs, 60), dtype=bool)
    # Second return value is not used here.
    freqs, _ = get_random_batch(X, y, seq_params)

    for s in range(num_seqs):
        # NOTE(review): freqs is indexed at s + len_seq, presumably one entry
        # per generated time step -- confirm against get_random_batch.
        f.suptitle("freqency: " + str(freqs[s + len_seq]), fontsize=20)
        secs = np.argmax(y[s])
        pred_probs = next_prediction(graph, X[s:s + 1, :, :])
        prob_seconds.set_ydata(pred_probs)
        prob_seconds.set_xdata(range(60))
        for i, b in enumerate(true_seconds):
            b.set_height(1 if i == secs else 0)
        f.canvas.draw()
        sleep(2)
def test_acregnet_model(config):
    """Restore a trained ``ACRegNet`` and deploy it on a random test batch.

    Args:
        config: mapping read for ``'test_ims_file'``, ``'ckpt_dir'`` and
            ``'result_dir'``. ``'batch_size'`` and ``'image_size'`` are
            overwritten in place before the model is built, so the caller's
            mapping is modified.
    """
    tf.reset_default_graph()
    sess = tf.Session()

    try:
        test_ims, _ = DataHandler.load_images(config['test_ims_file'])
        print('Loading test data...done')

        config['batch_size'] = test_ims.shape[0] * 2
        config['image_size'] = [256, 256]

        acregnet = ACRegNet(sess, config, 'ACRegNet', is_train=False)
        print('Building AC-RegNet model...done')
        acregnet.restore(config['ckpt_dir'])
        print('Loading trained AC-RegNet model...done')

        batch_ims_x, batch_ims_y = get_random_batch(test_ims,
                                                    config['batch_size'])

        print('Testing...')
        acregnet.deploy(config['result_dir'], batch_ims_x, batch_ims_y, True)
        print('Testing...done')
    finally:
        # The session was previously never released; close it on every path.
        sess.close()
def train(model_path, data, sess, saver, placeholders, model, opt, args):
    """Run ``args.epochs`` optimisation steps, one random batch per step.

    Args:
        model_path: checkpoint path handed to ``saver.save``.
        data: forwarded to ``get_random_batch``.
        sess: session used to run the optimiser op and the cost.
        saver: object whose ``save(sess, path)`` writes a checkpoint.
        placeholders: mapping with ``'inputs'``, ``'dropout'`` and
            ``'labels'`` feed keys.
        model: not used by this function.
        opt: object exposing ``opt_op`` and ``cost``.
        args: namespace read for ``epochs``, ``batch_size`` and ``dropout``.

    Progress is printed every 100 steps; a checkpoint is written every 1000
    steps, except at step 0.
    """
    for step in range(args.epochs):
        started = time.time()

        batch, labels = get_random_batch(args.batch_size, data)
        feed = {
            placeholders['inputs']: batch,
            placeholders['dropout']: args.dropout,
            placeholders['labels']: labels,
        }
        results = sess.run([opt.opt_op, opt.cost], feed_dict=feed)
        avg_cost = results[1]

        if step % 100 == 0:
            epoch_label = '%04d' % (step + 1)
            loss_label = "{:.5f}".format(avg_cost)
            time_label = "{:.3f}".format(time.time() - started)
            print("Epoch:", epoch_label, "train_loss=", loss_label, "time=",
                  time_label)

        if step != 0 and step % 1000 == 0:
            save_path = saver.save(sess, model_path)
            print('saving checkpoint at', save_path)
def demo(graph, seq_params):
    """Animate predicted vs. true 'seconds' distributions for random sequences.

    Args:
        graph: trained model, forwarded to ``next_prediction``.
        seq_params: passed through unchanged to ``get_random_batch``.

    For each of 200 generated sequences the predicted probabilities are drawn
    as a line and the true class (argmax of the label row) as a single bar of
    height 1; the figure is redrawn and held for two seconds per sequence.
    """
    plt.ion()
    plt.rcParams["figure.figsize"] = [16, 12]

    f, ax3 = plt.subplots(1, 1, sharey=True)
    ax3.set_title('S')
    ax3.set_ylim(ymax=1, ymin=0)
    ax3.set_xlim(xmax=59, xmin=0)
    plt.xticks(np.arange(0, 60, 1.0))
    prob_seconds, = ax3.plot([], [], linewidth=2.0)
    true_seconds = ax3.bar(range(60), np.zeros(60), width=0.5,
                           color='lightpink', align='center')

    num_seqs = 200
    len_seq = 10
    # Builtin ``bool`` instead of ``np.bool``: the alias was removed in
    # NumPy 1.24; the resulting dtype is identical.
    X = np.zeros((num_seqs, len_seq, 60), dtype=bool)
    y = np.zeros((num_seqs, 60), dtype=bool)
    # Second return value is not used here.
    freqs, _ = get_random_batch(X, y, seq_params)

    for s in range(num_seqs):
        # NOTE(review): freqs is indexed at s + len_seq, presumably one entry
        # per generated time step -- confirm against get_random_batch.
        f.suptitle("freqency: " + str(freqs[s + len_seq]), fontsize=20)
        secs = np.argmax(y[s])
        pred_probs = next_prediction(graph, X[s:s + 1, :, :])
        prob_seconds.set_ydata(pred_probs)
        prob_seconds.set_xdata(range(60))
        for i, b in enumerate(true_seconds):
            b.set_height(1 if i == secs else 0)
        f.canvas.draw()
        sleep(2)
def train(epochs, batch_size, input_dir, model_save_dir):
    """Adversarially train the super-resolution generator and discriminator.

    Args:
        epochs: number of epochs to run (epochs are numbered from 1).
        batch_size: number of images per training batch.
        input_dir: directory handed to ``utils.load_training_data``.
        model_save_dir: prefix prepended verbatim to the weight file names, so
            it must end with a path separator to denote a directory.

    Raises:
        ValueError: if the training set holds fewer than ``batch_size`` images.
            Previously this surfaced as a ``NameError`` when the first epoch
            summary was printed.

    Reads the module-level ``image_shape`` and ``downscale_factor``.
    """
    # Make an instance of the VGG class (provides the loss and the optimizer).
    vgg_model = VGG_MODEL(image_shape)

    # High-resolution (HR) images of shape [148, 148, 3] and the matching
    # low-resolution (LR) images.
    x_train_lr, x_train_hr = utils.load_training_data(input_dir, [148, 148, 3])

    # Number of batches per epoch for the given batch size.
    batch_count = int(x_train_hr.shape[0] / batch_size)
    if batch_count < 1:
        raise ValueError(
            'batch_size (%d) exceeds the number of training images (%d)' %
            (batch_size, x_train_hr.shape[0]))

    # The generator consumes LR images, i.e. the downscaled shape.
    image_shape_downscaled = utils.get_downscaled_shape(
        image_shape, downscale_factor)
    generator = networks.Generator(input_shape=image_shape_downscaled)
    # The discriminator consumes HR images, i.e. the original shape.
    discriminator = networks.Discriminator(image_shape)

    optimizer = vgg_model.get_optimizer()

    # Compile generator, discriminator and the combined GAN (which trains the
    # generator while leaving the discriminator untouched).
    generator.compile(loss=vgg_model.vgg_loss, optimizer=optimizer)
    discriminator.compile(loss="binary_crossentropy", optimizer=optimizer)
    gan = networks.GAN_Network(generator, discriminator,
                               image_shape_downscaled, optimizer,
                               vgg_model.vgg_loss)

    for e in range(1, epochs + 1):
        print('-' * 15, 'Epoch %d' % e, '-' * 15)
        for _ in tqdm(range(batch_count)):
            # --- discriminator step ---
            image_batch_lr, image_batch_hr = utils.get_random_batch(
                x_train_lr, x_train_hr, x_train_hr.shape[0], batch_size)
            generated_images_sr = generator.predict(image_batch_lr)

            # Soft labels: real in [0.8, 1.0], fake in [0.0, 0.2).
            real_data_Y = np.ones(
                batch_size) - np.random.random_sample(batch_size) * 0.2
            fake_data_Y = np.random.random_sample(batch_size) * 0.2

            discriminator.trainable = True
            d_loss_real = discriminator.train_on_batch(image_batch_hr,
                                                       real_data_Y)
            d_loss_fake = discriminator.train_on_batch(generated_images_sr,
                                                       fake_data_Y)
            discriminator_loss = 0.5 * np.add(d_loss_fake, d_loss_real)

            # --- generator step, on a fresh random batch ---
            rand_nums = np.random.randint(0, x_train_hr.shape[0],
                                          size=batch_size)
            image_batch_hr = x_train_hr[rand_nums]
            image_batch_lr = x_train_lr[rand_nums]
            gan_Y = np.ones(
                batch_size) - np.random.random_sample(batch_size) * 0.2

            discriminator.trainable = False
            gan_loss = gan.train_on_batch(image_batch_lr,
                                          [image_batch_hr, gan_Y])

        # Losses of the last batch of the epoch.
        print("discriminator_loss : %f" % discriminator_loss)
        print("gan_loss :", gan_loss)

        if e % 50 == 0:
            generator.save_weights(model_save_dir + 'gen_model%d.h5' % e)
            discriminator.save_weights(model_save_dir + 'dis_model%d.h5' % e)
            # NOTE(review): assumed to belong to the periodic checkpoint; the
            # original nesting of this call was not recoverable -- confirm.
            networks.save_model(gan)
batches_train = images2batches(images_train) # Scailing batches_train = batches_train / 255 # Create neural network neural_network = EncDecNetLite() # Initialize weights neural_network.init() losses = [] # Main cycle for i in range(UPDATES_NUM): # Get random batch for Stochastic Gradient Descent X_batch_train = get_random_batch(batches_train, BATCH_SIZE) # Forward pass, calculate network''s outputs Y_batch = neural_network.forward(X_batch_train) # Calculate sum squared loss loss = get_loss(Y_batch, X_batch_train) # Backward pass, calculate derivatives of loss w.r.t. weights dw = neural_network.backprop(Y_batch, X_batch_train) # Correct neural network''s weights neural_network.apply_dw(dw) # Print the loss every 1000 iterations if i % 10 == 0:
return repres X_test = np.zeros((test_size, seq_size, len_circ_repr), dtype=np.float) y_test = np.zeros((test_size, len_circ_repr), dtype=np.float) params = { # "freqs":{'D':86400,'H':3600,'T':60,'S':1, 'B':86400, 'W-SUN':86400*7 }, "freqs":{'S':1, 'T':60}, "add_noise": False, "mult": 59, "time_repr":time_representation } freqs = get_random_batch(X_test, y_test, params) ## build the model: print('Build model...') graph = Graph() graph.add_input(name='input', ndim=3) graph.add_node(GRU(len_circ_repr, 128, return_sequences=True), name='gru1', input='input') graph.add_node(GRU(128, 128, return_sequences=False), name='gru2', input='gru1') graph.add_node(Dense(128, 2, activation='tanh'), name='split1', input='gru2') graph.add_node(Dense(128, 2, activation='tanh'), name='split2', input='gru2') graph.add_node(Dense(128, 2, activation='tanh'), name='split3', input='gru2') #graph.add_node(GRU(32, 2, return_sequences=False), name='split1', input='tdd') #graph.add_node(TimeDistributedDense(32, 2, activation='tanh'), name='split2', input='gru') #graph.add_node(TimeDistributedDense(32, 2, activation='tanh'), name='split3', input='gru')
adj_norm_batch, adj_orig_batch, adj_idx = get_consecutive_batch( 0, args.batch_size, adj, adj_norm) features = features_batch feed_dict = construct_feed_dict(adj_norm_batch, adj_orig_batch, features, placeholders) feed_dict.update({placeholders['dropout']: args.dropout}) outs = sess.run([model.reconstructions], feed_dict=feed_dict) reconstructions = outs[0].reshape([args.batch_size, 180, 180]) # Visualize sample full matrix of original, normalized, and reconstructed batches. for i in range(adj_orig_batch.shape[0]): visualize_matrix(adj_orig_batch, i, model_name, 'original_' + str(i)) visualize_matrix(adj_norm_batch, i, model_name, 'normalized_' + str(i)) visualize_matrix(reconstructions, i, model_name, 'reconstruction_' + str(i)) adj_norm_batch, adj_orig_batch, adj_idx = get_random_batch( args.batch_size, adj, adj_norm) features = features_batch feed_dict = construct_feed_dict(adj_norm_batch, adj_orig_batch, features, placeholders) feed_dict.update({placeholders['dropout']: args.dropout}) outs = sess.run([model.z_mean], feed_dict=feed_dict) z = outs[0] # Visualize Latent Space onehot = np.array([0 if idx < 203 else 1 for idx in adj_idx]) visualize_latent_space(z, onehot, model_name)
def main(args, silent_mode=False):
    """Build and train the adversarial source-separation model.

    Args:
        args: namespace of settings (data set, task, network types, prior,
            GAN type, loss scalings, visdom/logging options, iteration
            counts, ...); every attribute used is read directly below.
        silent_mode: when true, suppresses console output, visdom updates and
            the results file, and a NaN prediction returns ``np.nan`` instead
            of raising.

    Returns:
        The last monitored value of the total cost (evaluated every 1000
        iterations on the validation mixtures), ``np.nan`` on a NaN prediction
        in silent mode, or ``None`` if no iteration was run.

    Raises:
        ValueError: on an unknown data set, task, separator type, prior or GAN
            type, on ``normalize_rec`` without ``normalize``, or on a NaN
            prediction when not in silent mode.
    """
    if not silent_mode:
        print('Using the following settings:')
        for arg, value in args.__dict__.items():
            print('%s : %s' % (arg, value))

    # hyperparameters/settings
    optimizer = tf.train.RMSPropOptimizer
    BMLP_ACTIVATION = tf.nn.relu
    EPS = 1e-3
    hidden_dim = args.hidden_dim
    source_dim = args.source_dim
    input_dim = args.mix_dim

    if args.visdom:
        vis = visdom.Visdom(server=args.vd_server, port=args.vd_port,
                            env='main')

    if not args.backprop and not silent_mode:
        print('Not backpropagaging through product distribution')

    plot_size = args.plot_truncate

    #################### get the data ####################
    if args.data == 'synthetic':
        all_x, all_y, A = synthetic.get_data(seed=101, task_type=args.task,
                                             mix_dim=input_dim)
        plot_size = 500
    elif args.data == 'audio':
        linear_mix, pnl_mix, A, sources = audio.get_data()
        if args.task == 'linear':
            all_y = linear_mix
        elif args.task == 'pnl':
            all_y = pnl_mix
        else:
            raise ValueError('task not supported for data set')
        all_x = sources
    else:
        # Previously fell through and failed later with a NameError.
        raise ValueError('Unknown data set')

    # Samples are stored along the second axis; the first n_validation
    # columns form the validation split.
    val_x = all_x[:, :args.n_validation]
    val_y = all_y[:, :args.n_validation]
    train_x = all_x[:, args.n_validation:]
    train_y = all_y[:, args.n_validation:]

    if args.blind:
        train_x = train_y.copy()
    ######################################################

    # construct the parts of the graph which contain trainable parameters
    with tf.variable_scope('separator'):
        if args.separator_type == 'linear':
            separator = MLP([input_dim, source_dim], [None],
                            stddev=args.sep_stddev)
        elif args.separator_type == 'pnl':
            linear_separator = MLP([input_dim, source_dim], [None],
                                   stddev=args.sep_stddev)
            in_block = MLPBlock(input_dim, 32, n_layers=2,
                                stddev=args.sep_stddev)

            def separator(x):
                return linear_separator(
                    in_block(x, activation=BMLP_ACTIVATION))
        elif args.separator_type == 'mlp':
            separator = MLP([input_dim, hidden_dim, hidden_dim, source_dim],
                            [tf.nn.relu, tf.nn.relu, None],
                            stddev=args.sep_stddev)
        else:
            # Previously fell through and failed later with a NameError.
            raise ValueError('Unknown separator type')

        if args.mixer_type == 'linear':
            mixer = MLP([source_dim, input_dim], [None],
                        stddev=args.mix_stddev)
        elif args.mixer_type == 'pnl':
            linear_mixer = MLP([source_dim, input_dim], [None],
                               stddev=args.mix_stddev)
            out_block = MLPBlock(input_dim, 16, n_layers=2, bias_value=0.0,
                                 stddev=args.mix_stddev)

            def mixer(x):
                return out_block(linear_mixer(x), activation=BMLP_ACTIVATION)
        else:
            mixer = MLP([source_dim, hidden_dim, hidden_dim, input_dim],
                        [tf.nn.relu, tf.nn.relu, None],
                        stddev=args.mix_stddev)

        if args.prior == 'trainable':
            prior_bmlp = MLPBlock(source_dim, 32, n_layers=2,
                                  stddev=args.prior_stddev)

        if args.normalize:
            initial_gamma = tf.constant(.1, shape=(source_dim, ))
            gamma = tf.Variable(initial_gamma, name='gamma')
            initial_beta = tf.constant(0.0, shape=(source_dim, ))
            beta = tf.Variable(initial_beta, name='beta')

    with tf.variable_scope('discriminator'):
        # NOTE(review): this tests args.task, not a network-type option --
        # confirm that 'mlp' is a valid task value.
        if args.task == 'mlp':
            discriminator = MLP([source_dim, hidden_dim, hidden_dim, 1],
                                [tf.nn.relu, tf.nn.relu, None],
                                stddev=args.disc_stddev)
        else:
            discriminator = MLP([source_dim, 64, 1], [tf.nn.relu, None],
                                stddev=args.disc_stddev)

    sep_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope='separator')
    disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  scope='discriminator')

    y = tf.placeholder(tf.float32, shape=[None, input_dim])
    if not args.blind:
        x = tf.placeholder(tf.float32, shape=[None, source_dim])

    prediction = separator(y)
    prediction_processed = prediction
    if args.normalize:
        prediction_mean = tf.reduce_mean(prediction, 0)
        # note that we don't want to use -= here.
        prediction_processed = prediction_processed - prediction_mean
        prediction_sd = tf.sqrt(
            tf.reduce_mean(prediction_processed**2, 0) + EPS)
        prediction_processed /= prediction_sd

    # Samples the discriminator should treat as coming from the product of
    # marginals / the prior.
    if args.prior == 'anica':
        prediction_perm = resample_rows_per_column(prediction_processed)
    elif args.prior == 'gaussian':
        prediction_perm = tf.random_normal(tf.shape(prediction))
    elif args.prior == 'uniform':
        prediction_perm = tf.random_uniform(tf.shape(prediction))
    elif args.prior == 'trainable':
        prior_samp = tf.random_normal(tf.shape(prediction))
        prediction_perm = prior_bmlp(prior_samp, activation=BMLP_ACTIVATION)
        if args.normalize:
            prediction_perm_mean = tf.reduce_mean(prediction_perm, 0)
            prediction_perm_norm = prediction_perm - prediction_perm_mean
            prediction_perm_sd = tf.sqrt(
                tf.reduce_mean(prediction_perm_norm**2, 0) + EPS)
            prediction_perm_norm /= prediction_perm_sd
            prediction_perm = prediction_perm_norm
    else:
        raise ValueError("Unknown 'prior'")

    joint_logit = discriminator(prediction_processed)
    marg_logit = discriminator(prediction_perm)

    if args.gan_type == 'default':
        disc_cost = (tf.reduce_mean(tf.nn.softplus(-marg_logit)) +
                     tf.reduce_mean(tf.nn.softplus(joint_logit)))
        if args.backprop:
            gen_cost = -disc_cost
        else:
            gen_cost = -tf.reduce_mean(tf.nn.softplus(joint_logit))
    elif args.gan_type == 'kl':
        disc_cost = (tf.reduce_mean(tf.nn.softplus(-marg_logit)) +
                     tf.reduce_mean(tf.nn.softplus(joint_logit)))
        # there is no grad wrt the marginals by definition for this loss
        gen_cost = -tf.reduce_mean(joint_logit)
    elif args.gan_type == 'bgan':
        disc_cost = (tf.reduce_mean(tf.nn.softplus(-marg_logit)) +
                     tf.reduce_mean(tf.nn.softplus(joint_logit)))
        if args.backprop:
            gen_cost = (tf.reduce_mean(marg_logit**2) +
                        tf.reduce_mean(joint_logit**2))
        else:
            gen_cost = tf.reduce_mean(joint_logit**2)
    elif args.gan_type == 'wgan-gp':
        joint_term = tf.reduce_mean(joint_logit)
        marg_term = tf.reduce_mean(marg_logit)
        disc_cost_mon = joint_term - marg_term
        if args.backprop:
            gen_cost = -disc_cost_mon
        else:
            gen_cost = -joint_term
        # compute gradient penalty
        alpha = tf.random_uniform(shape=(tf.shape(prediction)[0], 1))
        interpolates = alpha * (prediction_perm - prediction_processed)
        interpolates += prediction_processed
        gradients = tf.gradients(discriminator(interpolates),
                                 [interpolates])[0]
        slopes = tf.sqrt(
            tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
        gradient_penalty = tf.reduce_mean((slopes - 1.)**2)
        disc_cost = disc_cost_mon + args.gp_scaling * gradient_penalty
    elif args.gan_type == 'gan-gp':
        # same cost as default but with gradient penalty
        disc_cost_mon = (tf.reduce_mean(tf.nn.softplus(-marg_logit)) +
                         tf.reduce_mean(tf.nn.softplus(joint_logit)))
        if args.backprop:
            gen_cost = -disc_cost_mon
        else:
            gen_cost = -tf.reduce_mean(tf.nn.softplus(joint_logit))
        gradients_joint = tf.gradients(joint_logit,
                                       [prediction_processed])[0]
        gradients_marg = tf.gradients(marg_logit, [prediction_perm])[0]
        ss_joint = tf.reduce_sum(tf.square(gradients_joint),
                                 reduction_indices=[1])
        ss_marg = tf.reduce_sum(tf.square(gradients_marg),
                                reduction_indices=[1])
        gp_marg = tf.reduce_mean(
            ss_marg * (1 - tf.nn.sigmoid(marg_logit))**2)
        gp_joint = tf.reduce_mean(ss_joint * tf.nn.sigmoid(joint_logit)**2)
        disc_cost = disc_cost_mon + args.gp_scaling * (gp_joint + gp_marg)
    else:
        raise ValueError('Unknown GAN type')

    prediction_norm = prediction - tf.reduce_mean(
        prediction, 0, keep_dims=True)
    cov_mat = (tf.matmul(tf.transpose(prediction_norm), prediction_norm) /
               tf.cast(tf.shape(prediction)[0], prediction.dtype))

    # This computes the average absolute value of the correlation matrix.
    # It can be an interesting value to monitor to see if the model is at least
    # able to remove the linear dependencies.
    diag = tf.diag_part(cov_mat)
    cor_mat = cov_mat / tf.sqrt(diag[:, None] * diag[None, :])
    total_corr = (
        (tf.reduce_sum(tf.abs(cor_mat)) - tf.cast(source_dim, 'float32')) /
        (source_dim * (source_dim - 1)))

    if args.normalize_rec:
        if not args.normalize:
            # gamma/beta only exist when args.normalize is set; this
            # combination previously failed with a NameError.
            raise ValueError("'normalize_rec' requires 'normalize'")
        reconstruction = mixer(prediction_processed * gamma + beta)
    else:
        reconstruction = mixer(prediction)

    rec_cost = tf.reduce_mean(tf.abs(reconstruction - y))
    tot_cost = args.rec_scaling * rec_cost + args.ind_scaling * gen_cost

    train_step_sep = optimizer(args.learning_rate).minimize(
        tot_cost, var_list=sep_vars)
    train_step_disc = optimizer(args.learning_rate).minimize(
        disc_cost, var_list=disc_vars)

    summary_vars = OrderedDict([
        ('total_corr', total_corr),
        ('total_cost', tot_cost),
        ('gen_cost', gen_cost),
        ('disc_cost', disc_cost),
        ('rec_cost', rec_cost),
    ])
    if not args.blind:
        # The x placeholder only exists in the non-blind setting; building
        # this op unconditionally raised a NameError in blind mode.
        summary_vars['max_corr'] = get_max_corr(x, prediction)
    if args.gan_type == 'wgan-gp':
        summary_vars['disc_cost_mon'] = disc_cost_mon

    # initialize session
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    def fig2rgb_array(fig, expand=True):
        """Render ``fig`` and return its RGB pixels as a uint8 array."""
        fig.canvas.draw()
        buf = fig.canvas.tostring_rgb()
        ncols, nrows = fig.canvas.get_width_height()
        shape = (nrows, ncols, 3) if not expand else (1, nrows, ncols, 3)
        # frombuffer replaces the deprecated binary-mode np.fromstring; the
        # copy keeps the result writable, as fromstring's result was.
        return np.frombuffer(buf, dtype=np.uint8).reshape(shape).copy()

    def plot_current(pred):
        """Plot the sources (or the predictions twice when blind)."""
        reference = pred.T if args.blind else train_x
        return plot_signals(plt, reference[:, :plot_size],
                            pred.T[:, :plot_size], n=num_signals)

    prediction_np = sess.run(prediction, feed_dict={y: train_y.T})

    if args.plot_dim is None:
        num_signals = source_dim
    else:
        num_signals = args.plot_dim

    plot_fig = plot_current(prediction_np)

    # NOTE(review): the folder is only created when not silent, but the
    # periodic np.save below runs in silent mode too -- confirm intent.
    if args.folder is not None and not silent_mode:
        if not os.path.isdir(args.folder):
            os.makedirs(args.folder)
        print('Saving logs to: %s' % (args.folder,))

    summary_lists = OrderedDict((key, []) for key in summary_vars)
    iteration_indices = []

    if args.source_dim > 7 and not args.blind:
        warnings.warn('Sourcedim > 7. Using approximate corr evaluation.')

    # Bound up front so the final report/return cannot hit unbound names
    # when no monitoring step ran (args.iterations == 0).
    total_cost_np = None
    max_corr_np = None

    for i in range(args.iterations):
        if i % 1000 == 0:
            # ---- periodic monitoring on the validation data ----
            feed_dict = {y: val_y.T}
            if not args.blind:
                feed_dict[x] = val_x.T
            summary = sess.run(list(summary_vars.values()),
                               feed_dict=feed_dict)

            prediction_np = sess.run(prediction, feed_dict={y: train_y.T})
            if np.isnan(prediction_np[0, 0]):
                if silent_mode:
                    return np.nan
                raise ValueError('NAN!')

            plt.gcf().clear()
            plot_fig = plot_current(prediction_np)
            fig_rgb = fig2rgb_array(plot_fig.gcf(), expand=False)
            if args.visdom and not silent_mode:
                vis.image(fig_rgb.transpose(2, 0, 1), win='predictions',
                          env=args.vd_env)

            if args.folder is not None:
                np.save(os.path.join(args.folder,
                                     'output' + str(i) + '.npy'),
                        prediction_np)

            iteration_indices.append(i)
            for summ_val, summ_name in zip(summary, summary_vars.keys()):
                if summ_name == 'max_corr' and not args.blind:
                    # Exact permutation-based evaluation for few sources,
                    # otherwise the in-graph value.
                    if args.source_dim < 8:
                        max_corr_np = get_max_corr_perm(prediction_np,
                                                        train_x.T)
                    else:
                        max_corr_np = summ_val
                    summary_lists['max_corr'].append(max_corr_np)
                else:
                    if summ_name == 'total_cost':
                        total_cost_np = summ_val
                    summary_lists[summ_name].append(summ_val)

                if args.visdom and not silent_mode:
                    vis.line(Y=np.asarray(summary_lists[summ_name]),
                             X=np.asarray(iteration_indices),
                             win=summ_name,
                             env=args.vd_env,
                             opts=dict(title=summ_name))

            if not args.blind and not silent_mode:
                print('%s Current max corr: %s' % (i, max_corr_np))

        # ---- one separator update, then disc_updates discriminator updates
        train_y_batch = get_random_batch(train_y.T, args.batch_size)
        train_step_sep.run(feed_dict={y: train_y_batch}, session=sess)
        for j in range(args.disc_updates):
            train_y_batch = get_random_batch(train_y.T, args.batch_size)
            train_step_disc.run(feed_dict={y: train_y_batch}, session=sess)

    # store final result somewhere in home folder together with the config
    if args.results_file is not None and not silent_mode:
        with open(args.results_file, 'w') as fout:
            fout.write(str(total_cost_np))
            if not args.blind:
                fout.write(' ' + str(max_corr_np) + '\n')
            else:
                fout.write('\n')
            for arg, value in args.__dict__.items():
                fout.write('{} : {}\n'.format(arg, value))

    return total_cost_np
def main(args):
    """Train a ``PNLMISEP`` separator and optionally write the final scores.

    Args:
        args: namespace read for ``hidden_dim``, ``data`` (``'synthetic'`` or
            ``'audio'``), ``block_weight_scaling``, ``weight_stddev``,
            ``learning_rate``, ``n_validation`` and ``results_file``.

    Raises:
        ValueError: if ``args.data`` names no known data set, or if the
            monitored prediction becomes NaN.

    Runs 500000 updates with batches of 256. If ``args.results_file`` is set,
    the validation cost, the maximum correlation and all settings are written
    to it.
    """
    # hyperparameters/settings
    n_hidden = args.hidden_dim
    batch_size = 256
    n_iterations = 500000
    source_dim = 6 if args.data == 'synthetic' else 3
    input_dim = source_dim

    with tf.variable_scope('separator'):
        separator = PNLMISEP(input_dim, n_hidden, args.block_weight_scaling,
                             args.weight_stddev)
    sep_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope='separator')

    y = tf.placeholder(tf.float32, shape=[None, input_dim])
    prediction = separator.forward(y)[-1]

    # Train by minimising the negative mean of the separator's
    # log-det-Jacobian term.
    tot_cost = -tf.reduce_mean(separator.get_log_det_jacobian2(y))
    optimizer = tf.train.RMSPropOptimizer
    train_step_sep = optimizer(args.learning_rate).minimize(
        tot_cost, var_list=sep_vars)

    init = tf.global_variables_initializer()
    sess = tf.Session()
    try:
        sess.run(init)

        if args.data == 'synthetic':
            all_x, all_y, _ = synthetic.get_data(seed=101, task_type='pnl',
                                                 mix_dim=input_dim)
        elif args.data == 'audio':
            _, pnl_mix, _, sources = audio.get_data()
            all_y = pnl_mix
            all_x = sources
        else:
            raise ValueError('No data set specified')

        # Samples are stored along the second axis; the first n_validation
        # columns form the validation split.
        val_y = all_y[:, :args.n_validation]
        train_x = all_x[:, args.n_validation:]
        train_y = all_y[:, args.n_validation:]

        for i in range(n_iterations):
            if i % 1000 == 0:
                # Periodic NaN check on the full training set (iteration 0
                # covers the untrained model).
                prediction_np = sess.run(prediction,
                                         feed_dict={y: train_y.T})
                if np.isnan(prediction_np[0, 0]):
                    raise ValueError('NAN!')
            train_y_batch = get_random_batch(train_y.T, batch_size)
            train_step_sep.run(feed_dict={y: train_y_batch}, session=sess)

        tot_cost_np = sess.run(tot_cost, feed_dict={y: val_y.T})
        # NOTE(review): prediction_np is the most recent monitored prediction
        # (last multiple of 1000), not one taken after the final update.
        max_corr_np = get_max_corr_perm(prediction_np, train_x.T)
    finally:
        # The session was previously never released.
        sess.close()

    if args.results_file is not None:
        with open(args.results_file, 'w') as fout:
            fout.write(str(tot_cost_np))
            fout.write(' ' + str(max_corr_np) + '\n')
            for arg, value in args.__dict__.items():
                fout.write('{} : {}\n'.format(arg, value))
def train(sess, args, adjTrainable, nlayers, use_sparse, bsize, nbatches,
          learningRate, nhidden, nsteps, maskLen, rank):
    """Train a ``GraphConvLSTM`` and report the test MSE at the best val MSE.

    Args:
        sess: TensorFlow session used for all runs.
        args: not used by this function.
        adjTrainable, nlayers, use_sparse, nhidden, nsteps, maskLen, rank:
            forwarded to the ``gconv.GraphConvLSTM`` constructor.
        bsize: batch size handed to ``utils.get_random_batch``.
        nbatches: training stops once the model's global step reaches this.
        learningRate: initial rate passed to the model; the working value is
            then re-read from ``model.learning_rate_variable``.

    Every 1000 batches (and at the final batch) validation and test MSE are
    evaluated and printed. The learning rate is multiplied by 0.9 whenever
    more than 500 batches have passed since both the best validation result
    and the previous decay.
    NOTE(review): the decay check is placed at loop level (evaluated every
    batch); the original nesting was not recoverable -- confirm.
    """
    seed = 123
    np.random.seed(seed)
    tf.set_random_seed(seed)

    # Load data
    adj, x_train, y_train, x_val, y_val, x_test, y_test = utils.load_data()
    test_var = np.var(y_test)  # denominator of the reported test R^2

    # Build model
    model = gconv.GraphConvLSTM(adj,
                                x_train.shape[1],
                                x_train.shape[2],
                                num_layers=nlayers,
                                n_steps=nsteps,
                                n_hidden=nhidden,
                                adj_trainable=adjTrainable,
                                use_sparse=use_sparse,
                                mask_len=maskLen,
                                learning_rate=learningRate,
                                rank=rank)

    # Initialize tensorflow variables
    sess.run(tf.global_variables_initializer())

    # Display / schedule parameters
    display_step = 1000
    between_lr_updates = 500
    lr_factor = 0.9

    best_val = 99999999.
    saved_test_mse = 9999
    best_batch = 0
    last_lr_update = 0

    learningRate = sess.run(model.learning_rate_variable)
    batches_complete = sess.run(model.global_step)

    # Running sum / count of batch MSEs since the last report.
    train_mse = 0
    denom = 0.

    # Train model
    while batches_complete < nbatches:
        x_train_b, y_train_b = utils.get_random_batch(x_train, y_train,
                                                      nsteps, maskLen, bsize)
        t = time.time()

        feed_dict = utils.construct_feed_dict(x_train_b, y_train_b, model)
        feed_dict[model.learning_rate_variable] = learningRate

        # Training step
        _, batch_mse, batches_complete = sess.run(
            [model.opt_op, model.mse, model.global_step],
            feed_dict=feed_dict)
        train_mse += batch_mse
        batch_time = time.time() - t
        denom += 1

        # Periodically compute validation and test loss
        if batches_complete % display_step == 0 or batches_complete == nbatches:
            val_mse, _ = utils.evaluate(x_val, y_val, model, sess, nsteps,
                                        maskLen)
            test_mse, _ = utils.evaluate(x_test, y_test, model, sess, nsteps,
                                         maskLen)

            print("Batch Number:%04d" % (batches_complete),
                  "train_mse={:.5f}".format(train_mse / denom),
                  "val_mse={:.5f}".format(val_mse),
                  "test_mse={:.5f}".format(test_mse),
                  "test_rsq={:.5f}".format(1 - (test_mse / test_var)),
                  "time={:.5f}".format(batch_time),
                  "lr={:.8f}".format(learningRate))
            train_mse = 0
            denom = 0.

            # Check if val loss is the best encountered so far
            if val_mse < best_val:
                best_val = val_mse
                saved_test_mse = test_mse
                best_batch = batches_complete - 1

        # Decay the learning rate once validation has stalled long enough.
        if (batches_complete - best_batch > between_lr_updates) and (
                batches_complete - last_lr_update > between_lr_updates):
            learningRate = learningRate * lr_factor
            last_lr_update = batches_complete

    print('best val mse: {0}, test mse: {1}, test rsq: {2}'.format(
        best_val, saved_test_mse, 1 - (saved_test_mse / test_var)))