def evaluate_retrieval(model, dataset, params, session):
    print('evaluating retrieval')
    print('computing vectors...')
    validation_labels = np.array(
        [[y] for y, _ in dataset.rows('validation', num_epochs=1)])
    training_labels = np.array(
        [[y] for y, _ in dataset.rows('training', num_epochs=1)])
    training_labels = np.concatenate((training_labels, validation_labels), 0)
    test_labels = np.array(
        [[y] for y, _ in dataset.rows('test', num_epochs=1)])
    validation_vectors = m.vectors(
        model,
        dataset.batches('validation', params.batch_size, num_epochs=1),
        session)
    training_vectors = m.vectors(
        model,
        dataset.batches('training', params.batch_size, num_epochs=1),
        session)
    # The validation split is folded into the training set for retrieval.
    training_vectors = np.concatenate((training_vectors, validation_vectors),
                                      0)
    test_vectors = m.vectors(
        model,
        dataset.batches('test', params.batch_size, num_epochs=1),
        session)
    print('evaluating...')
    recall_values = [0.0001, 0.0002, 0.0005, 0.002, 0.01, 0.05, 0.2]
    results = e.evaluate(training_vectors, test_vectors, training_labels,
                         test_labels, recall_values)
    for i, r in enumerate(recall_values):
        print('precision @ {}: {}'.format(r, results[i]))
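
# `e.evaluate` lives in a separate module. As a reading aid, here is a
# minimal sketch of the metric it is assumed to compute: for each test
# document, retrieve the nearest `recall * num_training_docs` training
# documents by cosine similarity and report the fraction whose label matches
# the query. The real implementation may differ in detail.
def _precision_at_recall_sketch(train_vecs, test_vecs, train_labels,
                                test_labels, recall_values):
    # L2-normalise so that a dot product equals cosine similarity.
    train = train_vecs / np.linalg.norm(train_vecs, axis=1, keepdims=True)
    test = test_vecs / np.linalg.norm(test_vecs, axis=1, keepdims=True)
    sims = test.dot(train.T)              # (num_test, num_train)
    ranking = np.argsort(-sims, axis=1)   # best matches first
    results = []
    for r in recall_values:
        k = max(1, int(r * train.shape[0]))             # retrieved-set size
        retrieved = train_labels.flatten()[ranking[:, :k]]
        hits = retrieved == test_labels.reshape(-1, 1)  # label matches
        results.append(hits.mean())                     # mean precision@k
    return results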

def plot_tsne(model, dataset, params):
    with tf.Session(config=tf.ConfigProto(
            inter_op_parallelism_threads=params.num_cores,
            intra_op_parallelism_threads=params.num_cores,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(params.model)
        saver.restore(session, ckpt.model_checkpoint_path)

        print('computing vectors...')
        test_labels = np.array(
            [[y] for y, _ in dataset.rows('test', num_epochs=1)])
        test_vectors = m.vectors(
            model,
            dataset.batches('test', params.batch_size, num_epochs=1),
            session)
        print('test vectors: {}'.format(test_vectors.shape))
        print('test labels: {}'.format(test_labels.shape))

        # Project the document vectors to 2-D and scatter-plot them,
        # coloured by class label.
        tsne = TSNE(perplexity=40)
        tsne_obj = tsne.fit_transform(test_vectors)
        tsne_df = pd.DataFrame({
            'X': tsne_obj[:, 0],
            'Y': tsne_obj[:, 1],
            'test_labels': test_labels.flatten()
        })
        sns.scatterplot(x='X', y='Y',
                        hue='test_labels',
                        palette=sns.color_palette('deep', 10),
                        legend='full',
                        data=tsne_df)
        plt.show()
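
# plt.show() needs an interactive display. A headless-friendly variant of
# the plot above, writing the figure to disk instead; the `path` argument
# and the dpi are illustrative choices, not project conventions.
def save_tsne_plot(tsne_df, path):
    ax = sns.scatterplot(x='X', y='Y',
                         hue='test_labels',
                         palette=sns.color_palette('deep', 10),
                         legend='full',
                         data=tsne_df)
    ax.get_figure().savefig(path, dpi=150)
    plt.close(ax.get_figure())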

def evaluate(model, dataset, params):
    with tf.Session(config=tf.ConfigProto(
            inter_op_parallelism_threads=params.num_cores,
            intra_op_parallelism_threads=params.num_cores,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(params.model)
        saver.restore(session, ckpt.model_checkpoint_path)

        print('computing vectors...')
        validation_labels = np.array(
            [[y] for y, _ in dataset.rows('validation', num_epochs=1)])
        training_labels = np.array(
            [[y] for y, _ in dataset.rows('training', num_epochs=1)])
        training_labels = np.concatenate((training_labels, validation_labels),
                                         0)
        test_labels = np.array(
            [[y] for y, _ in dataset.rows('test', num_epochs=1)])
        validation_vectors = m.vectors(
            model,
            dataset.batches('validation', params.batch_size, num_epochs=1),
            session)
        training_vectors = m.vectors(
            model,
            dataset.batches('training', params.batch_size, num_epochs=1),
            session)
        # The validation split is folded into the training set for retrieval.
        training_vectors = np.concatenate(
            (training_vectors, validation_vectors), 0)
        test_vectors = m.vectors(
            model,
            dataset.batches('test', params.batch_size, num_epochs=1),
            session)

        print('evaluating...')
        recall_values = [0.0001, 0.0002, 0.0005, 0.002, 0.01, 0.05, 0.2]
        results = e.evaluate(training_vectors, test_vectors, training_labels,
                             test_labels, recall_values)
        for i, r in enumerate(recall_values):
            print('precision @ {}: {}'.format(r, results[i]))

def vectors(model, dataset, params):
    with tf.Session(config=tf.ConfigProto(
            inter_op_parallelism_threads=params.num_cores,
            intra_op_parallelism_threads=params.num_cores,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        # Restore the trained weights; without this, the exported vectors
        # would come from a randomly initialised model.
        ckpt = tf.train.get_checkpoint_state(params.model)
        saver.restore(session, ckpt.model_checkpoint_path)
        for collection in dataset.collections:
            save_vectors(
                m.vectors(
                    model,
                    dataset.batches(collection, params.batch_size,
                                    num_epochs=1),
                    session),
                collection,
                params.model)
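
# `save_vectors` is defined elsewhere in this project; the loop above only
# assumes it persists one array per collection under the model directory. A
# hypothetical sketch of that contract (the .npy filename scheme is an
# assumption, not the project's actual convention):
#
# def save_vectors(vectors, collection, model_dir):
#     np.save(os.path.join(model_dir, '{}_vectors.npy'.format(collection)),
#             vectors)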

def train(model, dataset, params):
    log_dir = os.path.join(params.model, 'logs')
    model_dir = os.path.join(params.model, 'model')

    with tf.Session(config=tf.ConfigProto(
            inter_op_parallelism_threads=params.num_cores,
            intra_op_parallelism_threads=params.num_cores,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        avg_loss = tf.placeholder(tf.float32, [], 'loss_ph')
        tf.summary.scalar('loss', avg_loss)
        validation = tf.placeholder(tf.float32, [], 'validation_ph')
        tf.summary.scalar('validation', validation)

        summary_writer = tf.summary.FileWriter(log_dir, session.graph)
        summaries = tf.summary.merge_all()
        saver = tf.train.Saver(tf.global_variables())

        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()

        losses = []

        # This currently streams from disk. You can set num_epochs=1 and
        # wrap this call with something like itertools.cycle to keep the
        # data in memory (see the sketch after this function).
        training_data = dataset.batches('training', params.batch_size)

        best_val = 0.0
        training_labels = np.array(
            [[y] for y, _ in dataset.rows('training', num_epochs=1)])
        validation_labels = np.array(
            [[y] for y, _ in dataset.rows('validation', num_epochs=1)])

        for step in range(params.num_steps + 1):
            _, x, seq_lengths = next(training_data)

            _, loss = session.run([model.opt, model.opt_loss], feed_dict={
                model.x: x,
                model.seq_lengths: seq_lengths
            })
            losses.append(loss)

            if step % params.log_every == 0:
                print('{}: {:.6f}'.format(step, loss))

            if step and (step % params.save_every) == 0:
                validation_vectors = m.vectors(
                    model,
                    dataset.batches('validation', params.batch_size,
                                    num_epochs=1),
                    session)
                training_vectors = m.vectors(
                    model,
                    dataset.batches('training', params.batch_size,
                                    num_epochs=1),
                    session)
                val = e.evaluate(training_vectors, validation_vectors,
                                 training_labels, validation_labels)[0]
                print('validation: {:.3f} (best: {:.3f})'.format(
                    val, best_val or 0.0))
                if val > best_val:
                    best_val = val
                    print('saving: {}'.format(model_dir))
                    saver.save(session, model_dir, global_step=step)

                summary, = session.run(
                    [summaries],
                    feed_dict={
                        model.x: x,
                        model.seq_lengths: seq_lengths,
                        validation: val,
                        avg_loss: np.average(losses)
                    })
                summary_writer.add_summary(summary, step)
                summary_writer.flush()
                losses = []
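
# A minimal sketch of the in-memory variant hinted at above: materialise one
# epoch of batches, then cycle over it forever. This assumes a full epoch of
# batches fits in RAM; `dataset` and `params` are as used in train().
#
# import itertools
# cached_batches = list(
#     dataset.batches('training', params.batch_size, num_epochs=1))
# training_data = itertools.cycle(cached_batches)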

def train(model, dataset, params):
    log_dir = os.path.join(params.model, 'logs')
    model_dir = os.path.join(params.model, 'model')

    with tf.Session(config=tf.ConfigProto(
            inter_op_parallelism_threads=params.num_cores,
            intra_op_parallelism_threads=params.num_cores,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        avg_d_loss = tf.placeholder(tf.float32, [], 'd_loss_ph')
        tf.summary.scalar('d_loss', avg_d_loss)
        avg_g_loss = tf.placeholder(tf.float32, [], 'g_loss_ph')
        tf.summary.scalar('g_loss', avg_g_loss)
        validation = tf.placeholder(tf.float32, [], 'validation_ph')
        tf.summary.scalar('validation', validation)
        avg_al_loss = tf.placeholder(tf.float32, [], 'al_loss_ph')
        tf.summary.scalar('al_loss', avg_al_loss)

        summary_writer = tf.summary.FileWriter(log_dir, session.graph)
        summaries = tf.summary.merge_all()
        saver = tf.train.Saver(tf.global_variables())

        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()

        d_losses = []
        g_losses = []
        al_g_losses = []
        df_disc_losses = pd.DataFrame()
        df_gen_losses = pd.DataFrame()
        df_val_scores = pd.DataFrame()
        df_al_losses = pd.DataFrame()

        training_data = dataset.batches('training', params.batch_size)

        best_val = 0.0
        training_labels = np.array(
            [[y] for y, _ in dataset.rows('training', num_epochs=1)])
        validation_labels = np.array(
            [[y] for y, _ in dataset.rows('validation', num_epochs=1)])

        for step in range(params.num_steps + 1):
            _, x = next(training_data)

            # Update the discriminator.
            d_loss_step = update_disc(model, x, model.D_solver, model.D_loss,
                                      params, session)
            d_losses.append(d_loss_step)
            df_disc_losses = df_disc_losses.append(
                {'step': step, 'disc_loss': d_loss_step}, ignore_index=True)

            # Update each generator in turn (and the alpha weights).
            g_loss_list = []
            for i in range(params.num_gen):
                g_loss_i, al_loss = update_gen(model, x, model.G_solver[i],
                                               model.Gen_loss[i],
                                               model.Al_solver,
                                               model.Al_gen_loss, params,
                                               session)
                g_loss_list.append(g_loss_i)
            al_g_losses.append(al_loss)
            df_al_losses = df_al_losses.append(
                {'step': step, 'alpha_gen_loss': al_loss}, ignore_index=True)

            # Record one loss column per generator.
            gen_row = {'step': step}
            gen_row.update({'g_{}_loss'.format(i): g
                            for i, g in enumerate(g_loss_list)})
            df_gen_losses = df_gen_losses.append(gen_row, ignore_index=True)
            g_losses.append(g_loss_list)

            # Print discriminator and generator losses.
            if step % params.log_every == 0:
                print('{}: {:.6f} \t'.format(step, d_losses[-1]),
                      g_losses[-1], al_g_losses[-1])

            # Evaluate, track the best validation score, and checkpoint.
            if step and (step % params.save_every) == 0:
                validation_vectors = m.vectors(
                    model,
                    dataset.batches('validation', params.batch_size,
                                    num_epochs=1),
                    session)
                training_vectors = m.vectors(
                    model,
                    dataset.batches('training', params.batch_size,
                                    num_epochs=1),
                    session)
                val = e.evaluate(training_vectors, validation_vectors,
                                 training_labels, validation_labels)[0]
                print('validation: {:.3f} (best: {:.3f})'.format(
                    val, best_val or 0.0))
                df_val_scores = df_val_scores.append(
                    {'step': step, 'val_score': val,
                     'best_val_score': best_val},
                    ignore_index=True)
                if val > best_val:
                    best_val = val
                    print('saving: {}'.format(model_dir))
                    saver.save(session, model_dir, global_step=step)

                summary, = session.run(
                    [summaries],
                    feed_dict={
                        model.x: x,
                        model.z: np.random.normal(
                            0, 1, (params.batch_size, params.z_dim)),
                        validation: val,
                        avg_d_loss: np.average(d_losses),
                        avg_al_loss: np.average(al_g_losses),
                        avg_g_loss: np.average(g_losses)
                    })
                summary_writer.add_summary(summary, step)
                summary_writer.flush()
                d_losses = []
                g_losses = []

        # Store and plot discriminator and generator losses.
        df_losses = df_disc_losses.join(df_gen_losses.set_index('step'),
                                        on='step', how='inner')
        df_losses = df_losses.join(df_al_losses.set_index('step'),
                                   on='step', how='inner')
        df_losses.to_csv(log_dir + '_gen_disc_losses.csv', index=False)
        df_val_scores.to_csv(log_dir + '_val_scores.csv', index=False)

        img_loss = df_losses.plot(
            x='step',
            y=['disc_loss', 'alpha_gen_loss'],
            kind='line',
            title='Training Losses: Discriminator & Generator',
            xlabel='Step',
            ylabel='Loss').get_figure()
        img_loss.savefig(log_dir + '_disc_gen_loss.png')
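
# `update_disc` and `update_gen` are defined elsewhere in this project; the
# training loop above only assumes they run one optimiser step and return
# the resulting loss value(s). A hypothetical sketch of that contract for
# the discriminator (the noise feed is an assumption):
#
# def update_disc(model, x, solver, loss, params, session):
#     z = np.random.normal(0, 1, (params.batch_size, params.z_dim))
#     _, d_loss = session.run([solver, loss],
#                             feed_dict={model.x: x, model.z: z})
#     return d_loss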

def evaluate(model, dataset, params):
    log_dir = os.path.join(params.model, 'logs')

    with tf.Session(config=tf.ConfigProto(
            inter_op_parallelism_threads=params.num_cores,
            intra_op_parallelism_threads=params.num_cores,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(params.model)
        saver.restore(session, ckpt.model_checkpoint_path)

        print('computing vectors...')
        recall_values = [
            0.0001, 0.0002, 0.0005, 0.002, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0
        ]
        validation_labels = np.array(
            [[y] for y, _ in dataset.rows('validation', num_epochs=1)])
        training_labels = np.array(
            [[y] for y, _ in dataset.rows('training', num_epochs=1)])
        training_labels = np.concatenate((training_labels, validation_labels),
                                         0)
        test_labels = np.array(
            [[y] for y, _ in dataset.rows('test', num_epochs=1)])
        validation_vectors = m.vectors(
            model,
            dataset.batches('validation', params.batch_size, num_epochs=1),
            session)
        training_vectors = m.vectors(
            model,
            dataset.batches('training', params.batch_size, num_epochs=1),
            session)
        training_vectors = np.concatenate(
            (training_vectors, validation_vectors), 0)
        test_vectors = m.vectors(
            model,
            dataset.batches('test', params.batch_size, num_epochs=1),
            session)

        print('evaluating...')
        results = e.evaluate(training_vectors, test_vectors, training_labels,
                             test_labels, recall_values)
        df_precision_recall = pd.DataFrame(
            list(zip(recall_values, results)),
            columns=['recall', 'precision'])
        for i, r in enumerate(recall_values):
            print('precision @ {}: {}'.format(r, results[i]))

        # Store and plot precision-recall values.
        df_precision_recall.to_csv(log_dir + '_precision_recall_values.csv',
                                   index=False)
        img_precision_recall = df_precision_recall.plot(
            x='recall',
            y='precision',
            kind='line',
            title='Precision vs Recall',
            xlabel='Recall',
            ylabel='Precision').get_figure()
        img_precision_recall.savefig(log_dir + '_precision_recall.png')
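
# The recall fractions above span four orders of magnitude, so a log-scaled
# x-axis is often easier to read. A small variant, assuming the same
# `df_precision_recall` and `log_dir` as in evaluate():
#
# ax = df_precision_recall.plot(x='recall', y='precision', kind='line',
#                               logx=True, title='Precision vs Recall',
#                               xlabel='Recall', ylabel='Precision')
# ax.get_figure().savefig(log_dir + '_precision_recall_logx.png')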