def evaluate(print_grid=False):
    with tf.device('/gpu:0'):  # run on specific device
        input_tensor, pred, gt = models.import_model(num_timesteps, num_feats,
                                                     batch_size)
        dataset = data_loader.read_datasets(PREPROCESSED_DATA, dataset_type='test')

        saver = tf.train.Saver()
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            saver.restore(sess, model_path)

            all_pred, all_gt = [], []
            for i in range(updates_per_epoch):
                input_batch, gt_batch = dataset.next_batch(batch_size)
                pred_value = sess.run([pred], {input_tensor: input_batch,
                                               gt: [gt_batch]})
                all_pred.append(pred_value)
                all_gt.append(gt_batch)

            num_align = 0
            rmse = []
            for i in range(len(all_pred)):
                if all_pred[i] == all_gt[i]:
                    num_align += 1
                rmse.append(np.sqrt(np.power((all_pred[i] - all_gt[i]), 2)))

            print "Accuracy:", float(num_align) / len(all_pred)
            print "Avg. RMSE", np.mean(rmse)
            print "Variance RMSE", np.var(rmse)
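# A more compact way to compute the same summary numbers as the loop above,
# assuming each entry of all_pred / all_gt is a scalar per sample. This is an
# illustrative sketch only, not part of the original evaluation pipeline.
def summarize_predictions(all_pred, all_gt):
    preds = np.asarray(all_pred, dtype=np.float32).flatten()
    gts = np.asarray(all_gt, dtype=np.float32).flatten()
    errors = np.abs(preds - gts)          # sqrt((p - g)^2) == |p - g| per sample
    print "Accuracy:", np.mean(preds == gts)
    print "Avg. RMSE", np.mean(errors)
    print "Variance RMSE", np.var(errors)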
def train():
    with tf.device('/gpu:0'):  # run on specific device
        input_tensor, pred, gt = models.import_model(num_timesteps, num_feats,
                                                     batch_size)
        loss = get_loss(pred, gt)
        optimizer = tf.train.AdamOptimizer(learning_rate, epsilon=1.0)
        train = optimizer.minimize(loss=loss)

        dataset = data_loader.read_datasets(PREPROCESSED_DATA)

        saver = tf.train.Saver()  # defaults to saving all variables

        # logging the loss function
        loss_placeholder = tf.placeholder(tf.float32)
        tf.scalar_summary('train_loss', loss_placeholder)
        merged = tf.merge_all_summaries()

        init = tf.initialize_all_variables()
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            writer = tf.train.SummaryWriter(
                os.path.join(working_directory, 'logs'), sess.graph_def)
            sess.run(init)

            for epoch in range(max_epoch):
                training_loss = 0.0

                widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]
                pbar = ProgressBar(updates_per_epoch, widgets=widgets)
                pbar.start()

                for i in range(updates_per_epoch):
                    pbar.update(i)
                    input_batch, gt_batch = dataset.next_batch(batch_size)
                    _, loss_value = sess.run([train, loss],
                                             {input_tensor: input_batch,
                                              gt: [gt_batch]})
                    training_loss += np.sum(loss_value)

                training_loss = training_loss / updates_per_epoch
                print("Loss %f" % training_loss)

                # save model
                if epoch % save_frequency == 0:
                    checkpoints_folder = os.path.join(working_directory,
                                                      'checkpoints')
                    if not os.path.exists(checkpoints_folder):
                        os.makedirs(checkpoints_folder)
                    saver.save(sess,
                               os.path.join(checkpoints_folder, 'model.ckpt'),
                               global_step=epoch)

                    # save summaries
                    summary_str = sess.run(merged,
                                           feed_dict={input_tensor: input_batch,
                                                      gt: [gt_batch],
                                                      loss_placeholder: training_loss})
                    writer.add_summary(summary_str, global_step=epoch)

            writer.close()
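# get_loss() is referenced in train() above but not defined in this snippet.
# A minimal sketch, assuming a plain mean-squared-error objective between the
# prediction and the ground truth; the actual loss in the repo may differ.
def get_loss(pred, gt):
    # mean squared error over all outputs in the batch
    return tf.reduce_mean(tf.square(pred - gt))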
def evaluate(print_grid=False):
    data_paths = [
        conflict_data_file, climate_data_file, poverty_grid_file,
        poverty_mask_file
    ]
    dataset, conflict_mask, poverty_grid, poverty_mask = data_loader.read_datasets(
        data_paths, dataset_type='test')

    with tf.device('/gpu:0'):  # run on specific device
        conflict_grids, climate_grids, pov_grid, pred, gt = models.import_model(
            num_timesteps, input_size, poverty_grid.shape, input_size)

        saver = tf.train.Saver()
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            saver.restore(sess, model_path)

            all_pred, all_gt = [], []
            for i in range(updates_per_epoch):
                conflict_grids_batch, gt_batch, climate_grids_batch = \
                    dataset.next_batch(batch_size)
                pred_value = sess.run(
                    [pred], {
                        conflict_grids: conflict_grids_batch,
                        climate_grids: climate_grids_batch,
                        pov_grid: poverty_grid,
                        gt: gt_batch
                    })

                # keep only grid cells covered by both the conflict and poverty masks
                mask = conflict_mask * poverty_mask
                pred_value = pred_value * mask
                to_remove_idxs = np.where(mask.flatten() < 1)
                pred_value = np.delete(pred_value.flatten(), to_remove_idxs)
                gt_batch = np.delete(gt_batch.flatten(), to_remove_idxs)
                assert len(pred_value) == len(gt_batch)

                for k in range(len(pred_value)):
                    all_pred.append(pred_value[k])
                    all_gt.append(gt_batch[k])

                if print_grid:
                    np.set_printoptions(precision=1, linewidth=150, suppress=True)
                    print('-' * 80)
                    print(np.squeeze(pred_value))
                    print(np.squeeze(gt_batch))

            get_stats(all_pred, all_gt)

            # baseline: random 0/1 predictions over the same cells
            print "Collecting stats for random predictions"
            all_random = np.random.randint(0, 2, (len(all_pred)))
            get_stats(all_random, all_gt)
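# get_stats() is called in the evaluation functions above but is not shown in
# this snippet. A hedged sketch of what it might report, assuming binary ground
# truth and probability-like predictions thresholded at 0.5, using standard
# scikit-learn metrics; the real implementation may compute different numbers.
def get_stats(all_pred, all_gt):
    from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                                 f1_score)
    preds = (np.asarray(all_pred, dtype=np.float32) >= 0.5).astype(int)
    gts = np.asarray(all_gt, dtype=np.float32).astype(int)
    print "Accuracy:", accuracy_score(gts, preds)
    print "Precision:", precision_score(gts, preds)
    print "Recall:", recall_score(gts, preds)
    print "F1:", f1_score(gts, preds)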
def main():
    word2vec_path = '../../romanian_word_vecs/cc.ro.300.bin'
    training_data_path = '../data-capsnets/scenario0/train.txt'
    test_data_path = '../data-capsnets/scenario0/test.txt'

    # Define the flags
    FLAGS = flags.define_app_flags('0-nodrop-seqmini')

    # Load data
    print('------------------load word2vec begin-------------------')
    w2v = data_loader.load_w2v(word2vec_path)
    print('------------------load word2vec end---------------------')
    data = data_loader.read_datasets(w2v, training_data_path, test_data_path)
    flags.set_data_flags(data)

    train(model_s2i.CapsNetS2I, data, FLAGS)
def main():
    word2vec_path = '../../romanian_word_vecs/cleaned-vectors-diacritice-cc-100.vec'
    training_data_path = '../data/scenario1/train.txt'
    test_data_path = '../data/scenario1/test.txt'

    # Define the flags
    FLAGS = flags.define_app_flags('1-model')

    # Load data
    print('------------------load word2vec begin-------------------')
    w2v = data_loader.load_w2v(word2vec_path)
    print('------------------load word2vec end---------------------')
    data = data_loader.read_datasets(w2v, training_data_path, test_data_path)
    flags.set_data_flags(data)

    train(model.SemCapsNet, data, FLAGS)
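# Typical entry-point guard, assuming each main() above lives in its own script:
if __name__ == '__main__':
    main()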
def evaluate(print_grid=False):
    data_paths = [conflict_data_file, poverty_grid_file, poverty_mask_file]
    dataset, conflict_mask, poverty_grid, poverty_mask = data_loader.read_datasets(
        data_paths, dataset_type='test')

    with tf.device('/gpu:0'):  # run on specific device
        conflict_grids, pov_grid, pred, gt = models.import_model(
            num_timesteps, input_size, poverty_grid.shape)

        saver = tf.train.Saver()
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            saver.restore(sess, model_path)

            all_pred, all_gt = [], []
            for i in range(updates_per_epoch):
                conflict_grids_batch, gt_batch = \
                    dataset.next_batch(batch_size)
                pred_value = sess.run([pred], {conflict_grids: conflict_grids_batch,
                                               pov_grid: poverty_grid,
                                               gt: gt_batch})

                mask = conflict_mask * poverty_mask
                pred_value = pred_value * mask
                to_remove_idxs = np.where(mask.flatten() < 1)
                pred_value = np.delete(pred_value.flatten(), to_remove_idxs)
                gt_batch = np.delete(gt_batch.flatten(), to_remove_idxs)
                assert len(pred_value) == len(gt_batch)

                for k in range(len(pred_value)):
                    all_pred.append(pred_value[k])
                    all_gt.append(gt_batch[k])

                if print_grid:
                    np.set_printoptions(precision=1, linewidth=150, suppress=True)
                    print('-' * 80)
                    print(np.squeeze(pred_value))
                    print(np.squeeze(gt_batch))

            get_stats(all_pred, all_gt)

            print "Collecting stats for random predictions"
            all_random = np.random.randint(0, 2, (len(all_pred)))
            get_stats(all_random, all_gt)
def train():
    data_paths = [
        conflict_data_file, climate_data_file, poverty_grid_file,
        poverty_mask_file
    ]
    dataset, conflict_mask, poverty_grid, poverty_mask = data_loader.read_datasets(
        data_paths)

    with tf.device('/gpu:0'):  # run on specific device
        conflict_grids, climate_grids, pov_grid, pred, gt = models.import_model(
            num_timesteps, input_size, poverty_grid.shape, input_size)
        loss = get_loss(pred, gt, conflict_mask, poverty_mask)
        optimizer = tf.train.AdamOptimizer(learning_rate, epsilon=1.0)
        train = optimizer.minimize(loss=loss)

        saver = tf.train.Saver()  # defaults to saving all variables

        # logging the loss function
        loss_placeholder = tf.placeholder(tf.float32)
        tf.scalar_summary('train_loss', loss_placeholder)
        merged = tf.merge_all_summaries()

        init = tf.initialize_all_variables()
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            writer = tf.train.SummaryWriter(
                os.path.join(working_directory, 'logs'), sess.graph_def)
            sess.run(init)

            for epoch in range(max_epoch):
                training_loss = 0.0

                widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]
                pbar = ProgressBar(updates_per_epoch, widgets=widgets)
                pbar.start()

                for i in range(updates_per_epoch):
                    pbar.update(i)
                    conflict_grids_batch, gt_batch, climate_grids_batch = \
                        dataset.next_batch(batch_size)
                    _, loss_value = sess.run(
                        [train, loss], {
                            conflict_grids: conflict_grids_batch,
                            climate_grids: climate_grids_batch,
                            pov_grid: poverty_grid,
                            gt: gt_batch
                        })
                    training_loss += np.sum(loss_value)

                training_loss = training_loss / (updates_per_epoch * batch_size)
                print("Loss %f" % training_loss)

                # save model
                if epoch % save_frequency == 0:
                    checkpoints_folder = os.path.join(working_directory,
                                                      'checkpoints')
                    if not os.path.exists(checkpoints_folder):
                        os.makedirs(checkpoints_folder)
                    saver.save(sess,
                               os.path.join(checkpoints_folder, 'model.ckpt'),
                               global_step=epoch)

                    # save summaries
                    summary_str = sess.run(merged,
                                           feed_dict={
                                               conflict_grids: conflict_grids_batch,
                                               climate_grids: climate_grids_batch,
                                               gt: gt_batch,
                                               pov_grid: poverty_grid,
                                               loss_placeholder: training_loss
                                           })
                    writer.add_summary(summary_str, global_step=epoch)

            writer.close()
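# The get_loss() used in this train() takes the two masks as extra arguments
# but is not defined in this snippet. A hedged sketch, assuming a squared-error
# loss restricted to grid cells covered by both masks; the real implementation
# in the repo may use a different formulation.
def get_loss(pred, gt, conflict_mask, poverty_mask):
    mask = conflict_mask * poverty_mask           # cells with valid data
    masked_sq_err = tf.square(pred - gt) * mask   # zero out masked-off cells
    return tf.reduce_sum(masked_sq_err) / tf.reduce_sum(mask)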
def evaluate(print_grid=False):
    with tf.device('/gpu:0'):  # run on specific device
        conflict_grids, pred, gt, mask = models.import_model(
            num_timesteps, input_size, batch_size)
        dataset = data_loader.read_datasets(data_file, dataset_type='test')

        saver = tf.train.Saver()
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            saver.restore(sess, model_path)

            all_pred, all_gt = [], []
            for i in range(updates_per_epoch):
                conflict_grids_batch, gt_batch, mask_batch = \
                    dataset.next_batch(batch_size)
                pred_value = sess.run(
                    [pred], {
                        conflict_grids: conflict_grids_batch,
                        gt: gt_batch,
                        mask: mask_batch
                    })

                # keep only grid cells covered by the mask
                pred_value = pred_value * mask_batch
                to_remove_idxs = np.where(mask_batch.flatten() < 1)
                pred_value = np.delete(pred_value.flatten(), to_remove_idxs)
                gt_batch = np.delete(gt_batch.flatten(), to_remove_idxs)
                assert len(pred_value) == len(gt_batch)

                for k in range(len(pred_value)):
                    all_pred.append(pred_value[k])
                    all_gt.append(gt_batch[k])

                if print_grid:
                    np.set_printoptions(precision=1, linewidth=150, suppress=True)
                    print('-' * 80)
                    print(np.squeeze(pred_value))
                    print(np.squeeze(gt_batch))

            assert len(all_pred) == len(all_gt)

            # fraction of cells where the thresholded prediction matches the label
            num_align = 0
            for i in range(len(all_pred)):
                if all_gt[i] > 0:
                    if all_pred[i] > 0.5:
                        num_align += 1
                elif all_gt[i] < 1:
                    if all_pred[i] <= 0.5:
                        num_align += 1
            print "Aligned:", float(num_align) / len(all_pred)

            threshold = 0.5

            # precision: true positives / predicted positives
            precision_num, precision_denom = 0.0, 0.0
            for i in range(len(all_pred)):
                if all_gt[i] == 1:
                    if all_pred[i] >= threshold:
                        precision_num += 1
                        precision_denom += 1
                else:
                    if all_pred[i] >= threshold:
                        precision_denom += 1

            # recall: true positives / actual positives
            recall_num, recall_denom = 0.0, 0.0
            for i in range(len(all_pred)):
                if all_gt[i] == 1:
                    if all_pred[i] >= threshold:
                        recall_num += 1
                        recall_denom += 1
                    else:
                        recall_denom += 1

            print "Precision", float(precision_num) / precision_denom
            print "Recall", float(recall_num) / recall_denom
def evaluate(print_grid=False):
    data_paths = [conflict_data_file, poverty_grid_file, poverty_mask_file]
    dataset, conflict_mask, poverty_grid, poverty_mask = data_loader.read_datasets(
        data_paths, dataset_type='test')

    with tf.device('/gpu:0'):  # run on specific device
        conflict_grids, pov_grid, pred, gt = models.import_model(
            num_timesteps, input_size, poverty_grid.shape)

        saver = tf.train.Saver()
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            saver.restore(sess, model_path)

            all_pred, all_gt = [], []
            for i in range(updates_per_epoch):
                conflict_grids_batch, gt_batch = \
                    dataset.next_batch(batch_size)
                pred_value = sess.run([pred], {conflict_grids: conflict_grids_batch,
                                               pov_grid: poverty_grid,
                                               gt: gt_batch})

                # keep only grid cells covered by both the conflict and poverty masks
                mask = conflict_mask * poverty_mask
                pred_value = pred_value * mask
                to_remove_idxs = np.where(mask.flatten() < 1)
                pred_value = np.delete(pred_value.flatten(), to_remove_idxs)
                gt_batch = np.delete(gt_batch.flatten(), to_remove_idxs)
                assert len(pred_value) == len(gt_batch)

                for k in range(len(pred_value)):
                    all_pred.append(pred_value[k])
                    all_gt.append(gt_batch[k])

                if print_grid:
                    np.set_printoptions(precision=1, linewidth=150, suppress=True)
                    print('-' * 80)
                    print(np.squeeze(pred_value))
                    print(np.squeeze(gt_batch))

            assert len(all_pred) == len(all_gt)

            # fraction of cells where the thresholded prediction matches the label
            num_align = 0
            for i in range(len(all_pred)):
                if all_gt[i] > 0:
                    if all_pred[i] > 0.5:
                        num_align += 1
                elif all_gt[i] < 1:
                    if all_pred[i] <= 0.5:
                        num_align += 1
            print "Aligned:", float(num_align) / len(all_pred)

            threshold = 0.5

            # precision: true positives / predicted positives
            precision_num, precision_denom = 0.0, 0.0
            for i in range(len(all_pred)):
                if all_gt[i] == 1:
                    if all_pred[i] >= threshold:
                        precision_num += 1
                        precision_denom += 1
                else:
                    if all_pred[i] >= threshold:
                        precision_denom += 1

            # recall: true positives / actual positives
            recall_num, recall_denom = 0.0, 0.0
            for i in range(len(all_pred)):
                if all_gt[i] == 1:
                    if all_pred[i] >= threshold:
                        recall_num += 1
                        recall_denom += 1
                    else:
                        recall_denom += 1

            # full precision-recall curve; currently computed for reference only,
            # since the printed numbers below use the fixed 0.5 threshold
            precision, recall, thresholds = precision_recall_curve(all_gt, all_pred)

            print "Precision", float(precision_num) / precision_denom
            print "Recall", float(recall_num) / recall_denom
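# The precision_recall_curve call in evaluate() above computes the full curve
# but its result is unused. A small hedged helper (illustrative, not part of
# the original code) that summarises the whole curve with standard
# scikit-learn calls instead of a single 0.5 threshold:
def print_pr_curve_summary(all_gt, all_pred):
    from sklearn.metrics import precision_recall_curve, average_precision_score
    precision, recall, _ = precision_recall_curve(all_gt, all_pred)
    # best F1 achievable over all thresholds on the curve
    f1 = 2 * precision * recall / np.maximum(precision + recall, 1e-8)
    print "Best F1 over thresholds:", np.max(f1)
    print "Average precision:", average_precision_score(all_gt, all_pred)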