def save_current_scores(self):
    if self._istep < 0:
        raise RuntimeWarning('no scores evaluated; scores not saved')
    else:
        savefpath = os.path.join(self._logdir,
                                 'scores_end_block%03d.npz' % self._blockwriter.iblock)
        save_kwargs = {'image_ids': self._curr_imgids, 'scores': self._curr_scores,
                       'scores_mat': self._curr_scores_mat, 'nscores': self._curr_nscores}
        print('saving scores to %s' % savefpath)
        utils.save_scores(savefpath, save_kwargs)
def main():
    # Parameters
    data_directory = '../../data/generated-data-r-10-n-6-4/'
    features_path = '../../data/features-generated-data-r-10-n-6-4'
    booking_file = '../../data/booking.csv'
    users_file = '../../data/user.csv'
    rating_thresholds = []
    true_objects_indexes = [0, 1, 2, 3, 4, 5]
    false_objects_indexes = [6, 7, 8, 9]

    file_names = os.listdir(data_directory)
    img_ids_vector = [int(name.split('-')[0]) for name in file_names]
    ratings_vector = [int(name.split('-')[-2]) for name in file_names]
    name_vector = [data_directory + name for name in file_names]
    images_indexes = [name.split('-')[3].split('.')[0] for name in file_names]
    ratings_matrix, images_indexes_for_id, ids_indexes, users_matrix = load_data(
        data_directory, booking_file, users_file, rating_thresholds)
    features = get_features(features_path, name_vector)

    fa = FeatureAgglomeration(n_clusters=50)
    fa.fit(features)
    features = fa.transform(features)

    scores_auc = []
    scores_rmse = []
    for i in range(10):
        cv_results_file = '../results/cv-generated-data-r-10-n-6-4-rf-fa-' + str(i) + '.csv'
        selection = ObjectSelection(show_selection_results=False, selection_algorithm='rf')
        selection.transform(ids=img_ids_vector, features=features, ratings=ratings_vector,
                            users_ratings=ratings_matrix, users=users_matrix,
                            cv_results_file=cv_results_file, images_indexes=images_indexes,
                            true_objects_indexes=true_objects_indexes,
                            false_objects_indexes=false_objects_indexes,
                            paths=name_vector, z_score=False)
        selection.evaluate(evaluation_metric='auc')
        selection.evaluate(evaluation_metric='rmse')
        print('\n\n-----\n\n')
        score_auc, score_rmse = selection.evaluate(evaluation_metric='auc')
        scores_auc.append(score_auc)
        scores_rmse.append(score_rmse)

    results_file = '../scores/generated-data-r-10-n-6-4-rf-fa-auc.csv'
    save_scores(scores_auc, results_file)
    results_file = '../scores/generated-data-r-10-n-6-4-rf-fa-rmse.csv'
    save_scores(scores_rmse, results_file)
def save_current_scores(self):
    """Save scores for current images to log_dir."""
    if self._istep < 0:
        raise RuntimeWarning('no scores evaluated; scores not saved')
    else:
        save_kwargs = {'image_ids': self._curr_imgids, 'scores': self._curr_scores}
        if self._stoch:
            save_kwargs.update({'scores_mat': self._curr_scores_mat,
                                'nscores': self._curr_nscores})
        savefpath = os.path.join(self._logdir, 'scores_step%03d.npz' % self._istep)
        print('saving scores to %s' % savefpath)
        utils.save_scores(savefpath, save_kwargs)
def save_current_scores(self):
    """Save scores for current images to log_dir."""
    if self._istep < 0:
        raise RuntimeWarning('no scores evaluated; scores not saved')
    else:
        save_kwargs = {
            'image_ids': self._curr_imgids,
            'scores': self._curr_scores
        }
        if self._stoch:
            save_kwargs.update({
                'scores_reps': self._curr_scores_reps,
                'scores_no_stoch': self._curr_scores_no_stoch
            })
        savefpath = os.path.join(self._logdir, f'scores_step{self._istep:03d}.npz')
        print('saving scores to', savefpath)
        utils.save_scores(savefpath, save_kwargs)
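# The save_current_scores variants above all delegate to utils.save_scores with
# an .npz path and a dict of arrays. A minimal sketch of such a helper follows,
# assuming it is a thin wrapper around numpy's savez (an assumption -- the real
# utils module is not shown in this excerpt).
import numpy as np

def save_scores(savefpath, save_kwargs):
    # Store each entry of the dict under its own key in a single .npz archive;
    # np.load(savefpath) later recovers a dict-like object with the same keys.
    np.savez(savefpath, **save_kwargs)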
    scores_rmse = []
    for i in range(10):
        cv_results_file = '../results/cv-generated-data-r-10-n-04-z-rf-' + str(i) + '.csv'
        selection = BasicFactorization(show_selection_results=False, selection_algorithm='rf')
        selection.transform(ids=ids_vector, features=features, ratings=ratings_vector,
                            users_ratings=ratings_matrix, users=users_matrix,
                            cv_results_file=cv_results_file, images_indexes=text_indexes,
                            true_objects_indexes=true_objects_indexes,
                            false_objects_indexes=false_objects_indexes,
                            paths=name_vector, z_score=True)
        score_auc, score_rmse = selection.evaluate(evaluation_metric='auc')
        scores_auc.append(score_auc)
        scores_rmse.append(score_rmse)

    scores_auc.sort()
    plt.title('AUC for the selected images')  # originally Slovenian: 'AUC za izbrane slike'
    plt.plot(scores_auc)
    plt.ylabel('AUC')
    plt.show()

    results_file = '../scores/generated-data-r-10-n-04-z-rf-auc.csv'
    save_scores(scores_auc, results_file)
    results_file = '../scores/generated-data-r-10-n-04-z-rf-rmse.csv'
    save_scores(scores_rmse, results_file)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--policy_name", default="TD3")               # Policy name
    parser.add_argument("--env_name", default="Pendulum-v0")          # OpenAI gym environment name
    parser.add_argument("--replay_buffer", default="prioritized")     # Replay buffer type
    parser.add_argument("--replay_buffer_size", default=5e4, type=int)     # Replay buffer capacity
    parser.add_argument("--replay_buffer_alpha", default=0.6, type=float)  # Replay buffer prioritization weight
    parser.add_argument("--seed", default=0, type=int)                # Sets Gym, PyTorch and NumPy seeds
    parser.add_argument("--start_timesteps", default=1e4, type=int)   # How many time steps the purely random policy runs for
    parser.add_argument("--eval_freq", default=1e3, type=float)       # How often (time steps) we evaluate
    parser.add_argument("--max_timesteps", default=5e4, type=float)   # Max time steps to run the environment for
    # argparse's type=bool is a known pitfall (any non-empty string, including
    # "False", is truthy), so parse the flag explicitly.
    parser.add_argument("--save_models", default=True,
                        type=lambda s: str(s).lower() in ("true", "1"))  # Whether or not models are saved
    parser.add_argument("--expl_noise", default=0.1, type=float)      # Std of Gaussian exploration noise
    parser.add_argument("--batch_size", default=100, type=int)        # Batch size for both actor and critic
    parser.add_argument("--discount", default=0.99, type=float)       # Discount factor
    parser.add_argument("--tau", default=0.005, type=float)           # Target network update rate
    parser.add_argument("--policy_noise", default=0.2, type=float)    # Noise added to target policy during critic update
    parser.add_argument("--noise_clip", default=0.5, type=float)      # Range to clip target policy noise
    parser.add_argument("--policy_freq", default=2, type=int)         # Frequency of delayed policy updates
    parser.add_argument("--lr_actor", default=0.001, type=float)      # Learning rate of actor
    parser.add_argument("--lr_critic", default=0.001, type=float)     # Learning rate of critic
    parser.add_argument("--prioritized_replay_eps", default=1e-3, type=float)   # Replay buffer epsilon (PER)
    parser.add_argument("--prioritized_replay_beta0", default=0.4, type=float)  # Replay buffer initial beta (PER)
    args = parser.parse_args()

    # Training kwargs (saved alongside results for reproducibility)
    kwargs = {
        "policy_name": args.policy_name,
        "env_name": args.env_name,
        "replay_buffer": args.replay_buffer,
        "replay_buffer_size": args.replay_buffer_size,
        "replay_buffer_alpha": args.replay_buffer_alpha,
        "seed": args.seed,
        "start_timesteps": args.start_timesteps,
        "eval_freq": args.eval_freq,
        "max_timesteps": args.max_timesteps,
        "save_models": args.save_models,
        "expl_noise": args.expl_noise,
        "batch_size": args.batch_size,
        "discount": args.discount,
        "tau": args.tau,
        "policy_noise": args.policy_noise,
        "noise_clip": args.noise_clip,
        "policy_freq": args.policy_freq,
        "lr_actor": args.lr_actor,
        "lr_critic": args.lr_critic,
        "prioritized_replay_eps": args.prioritized_replay_eps,
        "prioritized_replay_beta0": args.prioritized_replay_beta0
    }

    # Clear the terminal
    os.system('cls' if os.name == 'nt' else 'clear')

    if not os.path.exists("./results"):
        os.makedirs("./results")
    if args.save_models and not os.path.exists("./pytorch_models"):
        os.makedirs("./pytorch_models")

    # Time stamp for repeated test names
    ts = time.time()
    ts = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d_%H-%M-%S')
    test_name = "%s_%s_%s_%s" % (args.policy_name, args.env_name, str(args.seed), ts)
    plot_name = "%s_%s_%s_%s_plot.png" % (args.policy_name, args.env_name, str(args.seed), ts)
    kwargs_name = "%s_%s_%s_%s_kwargs.csv" % (args.policy_name, args.env_name, str(args.seed), ts)
    scores_name = "%s_%s_%s_%s_scores.csv" % (args.policy_name, args.env_name, str(args.seed), ts)
    print("---------------------------------------")
print("Settings: %s" % (test_name)) utils.save_kwargs(kwargs, "./results/%s" % (kwargs_name)) print("---------------------------------------") # Environment and Agent instantiation env = gym.make(args.env_name) # Set seeds env.seed(args.seed) torch.manual_seed(args.seed) np.random.seed(args.seed) state_dim = env.observation_space.shape[0] action_dim = env.action_space.shape[0] max_action = float(env.action_space.high[0]) # Instantiate Replay Buffer if args.replay_buffer == "vanilla": replay_buffer = rb.ReplayBuffer(size = args.replay_buffer_size) PER = False elif args.replay_buffer == "prioritized": replay_buffer = rb.PrioritizedReplayBuffer(size = int(np.round(np.sqrt(args.replay_buffer_size))), alpha = args.replay_buffer_alpha) PER = True prioritized_replay_beta_iters = args.max_timesteps prioritized_replay_beta0 = args.prioritized_replay_beta0 beta_schedule = LinearSchedule(prioritized_replay_beta_iters, initial_p = prioritized_replay_beta0, final_p = 1.0) # Instantiate policy if args.policy_name == "TD3": policy = TD3.TD3(state_dim, action_dim, max_action, args.lr_actor, args.lr_critic, PER, args.prioritized_replay_eps) elif args.policy_name == "DDPG": policy = DDPG.DDPG(state_dim, action_dim, max_action, args.lr_actor, args.lr_critic, PER, args.prioritized_replay_eps) # Evaluate untrained policy evaluations = [evaluate_policy(env, policy)] # Training loop ####################################### total_timesteps = 0 timesteps_since_eval = 0 episode_num = 0 episode_rewards = [] done = True while total_timesteps < args.max_timesteps: if done: if total_timesteps != 0: print('Total T: {} Episode Num: {} Episode T: {} Reward: {}'.format(total_timesteps, episode_num, episode_timesteps, episode_reward)) episode_rewards.append(episode_reward) # PER Beta scheduled update if PER: beta = beta_schedule.value(total_timesteps) else: beta = 0. 
                # Policy update step
                if args.policy_name == "TD3":
                    policy.train(replay_buffer, episode_timesteps, args.batch_size,
                                 args.discount, args.tau, args.policy_noise,
                                 args.noise_clip, args.policy_freq, beta)
                else:
                    policy.train(replay_buffer, episode_timesteps, args.batch_size,
                                 args.discount, args.tau, beta)

            # Evaluate episode
            if timesteps_since_eval >= args.eval_freq:
                timesteps_since_eval %= args.eval_freq
                evaluations.append(evaluate_policy(env, policy))
                # save evaluation
                #if args.save_models: policy.save(test_name, directory="./pytorch_models")
                #np.save("./results/%s" % (test_name), evaluations)

            # Reset environment
            obs = env.reset()
            done = False
            episode_reward = 0
            episode_timesteps = 0
            episode_num += 1

        # Select action randomly or according to policy
        if total_timesteps < args.start_timesteps:
            action = env.action_space.sample()
        else:
            action = policy.select_action(np.array(obs))
            if args.expl_noise != 0:
                action = (action + np.random.normal(0, args.expl_noise,
                                                    size=env.action_space.shape[0])
                          ).clip(env.action_space.low, env.action_space.high)

        # Perform action
        new_obs, reward, done, _ = env.step(action)
        done_bool = 0 if episode_timesteps + 1 == env._max_episode_steps else float(done)
        episode_reward += reward

        # Push experience into replay buffer
        experience = (obs, action, reward, new_obs, done_bool)
        replay_buffer.add(experience)

        obs = new_obs
        episode_timesteps += 1
        total_timesteps += 1
        timesteps_since_eval += 1

    # Final evaluation
    evaluations.append(evaluate_policy(env, policy))

    # Save results
    if args.save_models:
        policy.save("%s" % (test_name), directory="./pytorch_models")
    #np.save("./results/%s" % (evaluations_file), evaluations)
    #np.save("./results/%s" % ('rewards.txt'), episode_rewards)
    utils.save_scores(episode_rewards, "./results/%s" % (scores_name))
    utils.plot(episode_rewards, "./results/%s" % (plot_name), 1)
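# LinearSchedule is used above for PER beta annealing but is not defined in this
# excerpt. A minimal sketch consistent with the calls
# LinearSchedule(iters, initial_p=..., final_p=1.0) and schedule.value(t); it
# mirrors the helper of the same name in OpenAI Baselines, which this script
# appears to borrow (an assumption).
class LinearSchedule(object):
    def __init__(self, schedule_timesteps, final_p, initial_p=1.0):
        # Linearly interpolate from initial_p to final_p over schedule_timesteps.
        self.schedule_timesteps = schedule_timesteps
        self.final_p = final_p
        self.initial_p = initial_p

    def value(self, t):
        # Clamp at final_p once t exceeds the schedule horizon.
        fraction = min(float(t) / self.schedule_timesteps, 1.0)
        return self.initial_p + fraction * (self.final_p - self.initial_p)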
def main():
    # Parameters
    data_directory = '../data/generated-data-r-10-n-8-2/'
    features_path = '../data/features-generated-data-r-10-n-8-2'
    booking_file = '../data/booking.csv'
    users_file = '../data/user.csv'
    rating_thresholds = []
    true_objects_indexes = [0, 1, 2, 3, 4, 5, 6, 7]
    false_objects_indexes = [8, 9]

    file_names = os.listdir(data_directory)
    img_ids_vector = [int(name.split('-')[0]) for name in file_names]
    ratings_vector = [int(name.split('-')[-2]) for name in file_names]
    name_vector = [data_directory + name for name in file_names]
    images_indexes = [name.split('-')[3].split('.')[0] for name in file_names]
    ratings_matrix, images_indexes_for_id, ids_indexes, users_matrix = load_data(
        data_directory, booking_file, users_file, rating_thresholds)
    features = get_features(features_path, name_vector)

    fa = FeatureAgglomeration(n_clusters=50)
    fa.fit(features)
    features = fa.transform(features)

    scores = []
    cv_results_file = './results/bf_real.csv'

    #ratings_matrix = ratings_matrix[:30, :30]
    #selection = BasicFactorization(show_selection_results=False, selection_algorithm='random')
    #selection.transform(ids=img_ids_vector, features=features, ratings=ratings_vector,
    #                    users_ratings=ratings_matrix, users=users_matrix,
    #                    cv_results_file=cv_results_file, images_indexes=images_indexes,
    #                    true_objects_indexes=true_objects_indexes,
    #                    false_objects_indexes=false_objects_indexes,
    #                    paths=name_vector, z_score=True)
    #score, score_rmse = selection.evaluate(evaluation_metric='auc')
    #scores.append(score)
    #exit()

    # K Nearest Neighbors
    #cv_results_file = './results/cv-generated-data-nr-2-n-02-l-100-knn.csv'

    scores_auc = []
    scores_rmse = []
    for i in range(1):
        cv_results_file = './results/xxp1-cv-generated-data-r-10-n-8-2-random-' + str(i) + '.csv'
        selection = ObjectSelection(show_selection_results=False, selection_algorithm='random')
        selection.transform(ids=img_ids_vector, features=features, ratings=ratings_vector,
                            users_ratings=ratings_matrix, users=users_matrix,
                            cv_results_file=cv_results_file, images_indexes=images_indexes,
                            true_objects_indexes=true_objects_indexes,
                            false_objects_indexes=false_objects_indexes,
                            paths=name_vector, z_score=False)
        selection.evaluate(evaluation_metric='auc')
        selection.evaluate(evaluation_metric='rmse')
        print('\n\n-----\n\n')
        score_auc, score_rmse = selection.evaluate(evaluation_metric='auc')
        scores_auc.append(score_auc)
        scores_rmse.append(score_rmse)

    results_file = './scores/v-generated-data-r-10-n-8-2-random-fa-auc.csv'
    save_scores(scores_auc, results_file)
    results_file = './scores/v-generated-data-r-10-n-8-2-random-fa-rmse.csv'
    save_scores(scores_rmse, results_file)
    exit()

    # Everything below is unreachable after exit(); earlier experimentation runs
    for i in range(10):
        print()
        for _ in range(0):
            selection = ObjectSelection(show_selection_results=False,
                                        selection_algorithm='random')
            # selection.transform(ids=img_ids_vector, features=features, ratings=ratings_vector,
            #                     users_ratings=ratings_matrix, users=users_matrix,
            #                     cv_results_file=cv_results_file)
            selection.transform(ids=img_ids_vector, features=features, ratings=ratings_vector,
                                users_ratings=ratings_matrix, users=users_matrix,
                                cv_results_file=cv_results_file, images_indexes=images_indexes,
                                true_objects_indexes=true_objects_indexes,
                                false_objects_indexes=false_objects_indexes,
                                paths=name_vector, z_score=True)
            print('\n\n-----\n\n')
            score_auc, score_rmse = selection.evaluate(evaluation_metric='auc')
            scores.append(score_auc)

    for i in range(10):
        print()
        for _ in range(10):
            selection = BasicFactorization(show_selection_results=False,
                                           selection_algorithm='random')
            selection.transform(ids=img_ids_vector,
                                features=features, ratings=ratings_vector,
                                users_ratings=ratings_matrix, users=users_matrix,
                                cv_results_file=cv_results_file, images_indexes=images_indexes,
                                true_objects_indexes=true_objects_indexes,
                                false_objects_indexes=false_objects_indexes,
                                paths=name_vector)
            score = selection.evaluate(evaluation_metric='auc')
            scores.append(score)
    exit()

    # Parameters
    #data_directory = '../data/experience-6/'
    #features_path = '../data/features-experience-6'
    data_directory = '../data/generated-data-r-2-n-8-2/'
    features_path = '../data/features-generated-data-r-2-n-8-2'
    booking_file = '../data/booking.csv'
    users_file = '../data/user.csv'
    cv_results_file = 'results/cv-generated-data-r-2-n-8-2-x.csv'
    true_objects_indexes = [0, 1, 2, 3, 4, 5, 6, 7]
    false_objects_indexes = [8, 9]

    #file_to_delete = data_directory + '.DS_Store'
    #os.remove(file_to_delete)

    file_names = os.listdir(data_directory)
    img_ids_vector = [int(name.split('-')[0]) for name in file_names]
    ratings_vector = [int(name.split('-')[-2]) for name in file_names]
    name_vector = [data_directory + name for name in file_names]
    images_indexes = [name.split('-')[3].split('.')[0] for name in file_names]
    rating_thresholds = [1, 2]
    #rating_thresholds = []
    ratings_matrix, images_indexes_for_id, ids_indexes, users_matrix = load_data(
        data_directory, booking_file, users_file, rating_thresholds, binary=True)
    features = get_features(features_path, name_vector)

    cv_results_file = './results/cv-generated-data-r-2-n-8-2-knn-y.csv'
    selection = ObjectSelection(show_selection_results=False, selection_algorithm='random')
    selection.transform(ids=img_ids_vector, features=features, ratings=ratings_vector,
                        users_ratings=ratings_matrix, users=users_matrix,
                        cv_results_file=cv_results_file, images_indexes=images_indexes,
                        true_objects_indexes=true_objects_indexes,
                        false_objects_indexes=false_objects_indexes,
                        paths=name_vector, use_user_data=True)
    selection.evaluate(evaluation_metric='auc')
    exit()

    selection = BasicFactorizationNmf(show_selection_results=True,
                                      selection_algorithm='random')
    selection.transform(ids=img_ids_vector, features=features, ratings=ratings_vector,
                        users_ratings=ratings_matrix, users=users_matrix,
                        cv_results_file=cv_results_file, images_indexes=images_indexes,
                        true_objects_indexes=true_objects_indexes,
                        false_objects_indexes=false_objects_indexes,
                        paths=name_vector)
    selection.evaluate(evaluation_metric='auc')
            continue
        vec = torch.LongTensor(np.array([vocab[gold_candidate]]))
        if model.use_cuda:
            vec = vec.cuda()
        mu_s, sigma_s, inf_mu_s, inf_sigma_s, z_s = model.get_distribution(
            vec, context_vec)
        score_mu = cosine_similarity(mu.squeeze().detach().cpu().numpy(),
                                     mu_s.squeeze().detach().cpu().numpy())
        scores_mu[lst_item.complete_word, lst_item.sentence_id].append(
            (gold_candidate, score_mu))
        # TODO not sure if minus is reqd
        # posterior (word) (inf) || prior (candidate)
        kl_prior = -1 * kl_div(inf_mu, inf_sigma, mu_s, sigma_s)
        scores_kl_prior[lst_item.complete_word, lst_item.sentence_id].append(
            (gold_candidate, kl_prior.item()))
        kl_post = -1 * kl_div(inf_mu, inf_sigma, inf_mu_s, inf_sigma_s)
        scores_kl_post[lst_item.complete_word, lst_item.sentence_id].append(
            (gold_candidate, kl_post.item()))

print("Skipped: {}".format(skipped))
save_scores(lst, scores_mu, "bs_mu_lst.out")
save_scores(lst, scores_kl_post, "bs_kl_post_lst.out")
save_scores(lst, scores_kl_prior, "bs_kl_prior_lst_mu.out")
        if batch_n % 200 == 0:
        # if True:
            with tf.device('CPU'):
                train_sums = list(tf.concat(greedy_seqs.values, axis=0).numpy())
                train_inds = list(
                    tf.concat(batch[-1].values, axis=0).numpy().squeeze())
            articles = [article[x] for x in train_inds]
            gt_summaries = [summary[x] for x in train_inds]
            examples_oovs = [oovs[x] for x in train_inds]
            scores, summaries, time_step_masks = env.get_rewards(
                gt_summaries, train_sums, examples_oovs)
            save_examples(examples_folder, articles, gt_summaries, summaries,
                          epoch, batch_n, 'train')
            save_scores(metrics_folder, scores, 'train')

    mean_epoch_loss = np.mean(losses)
    losses = []
    save_loss(metrics_folder, mean_epoch_loss, 'train')

    val_losses = []
    val_sums = []
    val_inds = []
    val_iterator = iter(val_dist_dataset)
    for val_batch_n in range(1, min(10, batches_per_epoch)):
        batch = next(val_iterator)
        loss, greedy_seqs = distributed_step(batch, 'val')
        val_losses.append(loss)
        with tf.device('CPU'):
        checkpoints_folder, experiment_name)
    val_batches_per_epoch = len(val_tf_dataset)
    val_sums = []
    val_inds = []
    val_iterator = iter(val_dist_dataset)
    for val_batch_n in tqdm(range(val_batches_per_epoch)):
        batch = next(val_iterator)
        if check_shapes(batch):
            greedy_seqs = distributed_step(batch)
            with tf.device('CPU'):
                val_sums += list(tf.concat(greedy_seqs.values, axis=0).numpy())
                val_inds += list(
                    tf.concat(batch[-1].values, axis=0).numpy().squeeze())

    articles = [val_article[x] for x in val_inds]
    gt_summaries = [val_summary[x] for x in val_inds]
    examples_oovs = [val_oovs[x] for x in val_inds]
    scores, summaries, time_step_masks = env.get_rewards(gt_summaries, val_sums,
                                                         examples_oovs)
    save_examples(examples_folder, articles, gt_summaries, summaries,
                  'NA', 'NA', 'test', stage='test')
    save_scores(metrics_folder, scores, 'test')
    if experiment_type not in data:
        experiment_type = experiment_type.replace("_cv", "").replace("_instances", "")
    folds = data[experiment_type]
    for fold_idx, (X_train, _, X_test, y_test) in enumerate(folds):
        for iter_idx in range(cfg["num_iterations"]):
            np.random.seed(iter_idx)
            model = model_dict["cls"](**model_dict["params"])
            model.fit(X_train)
            y_test_pred = model.predict_proba(X_test)[:, 1]
            iter_roc, iter_ap = calculate_metrics(y_test, y_test_pred)
            save_model(model, y_test_pred, experiment_dir, fold_idx, iter_idx)
            roc += iter_roc
            ap += iter_ap

    roc = np.round(roc / num_experiments, decimals=4)
    ap = np.round(ap / num_experiments, decimals=4)
    save_scores(roc, ap, experiment_dir)
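# calculate_metrics above is not shown in this excerpt. A minimal sketch of a
# compatible helper, assuming the two returned values are ROC AUC and average
# precision computed with scikit-learn (an assumption; the project's own
# implementation may differ).
from sklearn.metrics import roc_auc_score, average_precision_score

def calculate_metrics(y_true, y_pred_proba):
    # Both metrics operate on scores/probabilities of the positive class.
    roc = roc_auc_score(y_true, y_pred_proba)
    ap = average_precision_score(y_true, y_pred_proba)
    return roc, ap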
        batch = next(iterator)
        if check_shapes(batch):
            loss, greedy_seqs, _ = distributed_step(batch, 'train')
            losses.append(loss)
            if batch_n % 200 == 0:
                with tf.device('CPU'):
                    train_sums = list(tf.concat(greedy_seqs.values, axis=0).numpy())
                    train_inds = list(
                        tf.concat(batch[-1].values, axis=0).numpy().squeeze())
                articles = [article[x] for x in train_inds]
                gt_summaries = [summary[x] for x in train_inds]
                examples_oovs = [oovs[x] for x in train_inds]
                scores, summaries, time_step_masks = env.get_rewards(
                    gt_summaries, train_sums, examples_oovs)
                save_examples(examples_folder, articles, gt_summaries, summaries,
                              epoch, batch_n, 'train', stage='pretrain')
                save_scores(metrics_folder, scores, 'pretrain')

    mean_epoch_loss = np.mean(losses)
    losses = []
    save_loss(metrics_folder, mean_epoch_loss, 'pretrain')

    val_losses = []
    val_sums = []
    val_inds = []
    val_iterator = iter(val_dist_dataset)
    for val_batch_n in range(1, min(10, batches_per_epoch)):
        batch = next(val_iterator)
        loss, greedy_seqs, _ = distributed_step(batch, 'val')
        val_losses.append(loss)
        with tf.device('CPU'):
        scores_window.append(score)  # save most recent score
        scores.append(score)         # save most recent score
        avg_av = agent.evaluate_on_fixed_set(fixed_states)
        average_action_values.append(avg_av)
        print(f'Episode {i_episode}\tAverage Score: '
              f'{round(np.mean(scores_window), 4)}\tEpsilon: {round(eps, 4)}\t'
              f'Average Q value: {round(avg_av, 4)}')
        if i_episode % conf['save_every'] == 0 and i_episode > 0:
            print(f'Saving model at iteration: {i_episode}')
            save_model(conf, agent)

    env.close()
    return {
        'scores': scores,
        'epsilons': epsilons,
        'avg_action_values': average_action_values
    }


if __name__ == '__main__':
    arguments = parse_args()
    pc = arguments.path_config
    exp_conf = read_yaml(pc)
    stats = train(exp_conf)
    save_scores(exp_conf, stats)
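# read_yaml is referenced above but not defined in this excerpt. A minimal
# sketch, assuming it is a thin PyYAML wrapper (hypothetical -- the project's
# own helper may add validation or defaults).
import yaml

def read_yaml(path):
    # Load the experiment config file into a plain dict.
    with open(path) as f:
        return yaml.safe_load(f)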
    _, mu_all, sigma_all, _, _ = model.x_inference(en_matrix, neg_samples_en)
    center_mu = mu_all[center_idx]
    center_sigma = sigma_all[center_idx]
    #print(center_mu, center_sigma)
    for gold_candidate in gold_dict[lst_item.target_word]:
        if gold_candidate not in en_vocab.index:
            # TODO print something out and track
            continue
        en_sentence = lst_item.tokenized_sentence
        en_sentence[center_idx] = gold_candidate
        en_matrix, en_words = to_index(en_sentence, en_vocab)
        en_matrix = torch.LongTensor(en_matrix)
        neg_samples_en = torch.LongTensor(
            get_negative_batch(en_vocab, params["n_negative"], en_words))
        _, mu_all, sigma_all, _, _ = model.x_inference(en_matrix, neg_samples_en)
        candidate_mu = mu_all[center_idx]
        candidate_sigma = sigma_all[center_idx]
        #print(candidate_mu, candidate_sigma)
        kl = kl_div(center_mu, center_sigma, candidate_mu, candidate_sigma)
        scores[lst_item.complete_word, lst_item.sentence_id].append(
            (gold_candidate, kl.item()))

print("Skipped: {}".format(skipped))
save_scores(lst, scores, "eam.out")
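# kl_div is called above but not defined here. A minimal sketch of the
# closed-form KL divergence between two diagonal Gaussians,
# KL(N(mu_1, sigma_1^2) || N(mu_2, sigma_2^2)), assuming the sigma tensors hold
# standard deviations (if the model emits log-variances, adjust accordingly).
import torch

def kl_div(mu_1, sigma_1, mu_2, sigma_2):
    # Elementwise KL for diagonal Gaussians, summed over dimensions:
    # log(s2/s1) + (s1^2 + (m1 - m2)^2) / (2 * s2^2) - 1/2
    var_1, var_2 = sigma_1.pow(2), sigma_2.pow(2)
    kl = (torch.log(sigma_2 / sigma_1)
          + (var_1 + (mu_1 - mu_2).pow(2)) / (2 * var_2) - 0.5)
    return kl.sum()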
def save_current_scores(self):
    savefpath = os.path.join(self._backupdir,
                             'scores_end_block%03d.npz' % self._istep)
    save_kwargs = {'image_ids': self._curr_imgids, 'scores': self._curr_scores}
    # , 'scores_mat': self._curr_scores_mat, 'nscores': self._curr_nscores}
    print('saving scores to %s' % savefpath)
    utils.save_scores(savefpath, save_kwargs)
with open('results/evaluation_data.parser_output', 'a') as f:
    if my_parsing is None:
        f.write("Found no viable parsing." + "\n")
    else:
        f.write(my_parsing + "\n")

if my_parsing is not None:
    # PYEVALB works if we remove the first and last brackets of the SEQUOIA
    # format and the extra spaces that come with them
    real_parsing = real_parsing[2:-1]
    my_parsing = my_parsing[2:-1]
    print("Score PYEVALB:")
    real_tree = parser.create_from_bracket_string(real_parsing)
    test_tree = parser.create_from_bracket_string(my_parsing)
    result = scorer.Scorer().score_trees(real_tree, test_tree)
    print('accuracy ' + str(result.tag_accracy))  # 'tag_accracy' is the attribute name in PYEVALB's API
    # For evaluation on the whole corpus, we save real_parsing and my_parsing
    # in new files without the first and last brackets
    with open('results/real_parsings_test_for_eval.txt', 'a') as f:
        f.write(real_parsing + "\n")
    with open('results/my_parsings_test_for_eval.txt', 'a') as f:
        f.write(my_parsing + "\n")
    save_scores(
        'results/real_parsings_test_for_eval.txt',
        'results/my_parsings_test_for_eval.txt',
        'results/results_pyevalb.txt',
    )
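# save_scores here takes the gold and predicted parse files plus an output
# path. A minimal sketch, assuming it wraps PYEVALB's corpus-level scorer (the
# call signature matches, but the project's own helper is not shown).
from PYEVALB import scorer as pyevalb_scorer

def save_scores(gold_path, test_path, result_path):
    # Score the whole corpus and write the PYEVALB report to result_path.
    pyevalb_scorer.Scorer().evalb(gold_path, test_path, result_path)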
def save_block_stats(self, save_kwargs, namestr):
    '''More general version of save_current_scores. Save anything you like.'''
    savefpath = os.path.join(self._backupdir,
                             '%s_block%03d.npz' % (namestr, self._istep))
    print('saving %s to %s' % (namestr, savefpath))
    utils.save_scores(savefpath, save_kwargs)
def run(path_to_net, label_dir, nii_dir, plotter,
        batch_size=32, test_split=0.3, random_state=666,
        epochs=8, learning_rate=0.0001, momentum=0.9, num_folds=5):
    """Applies training and validation on the network."""
    print('Setting started', flush=True)
    nii_filenames = np.asarray(glob.glob(nii_dir + '/*.npy'))
    print('Number of files: ', len(nii_filenames), flush=True)

    # Creating data indices
    dataset_size = len(nii_filenames)
    indices = list(range(dataset_size))
    test_indices, trainset_indices = utils.get_test_indices(indices, test_split)

    # k-fold index generator for cross-validation
    for cv_num, (train_idx, val_idx) in enumerate(
            utils.get_train_cv_indices(trainset_indices, num_folds, random_state)):
        # take from trainset_indices the k-fold generated ones
        train_indices = np.asarray(trainset_indices)[np.asarray(train_idx)]
        val_indices = np.asarray(trainset_indices)[np.asarray(val_idx)]
        print('cv cycle number: ', cv_num, flush=True)

        net = Net()
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        num_GPU = torch.cuda.device_count()
        print('Device: ', device, flush=True)
        if num_GPU > 1:
            print('Let us use', num_GPU, 'GPUs!', flush=True)
            net = nn.DataParallel(net)
        net.to(device)

        # Weight the loss with the inverse class sizes
        # class 0: 3268, class 1: 60248
        weight = torch.tensor([1. / 3268., 1. / 60248.]).to(device)
        criterion = nn.CrossEntropyLoss(weight=weight)
        optimizer = optim.Adam(net.parameters(), lr=learning_rate)
        scheduler = ReduceLROnPlateau(optimizer, threshold=1e-6, patience=0,
                                      verbose=True)

        fMRI_dataset_train = dataset.fMRIDataset(label_dir, nii_dir, train_indices,
                                                 transform=dataset.ToTensor())
        fMRI_dataset_val = dataset.fMRIDataset(label_dir, nii_dir, val_indices,
                                               transform=dataset.ToTensor())
        datalengths = {
            'train': len(fMRI_dataset_train),
            'val': len(fMRI_dataset_val)
        }
        dataloaders = {
            'train': utils.get_dataloader(fMRI_dataset_train, batch_size, num_GPU),
            'val': utils.get_dataloader(fMRI_dataset_val, batch_size, num_GPU)
        }
        print('Train set length {}, Val set length {}: '.format(
            datalengths['train'], datalengths['val']))

        # Setup metrics
        running_metrics_val = metrics.BinaryClassificationMeter()
        running_metrics_train = metrics.BinaryClassificationMeter()
        val_loss_meter = metrics.averageLossMeter()
        train_loss_meter = metrics.averageLossMeter()

        # Track iteration number over epochs for plotter
        itr = 0
        # Track lowest loss over epochs for saving the network
        lowest_loss = 100000

        for epoch in tqdm(range(epochs), desc='Epochs'):
            print('Epoch: ', epoch + 1, flush=True)
            print('Phase: train', flush=True)
            phase = 'train'
            # Set model to training mode
            net.train(True)
            # Iterate over data.
            for i, data in tqdm(enumerate(dataloaders[phase]),
                                desc='Dataiteration_train'):
                train_pred, train_labels, train_loss = train(
                    data, optimizer, net, criterion, device)
                running_metrics_train.update(train_pred, train_labels)
                train_loss_meter.update(train_loss, n=1)
                if (i + 1) % 10 == 0:
                    print('Number of Iteration [{}/{}]'.format(
                        i + 1, int(datalengths[phase] / batch_size)), flush=True)
                    itr += 1
                    score = running_metrics_train.get_scores()
                    for k, v in score.items():
                        plotter.plot(k, 'itr', phase, k, itr, v)
                        print(k, v, flush=True)
                    print('Loss Train', train_loss_meter.avg, flush=True)
                    plotter.plot('Loss', 'itr', phase, 'Loss Train',
                                 itr, train_loss_meter.avg)
            utils.save_scores(running_metrics_train.get_history(), phase, cv_num)
            utils.save_loss(train_loss_meter.get_history(), phase, cv_num)

            print('Phase: val', flush=True)
            phase = 'val'
            # Set model to validation mode
            net.train(False)
            with torch.no_grad():
                for i, data in tqdm(enumerate(dataloaders[phase]),
                                    desc='Dataiteration_val'):
                    val_pred, val_labels, val_loss = val(data, net, criterion, device)
                    running_metrics_val.update(val_pred, val_labels)
                    val_loss_meter.update(val_loss, n=1)
                    if (i + 1) % 10 == 0:
                        print('Number of Iteration [{}/{}]'.format(
                            i + 1, int(datalengths[phase] / batch_size)), flush=True)
            utils.save_scores(running_metrics_val.get_history(), phase, cv_num)
            utils.save_loss(val_loss_meter.get_history(), phase, cv_num)
            if val_loss_meter.avg < lowest_loss:
                lowest_loss = val_loss_meter.avg
                utils.save_net(path_to_net, batch_size, epoch, cv_num,
                               train_indices, val_indices, test_indices,
                               net, optimizer, criterion, iter_num=i)

            # Plot validation metrics and loss at the end of the val phase
            score = running_metrics_val.get_scores()
            for k, v in score.items():
                plotter.plot(k, 'itr', phase, k, itr, v)
                print(k, v, flush=True)
            print('Loss Val', val_loss_meter.avg, flush=True)
            plotter.plot('Loss', 'itr', phase, 'Loss Val', itr, val_loss_meter.avg)

            print('Epoch [{}/{}], Train_loss: {:.4f}, Train_bacc: {:.2f}'.format(
                epoch + 1, epochs, train_loss_meter.avg,
                running_metrics_train.bacc), flush=True)
            print('Epoch [{}/{}], Val_loss: {:.4f}, Val_bacc: {:.2f}'.format(
                epoch + 1, epochs, val_loss_meter.avg,
                running_metrics_val.bacc), flush=True)

            # Call the learning rate adjustment function after every epoch
            scheduler.step(train_loss_meter.avg)

        # Save net after every cross-validation cycle
        utils.save_net(path_to_net, batch_size, epochs, cv_num, train_indices,
                       val_indices, test_indices, net, optimizer, criterion)
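# utils.get_train_cv_indices used by run() is a project helper not shown here.
# A minimal sketch under the assumption that it wraps sklearn's KFold
# (hypothetical -- the real helper may shuffle or stratify differently).
from sklearn.model_selection import KFold

def get_train_cv_indices(trainset_indices, num_folds, random_state):
    # Yields (train_idx, val_idx) pairs indexing *into* trainset_indices,
    # matching how run() applies them via np.asarray(trainset_indices)[train_idx].
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=random_state)
    return kf.split(trainset_indices)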