Exemple #1
0
 def save_current_scores(self):
     """Hand the current image ids and score arrays to ``utils.save_scores``.

     The target file lives in ``self._logdir`` and is named after the block
     writer's current block index (``scores_end_block<NNN>.npz``).

     Raises:
         RuntimeWarning: if ``self._istep`` is negative.
     """
     # Guard clause: a negative step counter means there is nothing to save.
     if self._istep < 0:
         raise RuntimeWarning('no scores evaluated; scores not saved')

     block_fname = 'scores_end_block%03d.npz' % self._blockwriter.iblock
     savefpath = os.path.join(self._logdir, block_fname)
     payload = {
         'image_ids': self._curr_imgids,
         'scores': self._curr_scores,
         'scores_mat': self._curr_scores_mat,
         'nscores': self._curr_nscores,
     }
     print('saving scores to %s' % savefpath)
     utils.save_scores(savefpath, payload)
def main():
    """Run ten object-selection trials (``selection_algorithm='rf'``) and save the scores.

    Image ids, ratings and image indexes are parsed from the dash-separated
    file names found in the data directory. The features are reduced with
    ``FeatureAgglomeration(n_clusters=50)`` before the trials start. The two
    values returned by each trial's final ``evaluate`` call are collected and
    handed to ``save_scores`` together with one CSV path per metric.
    """
    # Parameters
    data_directory = '../../data/generated-data-r-10-n-6-4/'
    features_path = '../../data/features-generated-data-r-10-n-6-4'
    booking_file = '../../data/booking.csv'
    users_file = '../../data/user.csv'
    rating_thresholds = []
    true_objects_indexes = [0, 1, 2, 3, 4, 5]
    false_objects_indexes = [6, 7, 8, 9]

    # Split every file name once; the individual fields are picked out below.
    file_names = os.listdir(data_directory)
    name_fields = [fname.split('-') for fname in file_names]
    img_ids_vector = [int(fields[0]) for fields in name_fields]
    ratings_vector = [int(fields[-2]) for fields in name_fields]
    name_vector = [data_directory + fname for fname in file_names]
    images_indexes = [fields[3].split('.')[0] for fields in name_fields]

    # Only the ratings matrix and the users matrix are used further down.
    ratings_matrix, _images_indexes_for_id, _ids_indexes, users_matrix = load_data(
        data_directory, booking_file, users_file, rating_thresholds)

    features = get_features(features_path, name_vector)

    agglomeration = FeatureAgglomeration(n_clusters=50)
    agglomeration.fit(features)
    features = agglomeration.transform(features)

    # Keyword arguments that are identical for every trial.
    shared_kwargs = dict(
        ids=img_ids_vector,
        features=features,
        ratings=ratings_vector,
        users_ratings=ratings_matrix,
        users=users_matrix,
        images_indexes=images_indexes,
        true_objects_indexes=true_objects_indexes,
        false_objects_indexes=false_objects_indexes,
        paths=name_vector,
        z_score=False,
    )

    scores_auc, scores_rmse = [], []
    for trial in range(10):
        cv_results_file = (
            '../results/cv-generated-data-r-10-n-6-4-rf-fa-%d.csv' % trial)
        selection = ObjectSelection(show_selection_results=False,
                                    selection_algorithm='rf')
        selection.transform(cv_results_file=cv_results_file, **shared_kwargs)

        # NOTE(review): evaluate() runs three times per trial and the first two
        # results are discarded; kept as-is in case the calls print or cache.
        for metric in ('auc', 'rmse'):
            selection.evaluate(evaluation_metric=metric)
        print('\n\n-----\n\n')
        trial_auc, trial_rmse = selection.evaluate(evaluation_metric='auc')
        scores_auc.append(trial_auc)
        scores_rmse.append(trial_rmse)

    save_scores(scores_auc,
                '../scores/generated-data-r-10-n-6-4-rf-fa-auc.csv')
    save_scores(scores_rmse,
                '../scores/generated-data-r-10-n-6-4-rf-fa-rmse.csv')
Exemple #3
0
 def save_current_scores(self):
     """
     Pass the scores of the current images to ``utils.save_scores``.

     The target path is ``scores_step<NNN>.npz`` inside ``self._logdir``,
     numbered by ``self._istep``. When ``self._stoch`` is truthy the
     ``scores_mat`` and ``nscores`` entries are saved as well.

     Raises RuntimeWarning when ``self._istep`` is negative.
     """
     # Guard clause: a negative step counter means there is nothing to save.
     if self._istep < 0:
         raise RuntimeWarning('no scores evaluated; scores not saved')

     payload = {'image_ids': self._curr_imgids, 'scores': self._curr_scores}
     if self._stoch:
         payload['scores_mat'] = self._curr_scores_mat
         payload['nscores'] = self._curr_nscores

     step_fname = 'scores_step%03d.npz' % self._istep
     savefpath = os.path.join(self._logdir, step_fname)
     print('saving scores to %s' % savefpath)
     utils.save_scores(savefpath, payload)
Exemple #4
0
 def save_current_scores(self):
     """
     Pass the scores of the current images to ``utils.save_scores``.

     The target path is ``scores_step<NNN>.npz`` inside ``self._logdir``,
     numbered by ``self._istep``. When ``self._stoch`` is truthy the
     ``scores_reps`` and ``scores_no_stoch`` entries are saved as well.

     Raises RuntimeWarning when ``self._istep`` is negative.
     """
     # Guard clause: a negative step counter means there is nothing to save.
     if self._istep < 0:
         raise RuntimeWarning('no scores evaluated; scores not saved')

     payload = {'image_ids': self._curr_imgids, 'scores': self._curr_scores}
     if self._stoch:
         payload['scores_reps'] = self._curr_scores_reps
         payload['scores_no_stoch'] = self._curr_scores_no_stoch

     step_fname = f'scores_step{self._istep:03d}.npz'
     savefpath = ospath.join(self._logdir, step_fname)
     print('saving scores to', savefpath)
     utils.save_scores(savefpath, payload)
Exemple #5
0
scores_rmse = []

# Keyword arguments that are identical for every trial; only the
# cross-validation results file changes from one iteration to the next.
transform_kwargs = dict(
    ids=ids_vector,
    features=features,
    ratings=ratings_vector,
    users_ratings=ratings_matrix,
    users=users_matrix,
    images_indexes=text_indexes,
    true_objects_indexes=true_objects_indexes,
    false_objects_indexes=false_objects_indexes,
    paths=name_vector,
    z_score=True,
)

for i in range(10):
    cv_results_file = '../results/cv-generated-data-r-10-n-04-z-rf-%d.csv' % i
    selection = BasicFactorization(show_selection_results=False,
                                   selection_algorithm='rf')
    selection.transform(cv_results_file=cv_results_file, **transform_kwargs)
    score_auc, score_rmse = selection.evaluate(evaluation_metric='auc')
    scores_auc.append(score_auc)
    scores_rmse.append(score_rmse)

# Plot the collected AUC values in ascending order.
scores_auc.sort()
plt.title('AUC za izbrane slike')
plt.plot(scores_auc)
plt.ylabel('AUC')
plt.show()

# Hand each metric's values to save_scores with its own CSV path.
results_file = '../scores/generated-data-r-10-n-04-z-rf-auc.csv'
save_scores(scores_auc, results_file)
results_file = '../scores/generated-data-r-10-n-04-z-rf-rmse.csv'
save_scores(scores_rmse, results_file)
Exemple #6
0
def _str2bool(value):
    """Parse a command-line boolean such as "True", "false", "1" or "no".

    ``type=bool`` makes argparse call ``bool()`` on the raw string, so every
    non-empty value (including "False") would be truthy. This converter maps
    the usual spellings explicitly and rejects anything else.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string was the only falsy input under ``type=bool``; keep it.
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))


def main():
    """Train a TD3 or DDPG agent on a Gym environment and save the results.

    Parses the command line, logs the settings through ``utils.save_kwargs``,
    fills the selected replay buffer while stepping the environment and trains
    the policy at the end of every episode. At the end it optionally saves the
    model under ``./pytorch_models`` and passes the per-episode rewards to
    ``utils.save_scores`` and ``utils.plot`` with paths under ``./results``.

    Raises:
        ValueError: if ``--replay_buffer`` or ``--policy_name`` is not one of
            the supported values (previously this surfaced later as a
            NameError on an unbound local).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--policy_name", default="TD3")  # Policy name
    parser.add_argument("--env_name", default="Pendulum-v0")  # OpenAI gym environment name
    parser.add_argument("--replay_buffer", default="prioritized")  # Replay Buffer type
    # argparse does not apply ``type`` to non-string defaults, so the integer
    # options need integer defaults (5e4 and 1e4 are float literals).
    parser.add_argument("--replay_buffer_size", default=int(5e4), type=int)  # Replay Buffer capacity
    parser.add_argument("--replay_buffer_alpha", default=0.6, type=float)  # Replay Buffer prioritization weight
    parser.add_argument("--seed", default=0, type=int)  # Sets Gym, PyTorch and Numpy seeds
    parser.add_argument("--start_timesteps", default=int(1e4), type=int)  # How many time steps purely random policy is run for
    parser.add_argument("--eval_freq", default=1e3, type=float)  # How often (time steps) we evaluate
    parser.add_argument("--max_timesteps", default=5e4, type=float)  # Max time steps to run environment for
    parser.add_argument("--save_models", default=True, type=_str2bool)  # Whether or not models are saved
    parser.add_argument("--expl_noise", default=0.1, type=float)  # Std of Gaussian exploration noise
    parser.add_argument("--batch_size", default=100, type=int)  # Batch size for both actor and critic
    parser.add_argument("--discount", default=0.99, type=float)  # Discount factor
    parser.add_argument("--tau", default=0.005, type=float)  # Target network update rate
    parser.add_argument("--policy_noise", default=0.2, type=float)  # Noise added to target policy during critic update
    parser.add_argument("--noise_clip", default=0.5, type=float)  # Range to clip target policy noise
    parser.add_argument("--policy_freq", default=2, type=int)  # Frequency of delayed policy updates
    parser.add_argument("--lr_actor", default=0.001, type=float)  # Learning rate of actor
    parser.add_argument("--lr_critic", default=0.001, type=float)  # Learning rate of critic
    parser.add_argument("--prioritized_replay_eps", default=1e-3, type=float)  # Replay Buffer epsilon (PRE)
    parser.add_argument("--prioritized_replay_beta0", default=0.4, type=float)  # Replay Buffer initial beta (PRE)
    args = parser.parse_args()

    # Training kwargs (logged to disk so a run can be reproduced)
    kwargs = {
        "policy_name": args.policy_name,
        "env_name": args.env_name,
        "replay_buffer": args.replay_buffer,
        "replay_buffer_size": args.replay_buffer_size,
        "replay_buffer_alpha": args.replay_buffer_alpha,
        "seed": args.seed,
        "start_timesteps": args.start_timesteps,
        "eval_freq": args.eval_freq,
        "max_timesteps": args.max_timesteps,
        "save_models": args.save_models,
        "expl_noise": args.expl_noise,
        "batch_size": args.batch_size,
        "discount": args.discount,
        "tau": args.tau,
        "policy_noise": args.policy_noise,
        "noise_clip": args.noise_clip,
        "policy_freq": args.policy_freq,
        "lr_actor": args.lr_actor,
        "lr_critic": args.lr_critic,  # was missing from the logged settings
        "prioritized_replay_eps": args.prioritized_replay_eps,
        "prioritized_replay_beta0": args.prioritized_replay_beta0,
    }

    # Clear the terminal
    os.system('cls' if os.name == 'nt' else 'clear')

    if not os.path.exists("./results"):
        os.makedirs("./results")
    if args.save_models and not os.path.exists("./pytorch_models"):
        os.makedirs("./pytorch_models")

    # Time stamp for repeated test names
    ts = time.time()
    ts = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d_%H-%M-%S')

    # All output names share the same "<policy>_<env>_<seed>_<timestamp>" stem.
    test_name = "%s_%s_%s_%s" % (args.policy_name, args.env_name, str(args.seed), ts)
    plot_name = test_name + "_plot.png"
    kwargs_name = test_name + "_kwargs.csv"
    scores_name = test_name + "_scores.csv"

    print("---------------------------------------")
    print("Settings: %s" % (test_name))
    utils.save_kwargs(kwargs, "./results/%s" % (kwargs_name))
    print("---------------------------------------")

    # Environment and Agent instantiation
    env = gym.make(args.env_name)

    # Set seeds
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    max_action = float(env.action_space.high[0])

    # Instantiate Replay Buffer
    if args.replay_buffer == "vanilla":
        replay_buffer = rb.ReplayBuffer(size=args.replay_buffer_size)
        PER = False
    elif args.replay_buffer == "prioritized":
        # NOTE(review): the prioritized buffer is sized with the square root of
        # the requested capacity; kept unchanged -- confirm this is intended.
        replay_buffer = rb.PrioritizedReplayBuffer(
            size=int(np.round(np.sqrt(args.replay_buffer_size))),
            alpha=args.replay_buffer_alpha)
        PER = True
        prioritized_replay_beta_iters = args.max_timesteps
        prioritized_replay_beta0 = args.prioritized_replay_beta0
        beta_schedule = LinearSchedule(prioritized_replay_beta_iters,
                                       initial_p=prioritized_replay_beta0,
                                       final_p=1.0)
    else:
        raise ValueError(
            "unknown --replay_buffer %r (expected 'vanilla' or 'prioritized')"
            % (args.replay_buffer,))

    # Instantiate policy
    if args.policy_name == "TD3":
        policy = TD3.TD3(state_dim, action_dim, max_action, args.lr_actor,
                         args.lr_critic, PER, args.prioritized_replay_eps)
    elif args.policy_name == "DDPG":
        policy = DDPG.DDPG(state_dim, action_dim, max_action, args.lr_actor,
                           args.lr_critic, PER, args.prioritized_replay_eps)
    else:
        raise ValueError(
            "unknown --policy_name %r (expected 'TD3' or 'DDPG')"
            % (args.policy_name,))

    # Evaluate untrained policy
    evaluations = [evaluate_policy(env, policy)]

    # Training loop #######################################

    total_timesteps = 0
    timesteps_since_eval = 0
    episode_num = 0
    episode_rewards = []
    done = True  # forces an environment reset on the first iteration

    while total_timesteps < args.max_timesteps:

        if done:

            # Skipped on the very first pass, before any episode has been run.
            if total_timesteps != 0:
                print('Total T: {} Episode Num: {} Episode T: {} Reward: {}'.format(
                    total_timesteps, episode_num, episode_timesteps, episode_reward))
                episode_rewards.append(episode_reward)

                # PER Beta scheduled update
                if PER:
                    beta = beta_schedule.value(total_timesteps)
                else:
                    beta = 0.
                # Policy update step
                if args.policy_name == "TD3":
                    policy.train(replay_buffer, episode_timesteps, args.batch_size,
                                 args.discount, args.tau, args.policy_noise,
                                 args.noise_clip, args.policy_freq, beta)
                else:
                    policy.train(replay_buffer, episode_timesteps, args.batch_size,
                                 args.discount, args.tau, beta)

            # Evaluate episode
            if timesteps_since_eval >= args.eval_freq:
                timesteps_since_eval %= args.eval_freq
                evaluations.append(evaluate_policy(env, policy))

            # Reset environment
            obs = env.reset()
            done = False
            episode_reward = 0
            episode_timesteps = 0
            episode_num += 1

        # Select action randomly or according to policy
        if total_timesteps < args.start_timesteps:
            action = env.action_space.sample()
        else:
            action = policy.select_action(np.array(obs))
            if args.expl_noise != 0:
                noise = np.random.normal(0, args.expl_noise,
                                         size=env.action_space.shape[0])
                action = (action + noise).clip(env.action_space.low,
                                               env.action_space.high)

        # Perform action
        new_obs, reward, done, _ = env.step(action)
        # The stored done flag is forced to 0 when the step count reaches
        # env._max_episode_steps, whatever the environment reported.
        done_bool = 0 if episode_timesteps + 1 == env._max_episode_steps else float(done)
        episode_reward += reward

        # Push experience into replay buffer
        experience = (obs, action, reward, new_obs, done_bool)
        replay_buffer.add(experience)

        obs = new_obs

        episode_timesteps += 1
        total_timesteps += 1
        timesteps_since_eval += 1

    # Final evaluation
    evaluations.append(evaluate_policy(env, policy))

    # Save results
    if args.save_models:
        policy.save(test_name, directory="./pytorch_models")
    utils.save_scores(episode_rewards, "./results/%s" % (scores_name))
    utils.plot(episode_rewards, "./results/%s" % (plot_name), 1)
def main():
    """Run one object-selection trial (``selection_algorithm='random'``) and save its scores.

    Image ids, ratings and image indexes are parsed from the dash-separated
    file names found in the data directory. The features are reduced with
    ``FeatureAgglomeration(n_clusters=50)`` before the trial. The two values
    returned by the final ``evaluate`` call are handed to ``save_scores``
    together with one CSV path per metric, after which the process exits.

    The original body continued with several further experiments placed after
    an unconditional ``exit()``; that code could never run and has been
    removed together with the locals only it used.

    Raises:
        SystemExit: always, once the scores have been saved (the original
            called ``exit()`` at this point).
    """
    # Parameters
    data_directory = '../data/generated-data-r-10-n-8-2/'
    features_path = '../data/features-generated-data-r-10-n-8-2'
    booking_file = '../data/booking.csv'
    users_file = '../data/user.csv'
    rating_thresholds = []
    true_objects_indexes = [0, 1, 2, 3, 4, 5, 6, 7]
    false_objects_indexes = [8, 9]

    file_names = os.listdir(data_directory)
    img_ids_vector = [int(name.split('-')[0]) for name in file_names]
    ratings_vector = [int(name.split('-')[-2]) for name in file_names]
    name_vector = [data_directory + name for name in file_names]
    images_indexes = [name.split('-')[3].split('.')[0] for name in file_names]

    # Only the ratings matrix and the users matrix are used further down.
    ratings_matrix, _images_indexes_for_id, _ids_indexes, users_matrix = load_data(
        data_directory, booking_file, users_file, rating_thresholds)

    features = get_features(features_path, name_vector)

    fa = FeatureAgglomeration(n_clusters=50)
    fa.fit(features)
    features = fa.transform(features)

    scores_auc = []
    scores_rmse = []
    for i in range(1):
        cv_results_file = './results/xxp1-cv-generated-data-r-10-n-8-2-random-' + str(
            i) + '.csv'
        selection = ObjectSelection(show_selection_results=False,
                                    selection_algorithm='random')
        selection.transform(ids=img_ids_vector,
                            features=features,
                            ratings=ratings_vector,
                            users_ratings=ratings_matrix,
                            users=users_matrix,
                            cv_results_file=cv_results_file,
                            images_indexes=images_indexes,
                            true_objects_indexes=true_objects_indexes,
                            false_objects_indexes=false_objects_indexes,
                            paths=name_vector,
                            z_score=False)
        # NOTE(review): evaluate() runs three times and the first two results
        # are discarded; kept as-is in case the calls print or cache.
        selection.evaluate(evaluation_metric='auc')
        selection.evaluate(evaluation_metric='rmse')
        print('\n\n-----\n\n')
        score_auc, score_rmse = selection.evaluate(evaluation_metric='auc')
        scores_auc.append(score_auc)
        scores_rmse.append(score_rmse)

    results_file = './scores/v-generated-data-r-10-n-8-2-random-fa-auc.csv'
    save_scores(scores_auc, results_file)
    results_file = './scores/v-generated-data-r-10-n-8-2-random-fa-rmse.csv'
    save_scores(scores_rmse, results_file)

    # Terminate exactly where the original did. ``exit`` is a helper installed
    # by the ``site`` module, so raise SystemExit directly instead.
    raise SystemExit
Exemple #8
0
                continue
            vec = torch.LongTensor(np.array([vocab[gold_candidate]]))

            if model.use_cuda:
                vec = center_vec.cuda()

            mu_s, sigma_s, inf_mu_s, inf_sigma_s, z_s = model.get_distribution(
                vec, context_vec)
            score_mu = cosine_similarity(mu.squeeze().detach().cpu().numpy(),
                                         mu_s.squeeze().detach().cpu().numpy())
            scores_mu[lst_item.complete_word, lst_item.sentence_id].append(
                (gold_candidate, score_mu))

            # TODO not sure if minus is reqd
            # posterior (word) (inf) || prior (candidate)
            kl_prior = -1 * kl_div(inf_mu, inf_sigma, mu_s, sigma_s)
            scores_kl_prior[lst_item.complete_word,
                            lst_item.sentence_id].append(
                                (gold_candidate, kl_prior.item()))

            kl_post = -1 * kl_div(inf_mu, inf_sigma, inf_mu_s, inf_sigma_s)
            scores_kl_post[lst_item.complete_word,
                           lst_item.sentence_id].append(
                               (gold_candidate, kl_post.item()))

    print("Skipped: {}".format(skipped))

    save_scores(lst, scores_mu, "bs_mu_lst.out")
    save_scores(lst, scores_kl_post, "bs_kl_post_lst.out")
    save_scores(lst, scores_kl_prior, "bs_kl_prior_lst_mu.out")
Exemple #9
0
        if batch_n % 200 == 0:
            # if True:
            with tf.device('CPU'):
                train_sums = list(
                    tf.concat(greedy_seqs.values, axis=0).numpy())
                train_inds = list(
                    tf.concat(batch[-1].values, axis=0).numpy().squeeze())

            articles = [article[x] for x in train_inds]
            gt_summaries = [summary[x] for x in train_inds]
            examples_oovs = [oovs[x] for x in train_inds]
            scores, summaries, time_step_masks = env.get_rewards(
                gt_summaries, train_sums, examples_oovs)
            save_examples(examples_folder, articles, gt_summaries, summaries,
                          epoch, batch_n, 'train')
            save_scores(metrics_folder, scores, 'train')

            mean_epoch_loss = np.mean(losses)
            losses = []
            save_loss(metrics_folder, mean_epoch_loss, 'train')

            val_losses = []
            val_sums = []
            val_inds = []
            val_iterator = iter(val_dist_dataset)
            for val_batch_n in range(1, min(10, batches_per_epoch)):
                batch = next(val_iterator)
                loss, greedy_seqs = distributed_step(batch, 'val')
                val_losses.append(loss)

                with tf.device('CPU'):
Exemple #10
0
    checkpoints_folder, experiment_name)
val_batches_per_epoch = len(val_tf_dataset)

# Collect the greedy sequences and the example indices of every validation
# batch that passes check_shapes.
val_sums = []
val_inds = []
val_iterator = iter(val_dist_dataset)
for val_batch_n in tqdm(range(val_batches_per_epoch)):
    batch = next(val_iterator)
    if not check_shapes(batch):
        continue

    greedy_seqs = distributed_step(batch)

    with tf.device('CPU'):
        val_sums.extend(tf.concat(greedy_seqs.values, axis=0).numpy())
        val_inds.extend(
            tf.concat(batch[-1].values, axis=0).numpy().squeeze())

# Look up article, reference summary and OOV list for each collected index.
articles = [val_article[x] for x in val_inds]
gt_summaries = [val_summary[x] for x in val_inds]
examples_oovs = [val_oovs[x] for x in val_inds]

scores, summaries, time_step_masks = env.get_rewards(
    gt_summaries, val_sums, examples_oovs)

save_examples(examples_folder, articles, gt_summaries, summaries,
              'NA', 'NA', 'test', stage='test')
save_scores(metrics_folder, scores, 'test')
Exemple #11
0
                if experiment_type not in data:
                    experiment_type = experiment_type.replace(
                        "_cv", "").replace("_instances", "")

                folds = data[experiment_type]

                for fold_idx, (X_train, _, X_test, y_test) in enumerate(folds):

                    for iter_idx in range(cfg["num_iterations"]):
                        np.random.seed(iter_idx)

                        model = model_dict["cls"](**model_dict["params"])

                        model.fit(X_train)

                        y_test_pred = model.predict_proba(X_test)[:, 1]

                        iter_roc, iter_ap = calculate_metrics(
                            y_test, y_test_pred)

                        save_model(model, y_test_pred, experiment_dir,
                                   fold_idx, iter_idx)

                        roc += iter_roc
                        ap += iter_ap

                roc = np.round(roc / num_experiments, decimals=4)
                ap = np.round(ap / num_experiments, decimals=4)

                save_scores(roc, ap, experiment_dir)
Exemple #12
0
        batch = next(iterator)
        if check_shapes(batch):
            loss, greedy_seqs, _ = distributed_step(batch, 'train')
            losses.append(loss)

        if batch_n % 200 == 0:
            with tf.device('CPU'):
                train_sums = list(tf.concat(greedy_seqs.values, axis=0).numpy())
                train_inds = list(tf.concat(batch[-1].values, axis=0).numpy().squeeze())

            articles = [article[x] for x in train_inds]
            gt_summaries = [summary[x] for x in train_inds]
            examples_oovs = [oovs[x] for x in train_inds]
            scores, summaries, time_step_masks = env.get_rewards(gt_summaries, train_sums, examples_oovs)
            save_examples(examples_folder, articles, gt_summaries, summaries, epoch, batch_n, 'train', stage='pretrain')
            save_scores(metrics_folder, scores, 'pretrain')

            mean_epoch_loss = np.mean(losses)
            losses = []
            save_loss(metrics_folder, mean_epoch_loss, 'pretrain')

            val_losses = []
            val_sums = []
            val_inds = []
            val_iterator = iter(val_dist_dataset)
            for val_batch_n in range(1, min(10, batches_per_epoch)):
                batch = next(val_iterator)
                loss, greedy_seqs, _ = distributed_step(batch, 'val')
                val_losses.append(loss)

                with tf.device('CPU'):
Exemple #13
0
        scores_window.append(score)  # save most recent score
        scores.append(score)  # save most recent score
        avg_av = agent.evaluate_on_fixed_set(fixed_states)
        average_action_values.append(avg_av)

        print(f'Episode {i_episode}\tAverage Score: '
              f'{round(np.mean(scores_window),4)}\tEpsilon: {round(eps, 4)}\t'
              f'Average Q value: {round(avg_av, 4)}')

        if i_episode % conf['save_every'] == 0 and i_episode > 0:
            print(f'Saving model at iteration: {i_episode}')
            save_model(conf, agent)

    env.close()

    return {
        'scores': scores,
        'epsilons': epsilons,
        'avg_action_values': average_action_values
    }


# Script entry point: read the experiment configuration, train, save the stats.
if __name__ == '__main__':

    # The YAML file to read is taken from the parsed ``path_config`` argument.
    arguments = parse_args()
    pc = arguments.path_config
    exp_conf = read_yaml(pc)

    # Run training with that configuration and hand the result to save_scores.
    stats = train(exp_conf)
    save_scores(exp_conf, stats)
        _, mu_all, sigma_all, _, _ = model.x_inference(en_matrix,
                                                       neg_samples_en)
        center_mu = mu_all[center_idx]
        center_sigma = sigma_all[center_idx]
        #print(center_mu, center_sigma)
        for gold_candidate in gold_dict[lst_item.target_word]:
            if gold_candidate not in en_vocab.index:
                # TODO print something out and track
                continue

            en_sentence = lst_item.tokenized_sentence
            en_sentence[center_idx] = gold_candidate
            en_matrix, en_words = to_index(en_sentence, en_vocab)
            en_matrix = torch.LongTensor(en_matrix)
            neg_samples_en = torch.LongTensor(
                get_negative_batch(en_vocab, params["n_negative"], en_words))

            _, mu_all, sigma_all, _, _ = model.x_inference(
                en_matrix, neg_samples_en)
            candidate_mu = mu_all[center_idx]
            candidate_sigma = sigma_all[center_idx]
            #print(candidate_mu, candidate_sigma)

            kl = kl_div(center_mu, center_sigma, candidate_mu, candidate_sigma)
            scores[lst_item.complete_word, lst_item.sentence_id].append(
                (gold_candidate, kl.item()))

    print("Skipped: {}".format(skipped))

    save_scores(lst, scores, "eam.out")
Exemple #15
0
 def save_current_scores(self):
     """Hand the current image ids and scores to ``utils.save_scores``.

     The target path is ``scores_end_block<NNN>.npz`` inside
     ``self._backupdir``, numbered by ``self._istep``.
     """
     block_fname = 'scores_end_block%03d.npz' % self._istep
     savefpath = os.path.join(self._backupdir, block_fname)
     payload = {
         'image_ids': self._curr_imgids,
         'scores': self._curr_scores,
     }
     print('saving scores to %s' % savefpath)
     utils.save_scores(savefpath, payload)
Exemple #16
0
    with open('results/evaluation_data.parser_output', 'a') as f:
        if my_parsing is None:
            f.write("Found no viable parsing." + "\n")
        else:
            f.write(my_parsing + "\n")

    if my_parsing is not None:
        # EVALPB works if we remove first and last brackets of the SEQUOIA format and the extra spaces that come with it
        real_parsing = real_parsing[2:-1]
        my_parsing = my_parsing[2:-1]

        print("Score PYEVALB:")
        real_tree = parser.create_from_bracket_string(real_parsing)
        test_tree = parser.create_from_bracket_string(my_parsing)
        result = scorer.Scorer().score_trees(real_tree, test_tree)
        print('accuracy ' + str(result.tag_accracy))

        # for evaluation on the whole corpus, we save real_parsing
        # and_my_parsing in new files without first and last brackets
        with open('results/real_parsings_test_for_eval.txt', 'a') as f:
            f.write(real_parsing + "\n")

        with open('results/my_parsings_test_for_eval.txt', 'a') as f:
            f.write(my_parsing + "\n")

# Pass the two accumulated parsing files and the results path to save_scores.
# NOTE(review): presumably this scores the whole corpus and writes the outcome
# to results_pyevalb.txt -- confirm against save_scores.
save_scores(
    'results/real_parsings_test_for_eval.txt',
    'results/my_parsings_test_for_eval.txt',
    'results/results_pyevalb.txt',
)
Exemple #17
0
 def save_block_stats(self, save_kwargs, namestr):
     """More general version of save_current_scores: save any payload.

     Args:
         save_kwargs: passed unchanged to ``utils.save_scores``.
         namestr: prefix of the file name; also used in the log message.

     The target path is ``<namestr>_block<NNN>.npz`` inside
     ``self._backupdir``, numbered by ``self._istep``.
     """
     block_fname = '%s_block%03d.npz' % (namestr, self._istep)
     savefpath = os.path.join(self._backupdir, block_fname)
     print('saving %s to %s' % (namestr, savefpath))
     utils.save_scores(savefpath, save_kwargs)
Exemple #18
0
def run(path_to_net,
        label_dir,
        nii_dir,
        plotter,
        batch_size=32,
        test_split=0.3,
        random_state=666,
        epochs=8,
        learning_rate=0.0001,
        momentum=0.9,
        num_folds=5):
    """
    Train and validate a freshly constructed ``Net`` on each cross-validation fold.

    The ``*.npy`` files found in ``nii_dir`` are indexed, split into a test
    part and a training part via ``utils.get_test_indices``, and the training
    part is split into folds via ``utils.get_train_cv_indices``.  For every
    fold a new network, optimizer, scheduler, datasets, dataloaders and
    metric/loss meters are created, and the network is trained for
    ``epochs`` epochs, each consisting of a training pass followed by a
    validation pass.  Metrics and losses are printed, sent to ``plotter``
    and written out through ``utils.save_scores`` / ``utils.save_loss``;
    the network is saved through ``utils.save_net``.

    Args:
        path_to_net: Forwarded to ``utils.save_net`` as its first argument.
        label_dir: Forwarded to ``dataset.fMRIDataset``.
        nii_dir: Directory that is globbed for ``*.npy`` files; also
            forwarded to ``dataset.fMRIDataset``.
        plotter: Object whose ``plot`` method is called with six positional
            arguments to record metrics and losses.
        batch_size: Forwarded to ``utils.get_dataloader`` and
            ``utils.save_net``; also used for the printed iteration total.
        test_split: Forwarded to ``utils.get_test_indices``.
        random_state: Forwarded to ``utils.get_train_cv_indices``.
        epochs: Number of epochs run for every fold.
        learning_rate: Learning rate given to the Adam optimizer.
        momentum: Not referenced anywhere in the body shown here (the
            optimizer is built with ``lr`` only).
        num_folds: Forwarded to ``utils.get_train_cv_indices``.
    """
    print('Setting started', flush=True)
    nii_filenames = np.asarray(glob.glob(nii_dir + '/*.npy'))
    print('Number of files: ', len(nii_filenames), flush=True)
    # Creating data indices
    dataset_size = len(nii_filenames)
    indices = list(range(dataset_size))
    # Note the unpacking order: the test indices come first, then the
    # indices that remain available for training/validation.
    test_indices, trainset_indices = utils.get_test_indices(
        indices, test_split)
    # kfold index generator
    for cv_num, (train_idx, val_idx) in enumerate(
            utils.get_train_cv_indices(trainset_indices, num_folds,
                                       random_state)):
        # take from trainset_indices the kfold generated ones
        # (train_idx / val_idx are used as positions into trainset_indices)
        train_indices = np.asarray(trainset_indices)[np.asarray(train_idx)]
        val_indices = np.asarray(trainset_indices)[np.asarray(val_idx)]
        print('cv cycle number: ', cv_num, flush=True)
        # A new network is built for every fold.
        net = Net()
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        num_GPU = torch.cuda.device_count()
        print('Device: ', device, flush=True)
        if num_GPU > 1:
            print('Let us use', num_GPU, 'GPUs!', flush=True)
            net = nn.DataParallel(net)
        net.to(device)
        # weigh the loss with the size of classes
        # class 0: 3268
        # class 1: 60248
        # (the weights below are the reciprocals of these two counts)
        weight = torch.tensor([1. / 3268., 1. / 60248.]).to(device)
        criterion = nn.CrossEntropyLoss(weight=weight)
        # Only the learning rate is passed; the `momentum` argument is unused.
        optimizer = optim.Adam(net.parameters(), lr=learning_rate)
        scheduler = ReduceLROnPlateau(optimizer,
                                      threshold=1e-6,
                                      patience=0,
                                      verbose=True)
        fMRI_dataset_train = dataset.fMRIDataset(label_dir,
                                                 nii_dir,
                                                 train_indices,
                                                 transform=dataset.ToTensor())
        fMRI_dataset_val = dataset.fMRIDataset(label_dir,
                                               nii_dir,
                                               val_indices,
                                               transform=dataset.ToTensor())
        datalengths = {
            'train': len(fMRI_dataset_train),
            'val': len(fMRI_dataset_val)
        }
        dataloaders = {
            'train': utils.get_dataloader(fMRI_dataset_train, batch_size,
                                          num_GPU),
            'val': utils.get_dataloader(fMRI_dataset_val, batch_size, num_GPU)
        }
        print('Train set length {}, Val set length {}: '.format(
            datalengths['train'], datalengths['val']))
        # Setup metrics
        # The meters are created once per fold, outside the epoch loop, and no
        # reset is called in this function.
        # NOTE(review): presumably they therefore accumulate over all epochs of
        # a fold -- confirm against the meter implementations.
        running_metrics_val = metrics.BinaryClassificationMeter()
        running_metrics_train = metrics.BinaryClassificationMeter()
        val_loss_meter = metrics.averageLossMeter()
        train_loss_meter = metrics.averageLossMeter()
        # Track iteration number over epochs for plotter
        # (incremented only on every 10th training batch, see below)
        itr = 0
        # Track lowest loss over epochs for saving network
        # (re-initialised for every fold)
        lowest_loss = 100000
        for epoch in tqdm(range(epochs), desc='Epochs'):
            print('Epoch: ', epoch + 1, flush=True)
            print('Phase: train', flush=True)
            phase = 'train'
            # Set model to training mode
            net.train(True)
            # Iterate over data.
            for i, data in tqdm(enumerate(dataloaders[phase]),
                                desc='Dataiteration_train'):
                train_pred, train_labels, train_loss = train(
                    data, optimizer, net, criterion, device)
                running_metrics_train.update(train_pred, train_labels)
                train_loss_meter.update(train_loss, n=1)
                # Every 10th batch: report, plot and persist training progress.
                if (i + 1) % 10 == 0:
                    # The printed total is the truncated quotient of dataset
                    # length and batch size.
                    print('Number of Iteration [{}/{}]'.format(
                        i + 1, int(datalengths[phase] / batch_size)),
                          flush=True)
                    itr += 1
                    score = running_metrics_train.get_scores()
                    for k, v in score.items():
                        plotter.plot(k, 'itr', phase, k, itr, v)
                        print(k, v, flush=True)
                    print('Loss Train', train_loss_meter.avg, flush=True)
                    plotter.plot('Loss', 'itr', phase, 'Loss Train', itr,
                                 train_loss_meter.avg)
                    utils.save_scores(running_metrics_train.get_history(),
                                      phase, cv_num)
                    utils.save_loss(train_loss_meter.get_history(), phase,
                                    cv_num)
            print('Phase: val', flush=True)
            phase = 'val'
            # Set model to validation mode
            net.train(False)
            with torch.no_grad():
                for i, data in tqdm(enumerate(dataloaders[phase]),
                                    desc='Dataiteration_val'):
                    val_pred, val_labels, val_loss = val(
                        data, net, criterion, device)
                    running_metrics_val.update(val_pred, val_labels)
                    val_loss_meter.update(val_loss, n=1)
                    if (i + 1) % 10 == 0:
                        print('Number of Iteration [{}/{}]'.format(
                            i + 1, int(datalengths[phase] / batch_size)),
                              flush=True)
                    # NOTE(review): unlike the training phase, the history is
                    # saved on every validation batch, not only every 10th --
                    # confirm this asymmetry is intended.
                    utils.save_scores(running_metrics_val.get_history(), phase,
                                      cv_num)
                    utils.save_loss(val_loss_meter.get_history(), phase,
                                    cv_num)
                    # Save the network whenever the validation loss meter's
                    # average drops below the lowest value seen in this fold;
                    # the check runs after every validation batch.
                    if val_loss_meter.avg < lowest_loss:
                        lowest_loss = val_loss_meter.avg
                        utils.save_net(path_to_net,
                                       batch_size,
                                       epoch,
                                       cv_num,
                                       train_indices,
                                       val_indices,
                                       test_indices,
                                       net,
                                       optimizer,
                                       criterion,
                                       iter_num=i)
                # Plot validation metrics and loss at the end of the val phase
                # (plotted against the current training `itr` value)
                score = running_metrics_val.get_scores()
                for k, v in score.items():
                    plotter.plot(k, 'itr', phase, k, itr, v)
                    print(k, v, flush=True)
                print('Loss Val', val_loss_meter.avg, flush=True)
                plotter.plot('Loss', 'itr', phase, 'Loss Val', itr,
                             val_loss_meter.avg)

            print(
                'Epoch [{}/{}], Train_loss: {:.4f}, Train_bacc: {:.2f}'.format(
                    epoch + 1, epochs, train_loss_meter.avg,
                    running_metrics_train.bacc),
                flush=True)
            print('Epoch [{}/{}], Val_loss: {:.4f}, Val_bacc: {:.2f}'.format(
                epoch + 1, epochs, val_loss_meter.avg,
                running_metrics_val.bacc),
                  flush=True)
            # Call the learning rate adjustment function after every epoch
            # (the scheduler is stepped on the training loss average, not the
            # validation loss)
            scheduler.step(train_loss_meter.avg)
        # Save net after every cross validation cycle
        # (passes the total `epochs` count and no iter_num, unlike the
        # best-loss save inside the validation loop)
        utils.save_net(path_to_net, batch_size, epochs, cv_num, train_indices,
                       val_indices, test_indices, net, optimizer, criterion)