Example no. 1
def evaluate():
    transform, lidar_transform, mask_transform = get_transform()
    test_dataset = TreeDataset(config['proc_data'],
                               transform=transform,
                               lidar_transform=lidar_transform,
                               use_lidar=args.use_lidar,
                               mask_transform=mask_transform,
                               purpose='test')

    # baseline
    if args.baseline:
        print('====== Baseline Performance ======')
        baseline = PixelThreshold(config['greenpixel'])
        evaluate_model(test_dataset,
                       baseline,
                       threshold=args.threshold,
                       device=torch.device('cpu'),
                       batch_size=args.batch_size)

    print('====== CNN Model Performance ======')
    model.load_state_dict(torch.load(args.model_ckp, map_location=device))

    evaluate_model(test_dataset,
                   model,
                   threshold=args.threshold,
                   use_lidar=args.use_lidar,
                   device=device,
                   batch_size=args.batch_size)
    return
Example no. 2
    def t_ev_pth_eval_button_on_click(self):
        file_path = self.t_ev_pth_path_line.text()
        if os.path.exists(file_path):
            import evaluate

            cache_path = os.path.join(os.getcwd(), "cache")
            if os.path.isdir(cache_path) is False:
                os.makedirs(cache_path)

            try:
                evaluate.evaluate_model(ckpt_file=file_path,
                                        device="cuda",
                                        show_fig=False,
                                        save_path=os.path.join(
                                            cache_path, "eval.png"))
            except Exception:  # retry once on a transient failure (e.g. CUDA warm-up)
                evaluate.evaluate_model(ckpt_file=file_path,
                                        device="cuda",
                                        show_fig=False,
                                        save_path=os.path.join(
                                            cache_path, "eval.png"))

            image = QtGui.QPixmap()
            image.load(os.path.join(cache_path, "eval.png"))
            self.graphicsView.scene = QGraphicsScene()  # create a scene to hold the image
            item = QGraphicsPixmapItem(image)  # wrap the loaded pixmap in a graphics item
            self.graphicsView.scene.addItem(item)  # add the pixmap item to the scene
            self.graphicsView.setScene(self.graphicsView.scene)
        else:
            QMessageBox.information(self, "Error", "Invalid file path",
                                    QMessageBox.Ok)
        return
Example no. 3
def midpoint_eval(batch):
    if (batch + 1) % 25 == 0:
        print('Askubuntu dev')
        evaluate.evaluate_model(model, encode_fn, dev_samples, question_map)
        print('Askubuntu test')
        evaluate.evaluate_model(model, encode_fn, test_samples, question_map)
        torch.save(model, save_name + str((batch + 1) * batch_size))
        print('\nMODEL SAVED\n')
Example no. 4
def run_sequence(training_data, testing_data, valid_data, all_relations,
                 vocabulary, embedding, cluster_labels, num_clusters,
                 shuffle_index):
    splited_training_data = split_data(training_data, cluster_labels,
                                       num_clusters, shuffle_index)
    splited_valid_data = split_data(valid_data, cluster_labels,
                                    num_clusters, shuffle_index)
    splited_test_data = split_data(testing_data, cluster_labels,
                                   num_clusters, shuffle_index)
    #print(splited_training_data)
    '''
    for data in splited_training_data[0]:
        print(data)
        print(cluster_labels[data[0]])
    '''
    #print(cluster_labels)
    seen_relations = []
    current_model = None
    grads_means = []
    grads_fishers = []
    sequence_results = []
    #np.set_printoptions(precision=3)
    result_whole_test = []
    for i in range(num_clusters):
        seen_relations += [data[0] for data in splited_training_data[i] if
                          data[0] not in seen_relations]
        current_train_data = remove_unseen_relation(splited_training_data[i],
                                                    seen_relations)
        current_valid_data = remove_unseen_relation(splited_valid_data[i],
                                                    seen_relations)
        current_test_data = []
        for j in range(i+1):
            current_test_data.append(
                remove_unseen_relation(splited_test_data[j], seen_relations))
        current_model = train(current_train_data, current_valid_data,
                              vocabulary, embedding_dim, hidden_dim,
                              device, batch_size, lr, model_path,
                              embedding, all_relations, current_model, epoch,
                              grads_means, grads_fishers, loss_margin)
        grad_mean, grad_fisher = get_mean_fisher(current_model,
                                                 current_train_data,
                                                 all_relations)
        #print(grad_mean)
        grads_means.append(grad_mean)
        grads_fishers.append(grad_fisher)
        results = [evaluate_model(current_model, test_data, batch_size,
                                  all_relations, device)
                   for test_data in current_test_data]
        print_list(results)
        sequence_results.append(np.array(results))
        result_whole_test.append(evaluate_model(current_model,
                                                testing_data, batch_size,
                                                all_relations, device))
    print('test set size:', [len(test_set) for test_set in current_test_data])
    return sequence_results, result_whole_test
Example no. 5
	def evaluate(self, prediction_path, test_con_path, *args, **kwargs):
		'''
		Args:
			prediction_path: path to the predictions file [same format as output from predict]
			test_con_path: path to the file with ground-truth annotations

		Returns:
			None; the (p, r, f1) metrics are computed and reported by evaluate.evaluate_model.
		'''
		evaluate.evaluate_model(prediction_path, test_con_path)
		print("Evaluation Completed.")
Example no. 6
def print_vocab_summary():
    model_name = 'att512'
    model_class = attention3.Attention512
    opt_id = 'adam'
    for token_id, tokenizer_obj in train.tokenizers.items():
        # build the checkpoint filename for this tokenizer/optimizer combination
        filename = model_name + '_' + token_id + '_' + opt_id + '_' + train.version
        # load model
        model_obj = model_class(filename, tokenizer_obj, opt_id)
        model_obj.train_save(epochs=0)  # load weights
        # test on some training sequences
        print('Evaluating manual set: ' + model_name)
        evaluate_model(model_obj, grammar_dataset)
Example no. 7
def main():
    current_checkpoints_path = os.path.join(opt.data_path,
                                            opt.target_checkpoints_path)
    eval_required_checkpoint_paths = glob.glob(current_checkpoints_path +
                                               '*/*.h5')

    evaluate_model(
        target_checkpoints_path=current_checkpoints_path,
        eval_required_checkpoint_paths=eval_required_checkpoint_paths)

    with open('/tmp/target-checkpoints-path.txt', 'w') as f:
        f.write(opt.target_checkpoints_path)

    with open('/tmp/checkpoint-metrics-filename.txt', 'w') as f:
        f.write(opt.checkpoint_metadata_filename)
Example no. 8
def train(model, train_loader, optimizer, criterion, docDataset, num_neg):
    best_hr, best_ndcg, best_iter = 0, 0, -1
    for epoch in range(epochs):
        print('epoch: {}'.format(epoch + 1))
        # Training
        for i, feed_dict in enumerate(train_loader):
            if i % 10 == 0:
                print('step: {} / {}'.format(i, len(train_loader)))
            for key in feed_dict:
                if feed_dict[key] is not None:
                    feed_dict[key] = feed_dict[key].to(dtype=torch.long,
                                                       device=device)
            # Forward pass
            outputs = model(feed_dict)
            labels = feed_dict['label'].float().view(outputs.shape)
            loss = criterion(outputs, labels)
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Evaluation
        (hits, ndcgs) = evaluate_model(model, docDataset, topK)
        hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
        if hr > best_hr:
            best_hr, best_ndcg, best_iter = hr, ndcg, epoch
        # Negative sample again
        docDataset.user_input, docDataset.item_input, docDataset.labels = \
            docDataset.get_train_instances(docDataset.ratingMatrix, num_neg=num_neg)
        train_loader = torch.utils.data.DataLoader(docDataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
    return best_hr, best_ndcg
Example no. 9
    def build_model(self, maxIter=100, num_thread=4, batch_size=32):
        # Training process
        print(
            "Training MF-BPR with: learning_rate=%.2f, regularization=%.4f, factors=%d, #epoch=%d, batch_size=%d."
            %
            (self.learning_rate, self.reg, self.factors, maxIter, batch_size))
        for iteration in range(maxIter):
            # Each training epoch
            t1 = time.time()
            for s in range(self.num_rating // batch_size):
                # sample a batch of users, positive samples and negative samples
                (users, items_pos, items_neg) = self.get_batch(batch_size)
                # perform a batched SGD step
                self.sgd_step(users, items_pos, items_neg, self.learning_rate)

            # check performance
            t2 = time.time()
            self.U_np = self.U.eval()
            self.V_np = self.V.eval()
            topK = 100
            (hits, ndcgs) = evaluate_model(self, self.test, topK, num_thread)
            print(
                "Iter=%d [%.1f s] HitRatio@%d = %.3f, NDCG@%d = %.3f [%.1f s]"
                % (iteration, t2 - t1, topK, np.array(hits).mean(), topK,
                   np.array(ndcgs).mean(), time.time() - t2))
Example no. 10
def main(train_stock, val_stock, window_size, batch_size, ep_count, model_name,
         pretrained, debug):
    """ Trains the stock trading bot using Deep Q-Learning.

    Please see https://arxiv.org/abs/1312.5602 for more details.

    Args: optional arguments [python train.py --help]
    """
    switch_k_backend_device()

    agent = Agent(window_size, pretrained=pretrained, model_name=model_name)
    train_data = get_stock_data(train_stock)
    val_data = get_stock_data(val_stock)

    initial_offset = val_data[1] - val_data[0]  # reference offset passed to show_train_result

    for episode in range(1, ep_count + 1):
        train_result = train_model(agent,
                                   episode,
                                   train_data,
                                   ep_count=ep_count,
                                   batch_size=batch_size,
                                   window_size=window_size)
        val_result, _ = evaluate_model(agent, val_data, window_size, debug)
        show_train_result(train_result, val_result, initial_offset)
Example no. 11
def main():
    import configspark
    sc = configspark.SPARK_CONTEXT

    # user/song string ID to int ID mappings
    full_text = sc.textFile(config.MSD_DATA)
    full_raw = full_text.map(msd_parse.parse_line)
    users, songs, _ = msd_parse.get_user_song_maps(full_raw)

    print("\nLoading MovieLens test dataset\n")
    test_parsed = (
        sc.textFile(config.MSD_TEST)
        .map(msd_parse.parse_line))
    test_prepped = msd_parse.replace_raw_ids(test_parsed, users, songs)
    test = test_prepped.map(msd_parse.rating_convert)

    if os.path.exists(config.MSD_MODEL):
        print("\n\nLoading existing recommendation model from %s\n\n"
              % config.MSD_MODEL)
        model = MatrixFactorizationModel.load(sc, config.MSD_MODEL)
    else:
        raise RuntimeError("Failed to load ALS model from %s"
                           % config.MSD_MODEL)

    mse, rmse = evaluate.evaluate_model(model, test)
    print("\nMSD ALS model performance: MSE=%0.3f RMSE=%0.3f\n" % (mse, rmse))
Example no. 12
def evaluate(model_filepath, data_filepath):
    print(f'Running evaluation on {data_filepath} with model {model_filepath}')
    data_output_filepath = os.path.join(data_filepath,
                                        PROCESSED_STOCKNET_DATA_FOLDER)

    with open(os.path.join(data_output_filepath, 'test.pkl'), 'rb') as obj:
        test = pickle.load(obj)
        (test_company_to_price_df, test_company_to_tweets, test_date_universe,
         test_n_days, test_n_stocks, test_max_tweets) = test
    test_dataset = StockDataset(test_company_to_price_df,
                                test_company_to_tweets, test_date_universe,
                                test_n_days, test_n_stocks, test_max_tweets)

    test_dataloader = DataLoader(test_dataset,
                                 batch_size=1,
                                 shuffle=False,
                                 num_workers=0)

    man_sf_model = torch.load(model_filepath)

    man_sf_model.eval()

    test_acc, sharpe_ratio, f1 = evaluate_model(man_sf_model, test_dataloader,
                                                T, test_company_to_tweets,
                                                test_date_universe,
                                                data_filepath)

    print('test accuracy:', test_acc)
    print('sharpe ratio:', sharpe_ratio[0])
    print('f1:', f1)
Example no. 13
def evaluate(model, test_ratings, test_negatives, K=10):
    """Helper that calls evaluate from the NCF libraries."""
    (hits, ndcgs) = evaluate_model(model,
                                   test_ratings,
                                   test_negatives,
                                   K=K,
                                   num_thread=1)
    return np.array(hits).mean(), np.array(ndcgs).mean()
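
A minimal usage sketch for the helper above, assuming NCF-style inputs (a trained model, test_ratings as (user, item) pairs, test_negatives as per-user negative item lists; the names are illustrative):

hr, ndcg = evaluate(model, test_ratings, test_negatives, K=10)
print('HR@10 = %.4f, NDCG@10 = %.4f' % (hr, ndcg))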
Example no. 14
def main():
    command_line = parseArgs()
    path_to_train = './'+command_line.data+'/rsc15_train_full.txt'
    path_to_test = './' + command_line.data+'/rsc15_test.txt'
    path_to_train = path_to_test  # NOTE: overrides the train path with the test file
    print(path_to_train)
    print(path_to_test)
    data = pd.read_csv(path_to_train, sep='\t', dtype={'ItemId': np.int64})
    valid = pd.read_csv(path_to_test, sep='\t', dtype={'ItemId': np.int64})

    args = Args()
    args.n_items = len(data['ItemId'].unique())

    args.learning_rate = command_line.lr
    args.is_training = command_line.train
    args.test_model = command_line.test
    args.hidden_act = command_line.hidden_act
    args.final_act = command_line.final_act
    args.loss = command_line.loss
    args.layers = command_line.layer
    args.batch_size = command_line.size
    args.n_epochs = command_line.epoch

    args.checkpoint_dir = command_line.checkpoint_dir
    args.dropout_p_hidden = 1.0 if args.is_training == 0 else command_line.dropout
    args.weight_decay = command_line.weight_decay
    args.rnn_size = command_line.rnn_size
    args.optimize = command_line.optimize
    args.evaluate_train = command_line.evaluate_train

    with tf.Session() as sess:
        print("\n\n\nBEGIN: Batch size: {}, Loss: {}".format(args.batch_size, args.loss))
        gru = model.GRU4Rec(sess, args)
        if args.is_training:
            print("Traing only")
            gru.fit(data)

    if not args.is_training:
        if args.evaluate_train == 1:
            valid = data
        result = []
        for i in range(args.n_epochs):  # one evaluation per epoch checkpoint
            tf.reset_default_graph()
            with tf.Session() as eval_sess:

                args.test_model = i
                gru = model.GRU4Rec(eval_sess, args)
                res = evaluate.evaluate_model(gru, data, valid, batch_size=args.batch_size)
                print('Epoch {}\tRecall@20: {}\tMRR@20: {}'.format(i,res[0], res[1]))
                result.append(res)
        if args.evaluate_train == 1:
            with open(args.checkpoint_dir + '_result_in_train.txt', 'w') as file:
                for rs in result:
                    file.write('{}\t{}\n'.format(rs[0], rs[1]))
        else:
            with open(args.checkpoint_dir + '_result_' + str(args.batch_size) + '.txt', 'w') as file:
                for rs in result:
                    file.write('{}\t{}\n'.format(rs[0], rs[1]))
Example no. 15
def calculate(pred_relevance, split_idx):
	rel_query_model = pred_relevance
	print(type(rel_query_model))
	print(rel_query_model.shape.eval())
	with open("query_model.pkl", "rb") as file: query_model = Pickle.load(file)[:split_idx]
	with open("query_list.pkl", "rb") as file:	query_list = Pickle.load(file)[:split_idx]

	with open("doc_model.pkl", "rb") as file: doc_model = Pickle.load(file)
	with open("doc_list.pkl", "rb") as file: doc_list = Pickle.load(file)
	#with open("relevance_model_RM.pkl", "rb") as file : rel_query_model = Pickle.load(file)
	#with open("query_relevance_model_RLE.pkl", "rb") as file : rel_query_model = Pickle.load(file)

	background_model = ProcDoc.read_background_dict()
	qry_eval = evaluate.evaluate_model(True)

	''' document smoothing '''
	for doc_idx in range(doc_model.shape[0]):
		doc_vec = doc_model[doc_idx]
		doc_model[doc_idx] = (1 - doc_lambda) * doc_vec + doc_lambda * background_model
	
	mAP_list = []
	query_rel_list = []
	query_bg_list = []	
	doc_model = np.log(doc_model)  # log-domain doc model: ranking scores become dot products with the query model

	for rel_qry_lambda in np.linspace(0, 1., num=11):
		''' query smoothing '''	
		with open("query_model.pkl", "rb") as file: query_model = Pickle.load(file)[:split_idx]
		X = T.matrix()
		Y = (1 - rel_qry_lambda) * X + rel_qry_lambda * rel_query_model
		f = theano.function([X], Y)
		query_model = f(query_model)
		result = np.argsort(-np.dot(query_model, doc_model.T), axis = 1)
		query_docs_ranking = {}
		''' speedup '''
		for q_idx in range(len(query_list)):
			docs_ranking = []
			for doc_idx in result[q_idx]:
				docs_ranking.append(doc_list[doc_idx])
			query_docs_ranking[query_list[q_idx]] = docs_ranking
		
		''' query 
		for query_key, query_vec in  zip(query_list, query_model):
			print len(query_docs_ranking.keys())
			query_result = np.argsort(-(query_vec * doc_model).sum(axis = 1))
			docs_ranking = []
			for doc_idx in query_result:
				docs_ranking.append(doc_list[doc_idx])
				query_docs_ranking[query_key] = docs_ranking
			
		mAP = eval.mean_average_precision(query_docs_ranking)	
		print mAP, qry_lambda, rel_qry_lambda
		'''
		mAP = qry_eval.mean_average_precision(query_docs_ranking)	
		mAP_list.append(mAP)
	return max(mAP_list)
Example no. 16
def main():
    args = parse()
    config_path = args.config_path

    gin.external_configurable(keras.optimizers.Adam,
                              module='tensorflow.python.keras.optimizers')
    gin.external_configurable(keras.losses.categorical_crossentropy,
                              module='tensorflow.python.keras.losses')
    gin.parse_config_file(config_path)

    # args = RunConfig()
    # args.config_path = config_path

    data = create_load_data(args)

    model = train_model(data, args)

    evaluate_model(data, model, args)
Example no. 17
def test(model, full_dataset: MovieDataset, topK):
    'Test the HR and NDCG for the model @topK'
    # put the model in eval mode before testing
    if hasattr(model,'eval'):
        # print("Putting the model in eval mode")
        model.eval()
    t1 = time()
    (hits, ndcgs) = evaluate_model(model, full_dataset, topK)
    hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
    print('Eval: HR = %.4f, NDCG = %.4f [%.1f s]' % (hr, ndcg, time()-t1))
    return hr, ndcg
Example no. 18
def main():
    import configspark
    sc = configspark.SPARK_CONTEXT

    print("\nLoading MovieLens test dataset\n")
    test_text = sc.textFile(config.ML_RATINGS_TEST)
    ratings_test = (
        test_text.map(ml_parse.parse_line).map(ml_parse.rating_convert))

    if os.path.exists(config.ML_MODEL):
        print("\n\nLoading existing recommendation model from %s\n\n"
              % config.ML_MODEL)
        model = MatrixFactorizationModel.load(sc, config.ML_MODEL)
    else:
        raise RuntimeError("Failed to load ALS model from %s" % config.ML_MODEL)

    mse, rmse = evaluate.evaluate_model(model, ratings_test)
    print("\nML ALS model performance: MSE=%0.3f RMSE=%0.3f\n" % (mse, rmse))
Example no. 19
    def build_model(self, maxIter=100, num_thread=4, batch_size=32):
        # dataloader
        data_loader = DataLoader(self.dataset, batch_size=batch_size)

        # Training process
        print(
            "Training MF-BPR with: learning_rate=%.4f, regularization=%.4f, factors=%d, #epoch=%d, batch_size=%d."
            %
            (self.learning_rate, self.reg, self.factors, maxIter, batch_size))
        t1 = time.time()
        iter_loss = 0
        for iteration in range(maxIter):
            # self.mf_scheduler.step()
            # Each training epoch
            for s, (users, items_pos, items_neg) in enumerate(data_loader):
                # sample a batch of users, positive samples and negative samples

                # zero grad
                self.mf_optim.zero_grad()
                # forward propagation
                y_ui, y_uj, loss = self.forward(users, items_pos, items_neg)
                iter_loss += loss.item()  # accumulate a float so the graph is not retained
                # back propagation
                loss.backward()
                self.mf_optim.step()

            # check performance
            if iteration % 20 == 19:
                t2 = time.time()
                topK = 20
                (hits, ndcgs) = evaluate_model(self, self.test, topK,
                                               num_thread)
                # save the hr and ndcg value.
                hr_mean = np.array(hits).mean()
                ndcg_mean = np.array(ndcgs).mean()

                print(
                    "Iter=%d [%.1f s] HitRatio@%d = %.4f, NDCG@%d = %.4f [%.1f s]"
                    % (iteration, (t2 - t1) / 20, topK, hr_mean, topK,
                       ndcg_mean, time.time() - t2))
                t1 = time.time()
                iter_loss = 0
Example no. 20
    def build_model(self, maxIter=100, batch_size=32):
        self.maxIter = maxIter
        self.batch_size = batch_size
        print(
            'Training MF-BPR model with: learning_rate={}, reg={}, hidden_dims={}, #epoch={}, batch_size={}.'
            .format(self.learning_rate, self.reg, self.hidden_dims,
                    self.maxIter, self.batch_size))

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # training process
            # each training epoch
            for iteration in range(maxIter):
                t1 = time.time()
                for _ in range(self.num_rating // self.batch_size):
                    uij_train = self.get_batch()
                    sess.run(
                        [self.sgd_step],
                        feed_dict={  # optimization
                            self.u: uij_train[:, 0],
                            self.i: uij_train[:, 1],
                            self.j: uij_train[:, 2]
                        })
                print('Have finished epoch {}.'.format(iteration + 1))

                # check performance
                t2 = time.time()
                variable_names = [v.name for v in tf.trainable_variables()]
                self.parameters = sess.run(variable_names)
                # self.parameters[0] ==> latent matrix for users
                # self.parameters[1] ==> latent matrix for items
                hits, ndcgs = evaluate_model(self, self.test, self.topK)
                print(
                    'Iter: {} [{:.2f} s] HitRatio@{} = {:.4f}, NDCG@{} = {:.4f} [{:.2f} s]'
                    .format(iteration + 1, t2 - t1, self.topK,
                            np.array(hits).mean(), self.topK,
                            np.array(ndcgs).mean(),
                            time.time() - t2))
Example no. 21
def main(train_data, val_data, train_data2, val_data2, window_size, batch_size,
         ep_count, model_name, pretrained, debug):
    """ Trains the stock trading bot using Deep Q-Learning.

    Please see https://arxiv.org/abs/1312.5602 for more details.

    Args: optional arguments [python train.py --help]
    """
    switch_k_backend_device()

    # Initialize agent, state size depends if 1 or 2 input features
    if train_data2 is not None:
        agent = Agent(window_size * 2,
                      pretrained=pretrained,
                      model_name=model_name)
    else:
        agent = Agent(window_size,
                      pretrained=pretrained,
                      model_name=model_name)

    initial_offset = val_data[1] - val_data[0]

    for episode in range(1, ep_count + 1):
        train_result = train_model(agent,
                                   episode,
                                   train_data,
                                   train_data2,
                                   ep_count=ep_count,
                                   batch_size=batch_size,
                                   window_size=window_size)
        val_result, _ = evaluate_model(agent, val_data, val_data2, window_size,
                                       debug)
        show_train_result(train_result, val_result, initial_offset)
Example no. 22
def main():
    with open("data/data", "rb") as file:
        data = pickle.load(file)
    params = get_configs()
    random.shuffle(params)
    for i, param in enumerate(params):
        avg_test = evaluate.evaluate_model(param)
        path = "./plots/" + param.name + "/"
        if not os.path.exists(path):
            os.makedirs(path)

        visualize.plot_summary_by_dset(data, path)
        visualize.plot_aggregate_summary(data, path)
        visualize.plot_seq_summary(data, path)

        if not os.path.isfile("sweep.csv"):
            with open("sweep.csv", "w") as csvfile:
                top = [
                    "#_layers", "hidden_size", "learning_rate", "alpha",
                    "beta", "lambds", "#_epochs"
                ]
                for dset in config.ALLDATA_SINGLE:
                    top.append(dset + "_corr")
                    top.append(dset + "_squared")
                writer = csv.writer(csvfile)
                writer.writerow(top)

        with open("sweep.csv", "a+") as csvfile:
            writer = csv.writer(csvfile)
            layer = [
                param.n_layers, param.hidden_size, param.lr, param.alpha,
                param.beta, param.lambd, param.n_epochs
            ]
            for dset in config.ALLDATA_SINGLE:
                layer.append(np.mean(avg_test[dset][0]))
                layer.append(np.mean(avg_test[dset][1]))
            writer.writerow(layer)
Example no. 23
          %(time()-t1, num_users, num_subcategories, num_materials, num_patterns, len(train), len(test)))

    # Build model
    model = get_model(num_users, num_subcategories, num_materials, num_patterns, layers, reg_layers)
    if learner.lower() == "adagrad":
        model.compile(optimizer=Adagrad(lr=learning_rate), loss='binary_crossentropy')
    elif learner.lower() == "rmsprop":
        model.compile(optimizer=RMSprop(lr=learning_rate), loss='binary_crossentropy')
    elif learner.lower() == "adam":
        model.compile(optimizer=Adam(lr=learning_rate), loss='binary_crossentropy')
    else:
        model.compile(optimizer=SGD(lr=learning_rate), loss='binary_crossentropy')

    # Check Init performance
    t1 = time()
    (hits, ndcgs) = evaluate_model(model, test, test, topK, evaluation_threads)
    hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
    print('Init: HR = %.4f, NDCG = %.4f [%.1f]' %(hr, ndcg, time()-t1))

    # Train model
    best_hr, best_ndcg, best_iter = hr, ndcg, -1
    for epoch in range(epochs):
        t1 = time()
        # Generate training instances
        user_input, subcategory_input, material_input, pattern_input, labels = get_train_instances(train, num_negatives)

        # Training
        hist = model.fit([np.array(user_input), np.array(subcategory_input), np.array(material_input), np.array(pattern_input)], #input
                         np.array(labels), # labels
                         batch_size=batch_size, nb_epoch=1, verbose=0, shuffle=True)
        t2 = time()
Example no. 24
    model = get_model(num_users, num_items, num_factors, regs)
    if learner.lower() == "adagrad": 
        model.compile(optimizer=Adagrad(lr=learning_rate), loss='binary_crossentropy')
    elif learner.lower() == "rmsprop":
        model.compile(optimizer=RMSprop(lr=learning_rate), loss='binary_crossentropy')
    elif learner.lower() == "adam":
        model.compile(optimizer=Adam(lr=learning_rate), loss='binary_crossentropy')
    else:
        model.compile(optimizer=SGD(lr=learning_rate), loss='binary_crossentropy')
    #print(model.summary())
    
    # Train and test: initial performance
    t1 = time.time()
    (mae, hit, ndcg) = evaluate_model(model, train, testRatings, testNegatives, topK, evaluation_threads)
    # average value
    print(
        "mae: %.6f , hit: %.6f , ndcg : %.6f" % (np.array(mae).mean(), np.array(hit).mean(), np.array(ndcg).mean()))
    hr = np.array(mae).mean()
    print('Init: MAE = %.4f,\t [%.1f s]' % (hr, time.time() - t1))

    # Train model
    best_hr, best_iter = hr, -1
    for epoch in range(epochs):
        t1 = time.time()
        # Generate training instances
        user_input, item_input, labels = get_train_instances(train, num_negatives)
        # Training
        hist = model.fit([np.array(user_input), np.array(item_input)], np.array(labels), batch_size=batch_size,
                         epochs=1, verbose=0, shuffle=True)
Example no. 25
            dataset = data + "_" + str(i)
            print("\n")
            print("< model > ", model_name)
            print("< dataset >", dataset)
            print()
            train_model(method=method,
                        resolution=r,
                        dataset=dataset,
                        in_size=in_size,
                        size=s,
                        step=step,
                        arch=arch,
                        opt=opt,
                        lr=lr,
                        epochs=epochs,
                        batch_size=batch_size,
                        l2_reg=l2_reg,
                        decay=decay,
                        border_weight=None,
                        binary=majority)
            test_model(method=method,
                       resolution=r,
                       dataset=dataset,
                       in_size=in_size,
                       size=s,
                       step=step,
                       label_map=False)
        mv_dirs(mpath)
        save_TestTime_asFile(mpath)
        evaluate_model(mpath)
Example no. 26
def run_sequence(training_data, testing_data, valid_data, all_relations,
                 vocabulary, embedding, cluster_labels, num_clusters,
                 shuffle_index, rel_embeds):
    splited_training_data = split_data(training_data, cluster_labels,
                                       num_clusters, shuffle_index)
    splited_valid_data = split_data(valid_data, cluster_labels, num_clusters,
                                    shuffle_index)
    splited_test_data = split_data(testing_data, cluster_labels, num_clusters,
                                   shuffle_index)
    seen_relations = []
    current_model = None
    # 'alignment_model' corresponds to the alignment linear model
    alignment_model = None
    memory_data = []
    memory_que_embed = []
    memory_rel_embed = []
    sequence_results = []
    result_whole_test = []
    all_seen_rels = []
    for i in range(num_clusters):
        for data in splited_training_data[i]:
            if data[0] not in seen_relations:
                seen_relations.append(data[0])
        current_train_data = remove_unseen_relation(splited_training_data[i],
                                                    seen_relations)
        current_valid_data = remove_unseen_relation(splited_valid_data[i],
                                                    seen_relations)
        current_test_data = []
        for j in range(i + 1):
            current_test_data.append(
                remove_unseen_relation(splited_test_data[j], seen_relations))
        one_memory_data = []
        for this_sample in current_train_data:
            if this_sample[0] not in all_seen_rels:
                all_seen_rels.append(this_sample[0])
        update_rel_cands(memory_data, all_seen_rels, rel_embeds)
        to_train_data = current_train_data
        current_model = train(to_train_data, current_valid_data, vocabulary,
                              embedding_dim, hidden_dim, device, batch_size,
                              lr, embedding, all_relations, current_model,
                              epoch, memory_data, loss_margin, alignment_model)
        #memory_data.append(current_train_data[-task_memory_size:])
        memory_data.append(
            select_data(current_model, current_train_data, task_memory_size,
                        all_relations, alignment_model))
        #memory_data.append(select_data_icarl(current_model, current_train_data,
        #                               task_memory_size, all_relations,
        #                               alignment_model))
        memory_que_embed.append(
            get_que_embed(current_model, memory_data[-1], all_relations,
                          alignment_model))
        memory_rel_embed.append(
            get_rel_embed(current_model, memory_data[-1], all_relations,
                          alignment_model))
        to_train_data = []
        for this_memory in memory_data:
            to_train_data += this_memory
        if len(memory_data) > 1:
            cur_que_embed = [
                get_que_embed(current_model, this_memory, all_relations,
                              alignment_model, True)
                for this_memory in memory_data
            ]
            cur_rel_embed = [
                get_rel_embed(current_model, this_memory, all_relations,
                              alignment_model, True)
                for this_memory in memory_data
            ]
            alignment_model = update_alignment_model(alignment_model,
                                                     cur_que_embed,
                                                     cur_rel_embed,
                                                     memory_que_embed,
                                                     memory_rel_embed)
            memory_que_embed = [
                get_que_embed(current_model, this_memory, all_relations,
                              alignment_model, False)
                for this_memory in memory_data
            ]
            memory_rel_embed = [
                get_rel_embed(current_model, this_memory, all_relations,
                              alignment_model, False)
                for this_memory in memory_data
            ]
        results = [
            evaluate_model(current_model, test_data, batch_size, all_relations,
                           device, alignment_model)
            for test_data in current_test_data
        ]
        print_list(results)
        sequence_results.append(np.array(results))
        result_whole_test.append(
            evaluate_model(current_model, testing_data, batch_size,
                           all_relations, device, alignment_model))
    print('test set size:', [len(test_set) for test_set in current_test_data])
    return sequence_results, result_whole_test
Example no. 27
# Perform preprocessing here if needed
x_train = x_train.reshape((len(x_train), -1)) / 255.
x_test = x_test.reshape((len(x_test), -1)) / 255.

# Make a dataset object like so and give it a name
dataset = (x_train, y_train), (x_test, y_test)
dataset_name = 'MNIST'

# Set source and target labels for transfer learning (align, orthogonal will be computed automatically)
source_labels = [3, 4]
target_labels = [8, 9]

# Set your model and model name (needs a .predict method that returns class-label predictions)
model = XGBClassifier()
model_name = 'XGB'

# Choose the number of trials (more trials give tighter uncertainty estimates!)
n_trials = 50
evaluate_model(model,
               n_trials,
               dataset,
               source_labels,
               target_labels,
               n_target=4,
               n_source_max=2000,
               points_per_trial=20,
               model_name=model_name,
               dataset_name=dataset_name,
               save_pickles=False)
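
Any model exposing a .predict method that returns class labels should drop into the harness above; for instance, a hypothetical swap to a scikit-learn classifier:

from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=200)  # any .predict-capable classifier
model_name = 'RF'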
Example no. 28
    print("""
    Training the model
    """)

    # running the model
    args, gt_change, numpy_rasters, trained_model, datasets = main()

    print("""
    We now test the results for several models
    """)

    # flag controlling whether evaluation runs
    evaluation = False

    # performing evaluation on the different models
    if evaluation:
        print("AE_rad")
        eval_model.evaluate_model("AE_rad", gt_change)
        print("AE_rad+DAN")
        eval_model.evaluate_model("AE_rad+DAN", gt_change)
        print("AE_Mmodal")
        eval_model.evaluate_model("AE_Mmodal", gt_change)
        print("AE_Mmodal+DAN")
        eval_model.evaluate_model("AE_Mmodal+DAN", gt_change)
        print("AE_Mmodal+DAN+split")
        eval_model.evaluate_model("AE_Mmodal+DAN+split", gt_change)
        print("AE_alt+DAN")
        eval_model.evaluate_model("AE_alt+DAN", gt_change)
        print("bayesian_model")
        eval_model.evaluate_model("bayesian_model", gt_change)
Example no. 29
    def fit(self):
        parameters = self.parameters
        conf_parameters = self.conf_parameters
        dataset_filepaths = self.dataset_filepaths
        dataset = self.dataset
        dataset_brat_folders = self.dataset_brat_folders
        sess = self.sess
        model = self.model
        transition_params_trained = self.transition_params_trained
        stats_graph_folder, experiment_timestamp = self._create_stats_graph_folder(parameters)

        # Initialize and save execution details
        start_time = time.time()
        results = {}
        results['epoch'] = {}
        results['execution_details'] = {}
        results['execution_details']['train_start'] = start_time
        results['execution_details']['time_stamp'] = experiment_timestamp
        results['execution_details']['early_stop'] = False
        results['execution_details']['keyboard_interrupt'] = False
        results['execution_details']['num_epochs'] = 0
        results['model_options'] = copy.copy(parameters)

        model_folder = os.path.join(stats_graph_folder, 'model')
        utils.create_folder_if_not_exists(model_folder)
        with open(os.path.join(model_folder, 'parameters.ini'), 'w') as parameters_file:
            conf_parameters.write(parameters_file)
        pickle.dump(dataset, open(os.path.join(model_folder, 'dataset.pickle'), 'wb'))
            
        tensorboard_log_folder = os.path.join(stats_graph_folder, 'tensorboard_logs')
        utils.create_folder_if_not_exists(tensorboard_log_folder)
        tensorboard_log_folders = {}
        for dataset_type in dataset_filepaths.keys():
            tensorboard_log_folders[dataset_type] = os.path.join(stats_graph_folder, 'tensorboard_logs', dataset_type)
            utils.create_folder_if_not_exists(tensorboard_log_folders[dataset_type])
                
        # Instantiate the writers for TensorBoard
        writers = {}
        for dataset_type in dataset_filepaths.keys():
            writers[dataset_type] = tf.summary.FileWriter(tensorboard_log_folders[dataset_type], graph=sess.graph)
        embedding_writer = tf.summary.FileWriter(model_folder) # embedding_writer has to write in model_folder, otherwise TensorBoard won't be able to view embeddings

        embeddings_projector_config = projector.ProjectorConfig()
        tensorboard_token_embeddings = embeddings_projector_config.embeddings.add()
        tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name
        token_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_tokens.tsv')
        tensorboard_token_embeddings.metadata_path = os.path.relpath(token_list_file_path, '..')

        tensorboard_character_embeddings = embeddings_projector_config.embeddings.add()
        tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name
        character_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_characters.tsv')
        tensorboard_character_embeddings.metadata_path = os.path.relpath(character_list_file_path, '..')

        projector.visualize_embeddings(embedding_writer, embeddings_projector_config)

        # Write metadata for TensorBoard embeddings
        token_list_file = codecs.open(token_list_file_path,'w', 'UTF-8')
        for token_index in range(dataset.vocabulary_size):
            token_list_file.write('{0}\n'.format(dataset.index_to_token[token_index]))
        token_list_file.close()

        character_list_file = codecs.open(character_list_file_path,'w', 'UTF-8')
        for character_index in range(dataset.alphabet_size):
            if character_index == dataset.PADDING_CHARACTER_INDEX:
                character_list_file.write('PADDING\n')
            else:
                character_list_file.write('{0}\n'.format(dataset.index_to_character[character_index]))
        character_list_file.close()


        # Start training + evaluation loop. Each iteration corresponds to 1 epoch.
        bad_counter = 0  # number of epochs with no F1-score improvement on the validation set
        previous_best_valid_f1_score = 0
        epoch_number = -1
        try:
            while True:
                step = 0
                epoch_number += 1
                print('\nStarting epoch {0}'.format(epoch_number))

                epoch_start_time = time.time()

                if epoch_number != 0:
                    # Train model: loop over all sequences of training set with shuffling
                    sequence_numbers = list(range(len(dataset.token_indices['train'])))
                    random.shuffle(sequence_numbers)
                    for sequence_number in sequence_numbers:
                        transition_params_trained = train.train_step(sess, dataset, sequence_number, model, parameters)
                        step += 1
                        if step % 10 == 0:
                            print('Training {0:.2f}% done'.format(step/len(sequence_numbers)*100), end='\r', flush=True)

                epoch_elapsed_training_time = time.time() - epoch_start_time
                print('Training completed in {0:.2f} seconds'.format(epoch_elapsed_training_time), flush=True)

                y_pred, y_true, output_filepaths = train.predict_labels(sess, model, transition_params_trained, parameters, dataset, epoch_number, stats_graph_folder, dataset_filepaths)

                # Evaluate model: save and plot results
                evaluate.evaluate_model(results, dataset, y_pred, y_true, stats_graph_folder, epoch_number, epoch_start_time, output_filepaths, parameters)

                if parameters['use_pretrained_model'] and not parameters['train_model']:
                    conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder)
                    break

                # Save model
                model.saver.save(sess, os.path.join(model_folder, 'model_{0:05d}.ckpt'.format(epoch_number)))

                # Save TensorBoard logs
                summary = sess.run(model.summary_op, feed_dict=None)
                writers['train'].add_summary(summary, epoch_number)
                writers['train'].flush()
                utils.copytree(writers['train'].get_logdir(), model_folder)


                # Early stop
                valid_f1_score = results['epoch'][epoch_number][0]['valid']['f1_score']['micro']
                if valid_f1_score > previous_best_valid_f1_score:
                    bad_counter = 0
                    previous_best_valid_f1_score = valid_f1_score
                    conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder, overwrite=True)
                    self.transition_params_trained = transition_params_trained
                else:
                    bad_counter += 1
                print("The last {0} epochs have not shown improvements on the validation set.".format(bad_counter))

                if bad_counter >= parameters['patience']:
                    print('Early Stop!')
                    results['execution_details']['early_stop'] = True
                    break

                if epoch_number >= parameters['maximum_number_of_epochs']: break


        except KeyboardInterrupt:
            results['execution_details']['keyboard_interrupt'] = True
            print('Training interrupted')

        print('Finishing the experiment')
        end_time = time.time()
        results['execution_details']['train_duration'] = end_time - start_time
        results['execution_details']['train_end'] = end_time
        evaluate.save_results(results, stats_graph_folder)
        for dataset_type in dataset_filepaths.keys():
            writers[dataset_type].close()
Example no. 30
import numpy as np
import pandas as pd

from nltk.translate.bleu_score import corpus_bleu
from evaluate import evaluate_model

def evaluation(model, src, src_test, trg, trg_test, config):
	bleu = evaluate_model(
		model, src, src_test, trg,
		trg_test, config, verbose=False,
		metric='bleu',
	)
	return bleu
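
A hedged usage sketch, assuming a trained model and held-out source/target splits matching the signature above:

test_bleu = evaluation(model, src, src_test, trg, trg_test, config)
print('Test BLEU: {:.2f}'.format(test_bleu))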
Example no. 31
File: train.py Project: techbala/ml
    model_id = generate_model_id()
    print('model id:', model_id)

    base_dir = 'data/working/single-notes-2000'
    input_dir = base_dir + '/features-04-unscaled/training-data'
    model_dir = base_dir + '/models/' + model_id
    output_dir = model_dir + '/output-data'
    evaluation_dir = model_dir + '/evaluation'

    prepare_dirs([input_dir, model_dir, output_dir, evaluation_dir])

    store_model_files(input_dir, model_dir)

    x, y, ix = load_data(input_dir)

    model = create_model(input_shape=x.shape[1:], class_count=y.shape[1])
    model.summary()
    model = train_model(model,
                        x,
                        y,
                        ix,
                        model_dir,
                        evaluation_dir,
                        epoch_count=30)

    y_proba_pred = predict(model, x, y, ix, output_dir)

    compute_final_metrics(model, x, y, ix, y_proba_pred, evaluation_dir)

    evaluate_model(input_dir, model_dir)
Example no. 32
def run(metric, method, epochs, evaluation_threads, dataset, data_path,
        start_fold, num_folds, verbose):

    all_best_mrr_ten, all_best_ndcg_ten, all_best_mrr, all_best_ndcg, all_best_loss = [], [], [], [], []

    for fold in range(start_fold, num_folds):
        # Loading data
        print("Fold " + str(fold))
        t1 = time()
        path = data_path + "fold" + str(fold) + "/"
        dataset_name = dataset_loader(path, dataset, method)
        training_data_matrix, test_ratings, test_positives, train_items = \
            dataset_name.train_matrix, dataset_name.test_ratings, \
            dataset_name.test_positives, dataset_name.train_items
        num_users, num_items = training_data_matrix.shape
        print(
            "Data load done in [%.1f s]. #user=%d, #item=%d, #train=%d, #test=%d"
            % (time() - t1, num_users, num_items, training_data_matrix.nnz,
               len(test_ratings)))

        itempop = training_data_matrix.sum(axis=0)

        all_mrr_ten, all_ndcg_ten, all_mrr, all_ndcg = [], [], [], []
        best_itr, best_mrr_ten, best_ndcg_ten, best_mrr, best_ndcg, best_loss = 0, 0.0, 0.0, 0.0, 0.0, 123456789

        for epoch in range(epochs):

            # Evaluation
            if epoch % verbose == 0:
                mrr_tens, ndcg_tens, mrrs, ndcgs, losses = evaluate_model(
                    train_items, method, metric, dataset, itempop,
                    test_ratings, test_positives, None, 10, evaluation_threads,
                    None)
                mrr_ten, ndcg_ten, mrr, ndcg, loss = np.array(mrr_tens).mean(), np.array(ndcg_tens).mean(), \
                    np.array(mrrs).mean(), np.array(ndcgs).mean(), np.array(losses).mean()

                all_mrr_ten.append(mrr_ten)
                all_ndcg_ten.append(ndcg_ten)

                all_mrr.append(mrr)
                all_ndcg.append(ndcg)

                print(
                    'Iteration %d: MRR@10 = %.3f, NDCG@10 = %.3f,MRR = %.3f, NDCG = %.3f, LOSS = %.3f'
                    % (epoch, mrr_ten, ndcg_ten, mrr, ndcg, loss))
                print('AvgMRR@10 = %.3f, AvgNDCG@10 = %.3f,AvgMRR = %.3f, AvgNDCG = %.3f'
                    %(np.array(all_mrr_ten).mean(), np.array(all_ndcg_ten).mean() , \
                         np.array(all_mrr).mean(), np.array(all_ndcg).mean()))
                if ndcg_ten > best_ndcg_ten:
                    best_itr, best_mrr_ten, best_ndcg_ten, best_mrr, best_ndcg, best_loss = epoch, mrr_ten, ndcg_ten, mrr, ndcg, loss
                    #if args.out > 0:
                    #model.save_weights(model_out_file, overwrite=True)

        print(
            "End. Best Iteration %d:  MRR@10 = %.3f, NDCG@10 = %.3f, MRR = %.3f, NDCG = %.3f, LOSS = %.3f. "
            % (best_itr, best_mrr_ten, best_ndcg_ten, best_mrr, best_ndcg,
               best_loss))
        all_best_mrr_ten.append(best_mrr_ten)
        all_best_ndcg_ten.append(best_ndcg_ten)
        all_best_mrr.append(best_mrr)
        all_best_ndcg.append(best_ndcg)
        all_best_loss.append(best_loss)

    print(
        "End. Mean Scores : MRR@10 = %.3f, NDCG@10 = %.3f, MRR = %.3f, NDCG = %.3f,  LOSS = %.3f. "
        % (np.array(all_best_mrr_ten).mean(),
           np.array(all_best_ndcg_ten).mean(), np.array(all_best_mrr).mean(),
           np.array(all_best_ndcg).mean(), np.array(all_best_loss).mean()))
Example no. 33
    return '%s_%x' % (date_part, random_part)

if __name__ == '__main__':
    model_id = generate_model_id()
    print('model id:', model_id)

    base_dir = 'data/working/single-notes-2000'
    input_dir = base_dir + '/features-04-unscaled/training-data'
    model_dir = base_dir + '/models/' + model_id
    output_dir = model_dir + '/output-data'
    evaluation_dir = model_dir + '/evaluation'

    prepare_dirs([input_dir, model_dir, output_dir, evaluation_dir])

    store_model_files(input_dir, model_dir)

    x, y, ix = load_data(input_dir)

    model = create_model(input_shape=x.shape[1:], class_count=y.shape[1])
    model.summary()
    model = train_model(model,
        x, y, ix,
        model_dir, evaluation_dir,
        epoch_count=30)

    y_proba_pred = predict(model, x, y, ix, output_dir)

    compute_final_metrics(model, x, y, ix, y_proba_pred, evaluation_dir)

    evaluate_model(input_dir, model_dir)