Example #1
def run(args):

    corpus_train, corpus_val, corpus_test = get.compute_corpora()
    vocabulary, embeddings = get.compute_polyglot_words_embeddings()

    rules, non_terms, proba_unary, proba_binary, transition_rules = pcfg.compute_proba_and_rules(
        corpus_train)

    if args.do_inference:
        get.compute_ground_truth(corpus_test,
                                 filename='evaluation_data.ground_truth')
        get.compute_predictions(rules,
                                non_terms,
                                proba_unary,
                                proba_binary,
                                transition_rules,
                                vocabulary,
                                embeddings,
                                corpus_test,
                                filename='evaluation_data.parser_output')

    if args.evaluate:
        evaluation('evaluation_data.ground_truth',
                   'evaluation_data.parser_output')

    if args.parse:
        parsing.parse_from_txt(args.txt_path, rules, non_terms, proba_unary,
                               proba_binary, transition_rules, vocabulary,
                               embeddings)
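The flags run() reads are plain attributes of args. Below is a minimal, hypothetical argparse wiring that would drive it; the flag names are inferred from the attribute accesses above (args.do_inference, args.evaluate, args.parse, args.txt_path), not taken from the original repo:

import argparse

# hypothetical CLI entry point for run(); flag names mirror the attributes
# the function reads
if __name__ == '__main__':
    cli = argparse.ArgumentParser()
    cli.add_argument('--do_inference', action='store_true')
    cli.add_argument('--evaluate', action='store_true')
    cli.add_argument('--parse', action='store_true')
    cli.add_argument('--txt_path', default='input.txt')
    run(cli.parse_args())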
Example #2
def test3():
    e = evaluation()
    line = [0, 0, 1, 0, 1, 1, 1, 0, 0, 0]
    record = []
    e.analysis_line(line, record, len(line), 6)
    print(record[:10])
    return 0
Example #3
def test2():
    b = chessboard()
    b[7][7] = 1
    b[8][8] = 2
    b[7][9] = 1
    eva = evaluation()
    for l in eva.POS:
        print(l)
    return 0
Example #4
def main(_):
    FLAGS.agent = model(params=FLAGS)
    FLAGS.environment = get_env(FLAGS)
    FLAGS.act = action()

    FLAGS.step_max = FLAGS.environment.data_len()
    FLAGS.train_freq = 40
    FLAGS.update_q_freq = 50
    FLAGS.gamma = 0.97
    FLAGS.show_log_freq = 5
    FLAGS.memory = []  #Experience(FLAGS.memory_size)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    # create the directory used to save model checkpoints
    if not os.path.exists(FLAGS.model_dir):
        os.makedirs(FLAGS.model_dir)
    start = time.time()

    with tf.Session() as sess:
        sess.run(init)
        evaluator = evaluation(FLAGS, sess)
        ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
        if ckpt:
            print('Loading Model...')
            saver.restore(sess, ckpt.model_checkpoint_path)
        total_step = 1
        print('\t'.join(
            map(str, [
                "epoch", "epsilon", "total_step", "rewardPerEpoch", "profits",
                "lossPerBatch", "elapsed_time"
            ])))
        for epoch in range(FLAGS.epoch_num):
            avg_loss_per_batch, total_reward, total_step, profits = run_epch(
                FLAGS, sess, total_step)
            # total_rewards.append(total_reward)
            # total_losses.append(total_loss)

            if (epoch + 1) % FLAGS.show_log_freq == 0:
                # log_reward = sum(total_rewards[((epoch+1)-FLAGS.show_log_freq):])/FLAGS.show_log_freq
                # log_loss = sum(total_losses[((epoch+1)-FLAGS.show_log_freq):])/FLAGS.show_log_freq
                elapsed_time = time.time() - start
                #print('\t'.join(map(str, [epoch+1, FLAGS.act.epsilon, total_step, log_reward, log_loss, elapsed_time])))
                print('\t'.join(
                    map(str, [
                        epoch + 1, FLAGS.act.epsilon, total_step, total_reward,
                        profits, avg_loss_per_batch, elapsed_time
                    ])))
                start = time.time()

                saver.save(
                    sess,
                    os.path.join(FLAGS.model_dir,
                                 'model-' + str(epoch + 1) + '.ckpt'))
                evaluator.eval()
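The commented-out Experience(FLAGS.memory_size) next to FLAGS.memory suggests a bounded replay buffer, whereas a plain list grows without limit. A minimal stand-in with that bounded behavior, assuming FLAGS.memory_size is defined as the comment implies:

from collections import deque

# bounded replay memory: appending past maxlen silently evicts the oldest entry
FLAGS.memory = deque(maxlen=FLAGS.memory_size)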
Example #5
def LR_train(data, label, alpha, valid_data, gt, mode='SGD'):
	# the first column of data is all ones (augmented vector with bias term)
	eva = np.zeros((1000,4))
	eva2 = np.zeros((1000,4))
	penalty_lambda = 10
	w = np.zeros(data.shape[1])
	if(mode=='SGD'):
		for i in range(1000):
			if(i>800):
				alpha *= 0.95
			for x,y in zip(data,label):
				err = y - sigmoid(np.dot(x,w))
				# stochastic update with the L2 penalty term (matches the GD branch)
				w += alpha*(err*x+2*penalty_lambda*w)

			mult_result = np.sum(data*w, axis=1)
			result = np.array(list(map(to_lable,mult_result))).astype(int)
			eva[i,:] = evaluation(label, result)
		
	elif(mode=='GD'):
		for i in range(1000):
			if(i>800):
				alpha *= 0.95
			err = label - sigmoid(np.sum(data*w,axis=1))
			# since we use label - h, here use add
			w += alpha*(np.sum(data.transpose()*err,axis=1)+2*penalty_lambda*w)

			mult_result = np.sum(data*w, axis=1)
			result = np.array(list(map(to_lable,mult_result))).astype(int)
			eva[i,:] = evaluation(label, result)
			result = LR_classification(w, valid_data, gt)
			eva2[i,:] = evaluation(gt, result)

	np.savetxt('./training_eva.csv', eva, delimiter=',', fmt='%.6f')
	np.savetxt('./training_eva2.csv', eva2, delimiter=',', fmt='%.6f')
	return w
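LR_train relies on two helpers the snippet does not show, sigmoid and to_lable (the misspelling is the repo's own). Minimal sketches consistent with how they are called above:

import numpy as np

def sigmoid(z):
    # logistic function used in the gradient updates above
    return 1.0 / (1.0 + np.exp(-z))

def to_lable(score):
    # maps a raw linear score w.x to a 0/1 label; score 0 corresponds to
    # sigmoid(0) = 0.5, the usual decision boundary
    return 1 if score > 0 else 0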
Example #6
def LR(train_file, test_file, valid_flag, test_flag, mode):
	k = 5
	alpha = 0.000001
	data = pd.read_csv(train_file, header=None).values
	splited_data = split_dataset(data, k)
	cnt = 1
	eva_index = np.zeros((10,4))
	for it in range(k):	
		print('Iteration '+str(cnt))
		cnt+=1	
		if(k!=1):
			train_set = np.delete(splited_data,it,axis=0).reshape((data.shape[0]//k)*(k-1),data.shape[1])
			label = train_set[:,-1]
			train_set = train_set[:,0:-1]
			aug_data = np.column_stack((np.ones(train_set.shape[0]).transpose(),train_set))
			valid_set = splited_data[it] # use the held-out fold as the validation set
			gt = valid_set[:,-1]
			valid_set = valid_set[:,0:-1]
			augvalid_data = np.column_stack((np.ones(valid_set.shape[0]).transpose(),valid_set))
		else:
			label = data[:,-1]
			train_set = data[:,0:-1]
			aug_data = np.column_stack((np.ones(train_set.shape[0]).transpose(),train_set))
		print('Finish reading files')
		print('begin training with '+mode)
		time_start = time.time()
		trained_w = LR_train(aug_data, label, alpha, augvalid_data, gt, mode=mode)
		np.savetxt('./w.csv', trained_w, delimiter=',', fmt='%.6f')
		time_finish= time.time()
		print('training time:'+str(time_finish-time_start)+'s')		
		if(k!=1):
			print('err on training set')
			result = LR_classification(trained_w, aug_data, label)
			eva_index[it,:] = evaluation(label, result)
			print(eva_index[it])
			print('evaluate on validation set')
			result = LR_classification(trained_w, augvalid_data, gt)			
			eva_index[it,:] = evaluation(gt, result)
			print(eva_index[it])	
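LR() also assumes split_dataset and LR_classification from the same project. Hedged sketches that match the shapes used above (splited_data must have shape (k, n//k, n_features), and classification thresholds the augmented dot product at 0):

import numpy as np

def split_dataset(data, k):
    # shuffle the rows and cut them into k equal folds, dropping any remainder
    n = (data.shape[0] // k) * k
    shuffled = np.random.permutation(data)[:n]
    return shuffled.reshape(k, n // k, data.shape[1])

def LR_classification(w, aug_data, gt):
    # score each augmented row and threshold at 0; gt is unused here but
    # kept to match the call signature in the snippet
    return (np.sum(aug_data * w, axis=1) > 0).astype(int)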
Example #7
def run(args):

    has_effect = False

    if args:
        try:

            train_corpus, val_corpus, test_corpus = data.get_train_val_test()
            words, embeddings = data.get_polyglot_words_embeddings()

            parser = PCFG()
            parser.learn_probabilities_and_rules(train_corpus)
            parser.set_oov_module(OovModule, words, embeddings)

            if args.inference:

                get_gold(parser, test_corpus, filename='evaluation_data.gold')
                get_predictions(parser,
                                test_corpus,
                                filename='evaluation_data.parser_output')

            if args.evaluation:
                evaluation('evaluation_data.gold',
                           'evaluation_data.parser_output')

            if args.parse:
                parser.parse_from_txt(args.txt_path)

        except Exception as e:
            logger.exception(e)
            logger.error("Uhoh, the script halted with an error.")
    else:
        if not has_effect:
            logger.error(
                "Script halted without any effect. To run code, use command:\npython3 main.py <args>"
            )
Example #8
def test4():
    b = chessboard()
    b.loads(
        '2:DF 1:EG 2:FG 1:FH 2:FJ 2:GG 1:GH 1:GI 2:HG 1:HH 1:IG 2:IH 1:JF 2:JI 1:KE'
    )
    b.loads(
        '2:CE 2:CK 1:DF 1:DK 2:DL 1:EG 1:EI 1:EK 2:FG 1:FH 1:FI 1:FJ 1:FK 2:FL 1:GD 2:GE 2:GF 2:GG 2:GH 1:GI 1:GK 2:HG 1:HH 2:HJ 2:HK 2:IG 1:JG 2:AA'
    )
    eva = evaluation()
    print(b)
    score = 0
    t = time.time()
    for i in range(10000):
        score = eva.evaluate(b.board(), 2)
    #eva.test(b.board())
    t = time.time() - t
    print(score, t)
    print(eva.textrec(3))
    return 0
Example #9
with codecs.open(os.path.join(sys.argv[8] + "\\", "PCFG_unary_freq.pkl"),
                 'rb') as file:
    unary_freq = pickle.load(file)

with codecs.open(os.path.join(sys.argv[8] + "\\", "PCFG_binary_freq.pkl"),
                 'rb') as file:
    binary_freq = pickle.load(file)

with codecs.open(os.path.join(sys.argv[8] + "\\", "PCFG_postags_freq.pkl"),
                 'rb') as file:
    postags_freq = pickle.load(file)

#####################################################################
#                               MAIN                                #
#####################################################################
parser = CYK_parser_class.CYK_parser()
parser.initialize(NT_set, T_set, postags_set, unary_freq, binary_freq,
                  postags_freq, unary_dict, binary_dict, postags_dict)

parser.parse_corpus(input=sys.argv[8] + "\\" + 'sequoia_test_corrected.txt',
                    output=sys.argv[8] + "\\" + 'output.txt')

#####################################################################
#                        Evaluation if needed                       #
#####################################################################
if sys.argv[7] == 'True':
    import evaluate
    evaluate.evaluation()
    print('Evaluation Done')
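The three with-blocks above repeat one pattern; a small helper collapses them (the name load_pkl and the portable os.path.join call are mine, not the original script's):

import codecs
import os
import pickle
import sys

def load_pkl(dirname, name):
    # read one pickled PCFG frequency table from the given directory
    with codecs.open(os.path.join(dirname, name), 'rb') as f:
        return pickle.load(f)

# e.g. unary_freq = load_pkl(sys.argv[8], "PCFG_unary_freq.pkl")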
Example #10
def __init__(self):
    self.evaluator = evaluation()
    self.board = [[0 for n in range(15)] for i in range(15)]
    self.gameover = 0
    self.overvalue = 0
    self.maxdepth = 3
Example #11
                    cur_loss, cur_loss1, cur_loss2))
        total_loss = 0
        total_loss1 = 0
        total_loss2 = 0

        start_time = time.time()
    if iter % save_interval == 0:
        save_path = os.path.join(args.save_path, args.task)
        if not os.path.exists(save_path):
            os.mkdir(save_path)
        torch.save([model, optimizer, criterion],
                   os.path.join(save_path, f'save_4_{args.lamda}.pt'))
        score = evaluation(model,
                           corpus,
                           args.task,
                           args.batch_size,
                           dataset='val',
                           div=True,
                           reg=True)
        print('DEV accuracy: ' + str(score))
        with open(os.path.join(save_path, f'record_4_{args.lamda}.txt'),
                  'a',
                  encoding='utf-8') as fpw:
            if iter == 0: fpw.write(str(args) + '\n')
            fpw.write(str(iter) + ':\tDEV accuracy:\t' + str(score) + '\n')

        if score > best_dev_score:
            best_dev_score = score
            torch.save([model, optimizer, criterion],
                       os.path.join(save_path, f'save_best_4_{args.lamda}.pt'))
Example #12
#print(X)
#print(Y)

x_train, y_train, x_test, y_test = split_dataset(X, Y, 0.8)
print('xtrain :', x_train.shape)
print('ytrain :', y_train.shape)

### Model Training ###
''' Trains our model. '''
inpt = (x_train.shape[1], x_train.shape[2])  #inpt = (2,22)
outp = y_train.shape[1]  #outp = 22
model = lstm_model(outp, inpt)
model.summary()

train_model(x_train, y_train, (x_test, y_test), model, 'mse', 'adam',
            ['accuracy'])
''' Training can be visualized in TensorBoard.
Run "tensorboard --logdir trainings" in a terminal;
"trainings" is the directory where the training data is logged.
Then open the URL the terminal returns (http://localhost:6006/). '''

## EVALUATION ##
'''Evaluates our model; takes x_test, y_test, nb_categories,
the K top score, and the model as parameters. '''
K_topscore = 5
p = evaluation(x_test, y_test, nb_categories, K_topscore, model)

#print(p_acc)
#print(conf_mat)
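The internals of the evaluation() call above are not shown; it receives a K_topscore of 5. Here is a sketch of the kind of top-K accuracy such a call might compute; the names are illustrative, not the repo's actual code:

import numpy as np

def top_k_accuracy(y_true, y_prob, k):
    # y_true: (n,) integer labels; y_prob: (n, n_classes) predicted scores.
    # A sample counts as correct if its true label is among the k
    # highest-scoring classes.
    topk = np.argsort(y_prob, axis=1)[:, -k:]
    return float(np.mean([y in row for y, row in zip(y_true, topk)]))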
Example #13
def main(model, data_set, parameters):
	with tf.Session(config = tf_config) as sess:
		sess.run(tf.global_variables_initializer())
		print("initialization Completed")
		print()

		start_time = time()
		(precisions, recalls, F1s, ndcgs, one_calls, eval_loss) = evaluation(model, sess, parameters.top_k, data_set.test_unobserved_dict, data_set.test_dict, data_set.all_items)
		ndcgs = np.mean(np.array(ndcgs), axis = 0)
		precisions = np.mean(np.array(precisions), axis = 0)
		recalls = np.mean(np.array(recalls), axis = 0)
		F1s = np.mean(np.array(F1s), axis = 0)
		one_calls = np.mean(np.array(one_calls), axis = 0)
		end_time = time()

		print('Init prediction completed [%.1f s]: eval_loss = %.4f, NDCG@%d= %.4f' % (end_time-start_time, np.mean(eval_loss), parameters.top_k, ndcgs[parameters.top_k]))
		displayResult("Precision",precisions)
		displayResult("Recall",recalls)
		displayResult("F1",F1s)
		displayResult("NDCG",ndcgs)
		displayResult("1-call",one_calls)
		print()

		best_prec, best_rec, best_f1, best_ndcg, best_1call, best_iter = precisions, recalls, F1s, ndcgs, one_calls, -1
		best_ndcg_5 = ndcgs[parameters.top_k]

		patience_count = 0

		tf.set_random_seed(1) # graph-level seed (as opposed to op-level): ops defined after this generate the same random numbers across sessions

		for epoch in range(parameters.max_epoch_number):

			users_input, items_input, ratings_input = get_train_instances(data_set.train_pairs, parameters.negative_ratio, parameters.n, parameters.m, data_set.all_users, data_set.all_items)
			train_pairs_number = len(users_input)

			shuffled_indexs = np.random.permutation(np.arange(train_pairs_number)) # np.random.permutation shuffles the index sequence and returns the shuffled copy
			users_input = users_input[shuffled_indexs]
			items_input = items_input[shuffled_indexs]
			ratings_input = ratings_input[shuffled_indexs]

			batch_number = train_pairs_number // parameters.batch_size + 1 # number of batches needed to cover all training pairs at this batch size

			losses = [] # record train loss for each time
			start_time = time()
			for i in range(batch_number):
			    start = i * parameters.batch_size
			    end = np.min([train_pairs_number, (i+1)*parameters.batch_size]) # if the number of elements in last batch is less than batch size, just process the remaining elements

			    users_batch = users_input[start : end]
			    items_batch = items_input[start : end]
			    ratings_batch = ratings_input[start : end]
				# the user placeholder has shape [None, 1], so transform [user1, user2, ...] into [[user1], [user2], ...] with [:, None]; otherwise feed_dict raises a shape error
			    _, batch_loss = sess.run([model.optimizer, model.loss], feed_dict = {model.user : users_batch[:,None], model.item : items_batch[:,None], model.rating : ratings_batch[:,None]})
			    losses.append(batch_loss) # batch loss is the log loss of ([all_ratings_in_batch] ,[all_outputs_in_batch]), the result is a float type value and is appended to "losses"
			end_time = time()
			train_time = end_time - start_time

			if epoch % parameters.verbose == 0:
			    start_time = time()
			    (precisions, recalls, F1s, ndcgs, one_calls, eval_loss) = evaluation(model, sess, parameters.top_k, data_set.test_unobserved_dict, data_set.test_dict, data_set.all_items)
			    ndcgs = np.mean(np.array(ndcgs), axis = 0)
			    precisions = np.mean(np.array(precisions), axis = 0)
			    recalls = np.mean(np.array(recalls), axis = 0)
			    F1s = np.mean(np.array(F1s), axis = 0)
			    one_calls = np.mean(np.array(one_calls), axis = 0)
			    end_time = time()
			    eval_time = end_time- start_time

			    print('Iteration %d: train_loss = %.4f[%.1f s], eval_loss = %.4f[%.1f s], NDCG@%d = %.4f'% (epoch+1, np.mean(losses), train_time, np.mean(eval_loss), eval_time, parameters.top_k, ndcgs[parameters.top_k]))
			    displayResult("Precision", precisions)
			    displayResult("Recall", recalls)
			    displayResult("F1", F1s)
			    displayResult("NDCG" ,ndcgs)
			    displayResult("1-call", one_calls)
			    print()

			    if ndcgs[parameters.top_k] > best_ndcg_5: # evaluation() ensures the index starts at 1, e.g. ndcgs[5] is NDCG@5
			        best_prec, best_rec, best_f1, best_ndcg, best_1call, best_iter = precisions, recalls, F1s, ndcgs, one_calls, epoch+1
			        best_ndcg_5 = ndcgs[parameters.top_k]
			        patience_count = 0
			    else:
			        patience_count += 1

			    if patience_count > parameters.patience:
			        break

		print("End. Best Iteration %d: NDCG@%d = %.4f " % (best_iter, parameters.top_k, best_ndcg_5))
		displayResult("Precision", best_prec)
		displayResult("Recall", best_rec)
		displayResult("F1", best_f1)
		displayResult("NDCG", best_ndcg)
		displayResult("1-call", best_1call)
Example #14
def train_and_val(model, train_dataloader, val_dataloader, criterion, optimizer, args, config):
    '''
    input:
        model: torch.nn.Module, the neural network
        train_loader: a torch.utils.data.DataLoader obj for the training
        val_dataloader: for validation
        criterion: to compute the loss function
        args: parameters from the command line
        config: parameters specified inside
        # no metric is used during training and validation,
        # while the loss is not needed for testing.
    '''
    # restore weights here if starting in middle? PENDING

    print("len(train_dataloader.dataset)", len(train_dataloader.dataset))
    print("len(val_dataloader.dataset)", len(val_dataloader.dataset))
    
    epoch_loss = {"train_loss": [], "val_loss": []}
    loss_batches = {"train_loss": [], "val_loss": []}
    reconstruction_loss = {"train_loss": [], "val_loss": []}
    KL_loss = {"train_loss": [], "val_loss": []}
    best_val_loss = float("inf")  # any sufficiently high initial value works
    eval_metric = {"precision": [], "recall": []}

    for epoch in range(args.num_epochs):
        # run one epoch
        print("Epoch {}/{}".format(epoch + 1, args.num_epochs))
        # need to get the loss from the training too
        # train_loss for each epoch
        # need to pass the file_loc to do gradient checking during training
        
        train_epoch_loss, train_loss_batches, train_reconstruction_loss, train_KL_loss = train(
                model, train_dataloader, criterion, optimizer, config, epoch, config["exp_logs_dir"], args)
        
        epoch_loss["train_loss"].append(train_epoch_loss)
        loss_batches["train_loss"].append(train_loss_batches)
        reconstruction_loss["train_loss"].append(train_reconstruction_loss)
        KL_loss["train_loss"].append(train_KL_loss)
        
        # when to save the model is not obvious: the optimizer keeps changing
        # the weights during training, so we save both the latest weights
        # and the best ones seen so far
        
        val_loss, val_loss_batches, val_reconstruction_loss, val_KL_loss = validation(model,
                                        val_dataloader, criterion, config, epoch, args)
        
        epoch_loss["val_loss"].append(val_loss)
        loss_batches["val_loss"].append(val_loss_batches)
        reconstruction_loss["val_loss"].append(val_reconstruction_loss)
        KL_loss["val_loss"].append(val_KL_loss)
        
        # check for the best model
        is_best = val_loss <= best_val_loss
        
        # Save weights, overwrite the last one and the new best one
        print("save_checkpoint")
        utils.save_checkpoint({'epoch': epoch + 1,
                               'state_dict': model.state_dict(),
                               'optim_dict' : optimizer.state_dict()}, 
                               is_best=is_best,
                               checkpoint=config["exp_save_models_dir"])

        if is_best:
            print("is_best")
            best_loss_json_file = os.path.join(config["exp_save_models_dir"], "best_loss_batches.json")
            utils.save_dict_to_json(loss_batches, best_loss_json_file)
            best_val_loss = val_loss

        # overwrite the last epoch losses
        last_loss_json_file = os.path.join(config["exp_save_models_dir"], "last_loss_batches.json")
        utils.save_dict_to_json(loss_batches, last_loss_json_file)
        
        # Is there a better way to wrap large num of arguments
        prec, rec, _ = evaluate.evaluation(model, args, config)
        eval_metric["precision"].append(prec) 
        eval_metric["recall"].append(rec)
        
    eval_metric_json_file = os.path.join(config["exp_save_models_dir"], "val_eval_metric.json")
    utils.save_dict_to_json(eval_metric, eval_metric_json_file)
        
    all_epoch_loss_json_file = os.path.join(config["exp_save_models_dir"], "all_epoch_loss.json")
    utils.save_dict_to_json(epoch_loss, all_epoch_loss_json_file)
    
    # plot the stats
    graph_type = "epoch_loss"
    file_name = config["exp_logs_dir"]+graph_type+str(".pdf")
    utils.plot_loss_stats(epoch_loss, "Total Epoch Loss", file_name)
    
    graph_type = "reconstruction_loss"
    file_name = config["exp_logs_dir"]+graph_type+str(".pdf")
    utils.plot_loss_stats(reconstruction_loss, "Reconstruction Loss", file_name)
    #save the logs
    reconstruction_loss_json_file = os.path.join(config["exp_save_models_dir"], "reconstruction_loss.json")
    utils.save_dict_to_json(reconstruction_loss, reconstruction_loss_json_file)
    
    graph_type = "kL_loss"
    file_name = config["exp_logs_dir"]+graph_type+str(".pdf")
    utils.plot_loss_stats(KL_loss, "KL Loss", file_name)
    KL_loss_json_file = os.path.join(config["exp_save_models_dir"], "kL_loss.json")
    utils.save_dict_to_json(KL_loss, KL_loss_json_file)
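utils.save_dict_to_json, used throughout this function, is a project helper; a plausible minimal version under that assumption (not the repo's code):

import json

def save_dict_to_json(d, json_path):
    # serialize a dict of metrics/losses (floats or lists of floats) to JSON
    with open(json_path, 'w') as f:
        json.dump(d, f, indent=4)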
    
Example #15
# %% Accuracy Evaluation Example
import evaluate
targets = [[1, 1, 0, 0, 0, 0],
           [0, 0, 1, 1, 0, 0],
           [0, 0, 0, 0, 1, 1]]
outputs = [[0.1, 0.86, 0.2, 0.1, .02, 0.1],
           [0.4, 0.12, 0.768, 0.145, 0.1, 0.8],
           [0.454, 0.35, 0.21, 0.0, 0.89, 0.9999]]

ev = evaluate.evaluation(targets, outputs)

print('Confusion Results')
print('\tConfusion value = %0.2f' % ev.confusion.c)

print('Confusion Matrix')
for row in ev.confusion.cm:
    print('\t', end='')
    for col in row:
        print('%0.2f ' % col, end='')
    print()

print()
print('Indices')
for row in ev.confusion.ind:
    for col in row:
        if col == 0:
            print('\t[]')
        elif col == 1:
            print('\t[%d]' % col)
Example #16
    userList = range(numOfUser)
    userList = range(10000)  # overrides the line above: only the first 10000 users are evaluated
    everyOneLike = behavior.groupby('TV_NAME').agg({"newUserID": "count"})
    everyOneLike = everyOneLike.sort_values(by='newUserID',
                                            ascending=False).index.tolist()

    for p in [0.01]:
        x, y, z = evaluation(userList,
                             numOfUser,
                             numOfItem,
                             behavior,
                             test,
                             userLatentFactor,
                             userClassLatentFactor,
                             itemLatentFactor,
                             itemClassLatentFactor,
                             userBelong,
                             itemBelong,
                             N=10,
                             everyOneLike=everyOneLike,
                             rateList=(p, p, 1 - 4 * p, 2 * p))
        print(x, y, z)

    # load the raw data back from pickles
    userHasPayHistory = load_pickle("./temp/userHasPayHistory.data")
    itemNeedPay = load_pickle("./temp/itemNeedPay.data")
    mediaid = load_pickle("./temp/mediaid.data")
    userid = load_pickle("./temp/userid.data")

    itemNeedPayList = [
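load_pickle here is presumably a thin wrapper over the pickle module; a minimal sketch under that assumption:

import pickle

def load_pickle(path):
    # read one pickled object back from disk
    with open(path, 'rb') as f:
        return pickle.load(f)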
Example #17
	pred_result = np.array(pred_result)
	return pred_result

if __name__ == '__main__':
	#data = np.array([[1,1,1],[1,1,1],[1,0,-1],[0,1,-1],[0,1,-1]])
	#data = pd.read_csv('.\\my_train.csv',header=None).values
	k = 10
	data = pd.read_csv('.\\train.csv',header=None).values
	featurenumber = np.arange(data.shape[1]-1)
	splited_data = split_dataset(data, k)
	cnt = 1
	eva_index = np.zeros((10,4))
	for it in range(k):	
		print('Iteration '+str(cnt))
		cnt+=1	
		if(k!=1):
			train_set = np.delete(splited_data,it,axis=0).reshape((data.shape[0]//k)*(k-1),data.shape[1])
			valid_set = splited_data[it] # use the held-out fold as the validation set
		else:
			train_set = data
		tree = dt_train(train_set,featurenumber,'CART')
		if(k!=1):
			result = dt_classification(tree, valid_set)
			eva_index[it,:] = evaluation(valid_set[:,-1], result)
			print(eva_index[it])
			
	np.savetxt('./evaluation_indicators_for_CART.csv', eva_index, delimiter=',', fmt='%.6f')

	#createPlot(tree)	
	#test_data = pd.read_csv('.\\test.csv',header=None).values
	#print(dt_classification(tree,test_data))
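Examples #5, #6, and #17 all treat evaluation(gt, pred) as returning four indicators per call (eva_index has four columns). Here is a plausible sketch computing accuracy, precision, recall, and F1 for binary labels; this is an assumption, not the original implementation:

import numpy as np

def evaluation(gt, pred):
    # four binary-classification indicators: accuracy, precision, recall, F1
    gt, pred = np.asarray(gt), np.asarray(pred)
    tp = np.sum((gt == 1) & (pred == 1))
    fp = np.sum((gt == 0) & (pred == 1))
    fn = np.sum((gt == 1) & (pred == 0))
    acc = float(np.mean(gt == pred))
    prec = tp / (tp + fp) if tp + fp else 0.0
    rec = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * prec * rec / (prec + rec) if prec + rec else 0.0
    return np.array([acc, prec, rec, f1])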
Example #18
        criterion = torch.nn.CrossEntropyLoss()
        # with size_average set to False, the losses would be summed over
        # each minibatch instead of averaged
        #criterion.size_average = False
        learning_rate = 1e-4
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=learning_rate,
                                     weight_decay=1e-5)

    # train the model
    if args.exp_type == "train":
        train.train_and_val(model, train_dataloader, val_dataloader, \
                      criterion, optimizer, args, model_config)

    if args.exp_type == "evaluate":
        eval_config = {
            "num_items": num_items,
            "train_user_item_interaction_dict":
            train_user_item_interaction_dict,
            "test_user_item_interaction_dict": test_user_item_interaction_dict,
            "exp_save_models_dir":
            "./experiments/cvae/saved_models/analysis12/",
            "slate_size": 5  # for different slate size
        }
        # Is there a better way to wrap large num of arguments
        precision, recall, user_test_metric = evaluate.evaluation(
            model, args, eval_config)

        user_test_metric_json_file = os.path.join(
            eval_config["exp_save_models_dir"], "user_test_metric.json")
        utils.save_dict_to_json(user_test_metric, user_test_metric_json_file)