def run(args):
    corpus_train, corpus_val, corpus_test = get.compute_corpora()
    vocabulary, embeddings = get.compute_polyglot_words_embeddings()
    rules, non_terms, proba_unary, proba_binary, transition_rules = pcfg.compute_proba_and_rules(
        corpus_train)
    if args.do_inference:
        get.compute_ground_truth(corpus_test, filename='evaluation_data.ground_truth')
        get.compute_predictions(rules, non_terms, proba_unary, proba_binary, transition_rules,
                                vocabulary, embeddings, corpus_test,
                                filename='evaluation_data.parser_output')
    if args.evaluate:
        evaluation('evaluation_data.ground_truth', 'evaluation_data.parser_output')
    if args.parse:
        parsing.parse_from_txt(args.txt_path, rules, non_terms, proba_unary, proba_binary,
                               transition_rules, vocabulary, embeddings)
def test3():
    e = evaluation()
    line = [0, 0, 1, 0, 1, 1, 1, 0, 0, 0]
    record = []
    e.analysis_line(line, record, len(line), 6)
    print(record[:10])
    return 0
def test2():
    b = chessboard()
    b[7][7] = 1
    b[8][8] = 2
    b[7][9] = 1
    eva = evaluation()
    for l in eva.POS:
        print(l)
    return 0
def main(_):
    FLAGS.agent = model(params=FLAGS)
    FLAGS.environment = get_env(FLAGS)
    FLAGS.act = action()
    FLAGS.step_max = FLAGS.environment.data_len()
    FLAGS.train_freq = 40
    FLAGS.update_q_freq = 50
    FLAGS.gamma = 0.97
    FLAGS.show_log_freq = 5
    FLAGS.memory = []  # Experience(FLAGS.memory_size)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    # Create the directory used to save the model
    if not os.path.exists(FLAGS.model_dir):
        os.makedirs(FLAGS.model_dir)

    start = time.time()
    with tf.Session() as sess:
        sess.run(init)
        eval = evaluation(FLAGS, sess)
        ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
        if ckpt:
            print('Loading Model...')
            saver.restore(sess, ckpt.model_checkpoint_path)
        total_step = 1
        print('\t'.join(map(str, [
            "epoch", "epsilon", "total_step", "rewardPerEpoch", "profits",
            "lossPerBatch", "elapsed_time"
        ])))
        for epoch in range(FLAGS.epoch_num):
            avg_loss_per_batch, total_reward, total_step, profits = run_epch(
                FLAGS, sess, total_step)
            # total_rewards.append(total_reward)
            # total_losses.append(total_loss)
            if (epoch + 1) % FLAGS.show_log_freq == 0:
                # log_reward = sum(total_rewards[((epoch+1)-FLAGS.show_log_freq):])/FLAGS.show_log_freq
                # log_loss = sum(total_losses[((epoch+1)-FLAGS.show_log_freq):])/FLAGS.show_log_freq
                elapsed_time = time.time() - start
                # print('\t'.join(map(str, [epoch+1, FLAGS.act.epsilon, total_step, log_reward, log_loss, elapsed_time])))
                print('\t'.join(map(str, [
                    epoch + 1, FLAGS.act.epsilon, total_step, total_reward,
                    profits, avg_loss_per_batch, elapsed_time
                ])))
                start = time.time()
                saver.save(sess, os.path.join(FLAGS.model_dir,
                                              'model-' + str(epoch + 1) + '.ckpt'))
                eval.eval()
def LR_train(data, label, alpha, valid_data, gt, mode='SGD'):
    # The first column of data is all ones (augmented feature vector)
    eva = np.zeros((1000, 4))
    eva2 = np.zeros((1000, 4))
    penalty_lambda = 10
    w = np.zeros(data.shape[1])
    if mode == 'SGD':
        for i in range(1000):
            if i > 800:
                alpha *= 0.95
            for x, y in zip(data, label):
                err = y - sigmoid(np.dot(x, w))
                # stochastic gradient step with the L2 penalty term
                w += alpha * (err * x + 2 * penalty_lambda * w)
            mult_result = np.sum(data * w, axis=1)
            result = np.array(list(map(to_lable, mult_result))).astype(int)
            eva[i, :] = evaluation(label, result)
    elif mode == 'GD':
        for i in range(1000):
            if i > 800:
                alpha *= 0.95
            err = label - sigmoid(np.sum(data * w, axis=1))
            # since we use label - h, the gradient term is added
            w += alpha * (np.sum(data.transpose() * err, axis=1) + 2 * penalty_lambda * w)
            mult_result = np.sum(data * w, axis=1)
            result = np.array(list(map(to_lable, mult_result))).astype(int)
            eva[i, :] = evaluation(label, result)
            result = LR_classification(w, valid_data, gt)
            eva2[i, :] = evaluation(gt, result)
    np.savetxt('.//training_eva.csv', eva, delimiter=',', fmt='%.6f')
    np.savetxt('.//training_eva2.csv', eva2, delimiter=',', fmt='%.6f')
    return w
def LR(train_file, test_file, valid_flag, test_flag, mode):
    k = 5
    alpha = 0.000001
    data = pd.read_csv('.\\train.csv', header=None).values
    splited_data = split_dataset(data, k)
    cnt = 1
    eva_index = np.zeros((10, 4))
    for it in range(k):
        print('Iteration ' + str(cnt))
        cnt += 1
        if k != 1:
            train_set = np.delete(splited_data, it, axis=0).reshape(
                (data.shape[0] // k) * (k - 1), data.shape[1])
            lable = train_set[:, -1]
            train_set = train_set[:, 0:-1]
            aug_data = np.column_stack((np.ones(train_set.shape[0]).transpose(), train_set))
            valid_set = splited_data[it]  # use the held-out fold as the validation set
            gt = valid_set[:, -1]
            valid_set = valid_set[:, 0:-1]
            augvalid_data = np.column_stack((np.ones(valid_set.shape[0]).transpose(), valid_set))
        else:
            lable = data[:, -1]
            train_set = data[:, 0:-1]
            aug_data = np.column_stack((np.ones(train_set.shape[0]).transpose(), train_set))
        print('Finish reading files')
        print('Begin training with ' + mode)
        time_start = time.time()
        trained_w = LR_train(aug_data, lable, alpha, augvalid_data, gt, mode=mode)
        np.savetxt('.//w.csv', trained_w, delimiter=',', fmt='%d')
        time_finish = time.time()
        print('Training time: ' + str(time_finish - time_start) + 's')
        if k != 1:
            print('Error on training set')
            result = LR_classification(trained_w, aug_data, lable)
            eva_index[it, :] = evaluation(lable, result)
            print(eva_index[it])
            print('Evaluate on validation set')
            result = LR_classification(trained_w, augvalid_data, gt)
            eva_index[it, :] = evaluation(gt, result)
            print(eva_index[it])
def run(args):
    has_effect = False

    if args:
        try:
            train_corpus, val_corpus, test_corpus = data.get_train_val_test()
            words, embeddings = data.get_polyglot_words_embeddings()

            parser = PCFG()
            parser.learn_probabilities_and_rules(train_corpus)
            parser.set_oov_module(OovModule, words, embeddings)

            if args.inference:
                get_gold(parser, test_corpus, filename='evaluation_data.gold')
                get_predictions(parser, test_corpus, filename='evaluation_data.parser_output')
            if args.evaluation:
                evaluation('evaluation_data.gold', 'evaluation_data.parser_output')
            if args.parse:
                parser.parse_from_txt(args.txt_path)
        except Exception as e:
            logger.exception(e)
            logger.error("Uhoh, the script halted with an error.")
    else:
        if not has_effect:
            logger.error("Script halted without any effect. To run code, use command:\n"
                         "python3 main.py <args>")
def test4():
    b = chessboard()
    b.loads('2:DF 1:EG 2:FG 1:FH 2:FJ 2:GG 1:GH 1:GI 2:HG 1:HH 1:IG 2:IH 1:JF 2:JI 1:KE')
    b.loads('2:CE 2:CK 1:DF 1:DK 2:DL 1:EG 1:EI 1:EK 2:FG 1:FH 1:FI 1:FJ 1:FK 2:FL 1:GD '
            '2:GE 2:GF 2:GG 2:GH 1:GI 1:GK 2:HG 1:HH 2:HJ 2:HK 2:IG 1:JG 2:AA')
    eva = evaluation()
    print(b)
    score = 0
    t = time.time()
    for i in range(10000):
        score = eva.evaluate(b.board(), 2)
        # eva.test(b.board())
    t = time.time() - t
    print(score, t)
    print(eva.textrec(3))
    return 0
            'rb') as file:
    unary_freq = pickle.load(file)
    file.close()
with codecs.open(os.path.join(sys.argv[8] + "\\", "PCFG_binary_freq.pkl"), 'rb') as file:
    binary_freq = pickle.load(file)
    file.close()
with codecs.open(os.path.join(sys.argv[8] + "\\", "PCFG_postags_freq.pkl"), 'rb') as file:
    postags_freq = pickle.load(file)
    file.close()

#####################################################################
#                               MAIN                                #
#####################################################################
parser = CYK_parser_class.CYK_parser()
parser.initialize(NT_set, T_set, postags_set, unary_freq, binary_freq, postags_freq,
                  unary_dict, binary_dict, postags_dict)
parser.parse_corpus(input=sys.argv[8] + "\\" + 'sequoia_test_corrected.txt',
                    output=sys.argv[8] + "\\" + 'output.txt')

#####################################################################
#                        Evaluation if needed                       #
#####################################################################
if sys.argv[7] == 'True':
    import evaluate
    evaluate.evaluation()
    print('Evaluation Done')
def __init__(self):
    self.evaluator = evaluation()
    self.board = [[0 for n in range(15)] for i in range(15)]
    self.gameover = 0
    self.overvalue = 0
    self.maxdepth = 3
        cur_loss, cur_loss1, cur_loss2))
    total_loss = 0
    total_loss1 = 0
    total_loss2 = 0
    start_time = time.time()

if iter % save_interval == 0:
    save_path = os.path.join(args.save_path, args.task)
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    torch.save([model, optimizer, criterion],
               os.path.join(save_path, f'save_4_{args.lamda}.pt'))
    score = evaluation(model, corpus, args.task, args.batch_size,
                       dataset='val', div=True, reg=True)
    print('DEV accuracy: ' + str(score))
    with open(os.path.join(save_path, f'record_4_{args.lamda}.txt'), 'a',
              encoding='utf-8') as fpw:
        if iter == 0:
            fpw.write(str(args) + '\n')
        fpw.write(str(iter) + ':\tDEV accuracy:\t' + str(score) + '\n')
    if score > best_dev_score:
        best_dev_score = score
        torch.save([model, optimizer, criterion],
                   os.path.join(save_path, f'save_best_4_{args.lamda}.pt'))
# print(X)
# print(Y)
x_train, y_train, x_test, y_test = split_dataset(X, Y, 0.8)
print('xtrain :', x_train.shape)
print('ytrain :', y_train.shape)

### Model training ###
'''
Trains our model.
'''
inpt = (x_train.shape[1], x_train.shape[2])  # inpt = (2, 22)
outp = y_train.shape[1]  # outp = 22

model = lstm_model(outp, inpt)
model.summary()
train_model(x_train, y_train, (x_test, y_test), model, 'mse', 'adam', ['accuracy'])
'''
Training can be visualised with TensorBoard.
Run "tensorboard --logdir trainings" in a terminal; "trainings" is the directory
where the training data is logged. Then open the URL the terminal prints
(http://localhost:6006/).
'''

## EVALUATION ##
'''
Evaluates our model, given x_test, y_test, nb_categories, the K top scores
and the model as parameters.
'''
K_topscore = 5
p = evaluation(x_test, y_test, nb_categories, K_topscore, model)
# print(p_acc)
# print(conf_mat)
def main(model, data_set, parameters):
    with tf.Session(config=tf_config) as sess:
        sess.run(tf.global_variables_initializer())
        print("Initialization completed")
        print()

        start_time = time()
        (precisions, recalls, F1s, ndcgs, one_calls, eval_loss) = evaluation(
            model, sess, parameters.top_k, data_set.test_unobserved_dict,
            data_set.test_dict, data_set.all_items)
        ndcgs = np.mean(np.array(ndcgs), axis=0)
        precisions = np.mean(np.array(precisions), axis=0)
        recalls = np.mean(np.array(recalls), axis=0)
        F1s = np.mean(np.array(F1s), axis=0)
        one_calls = np.mean(np.array(one_calls), axis=0)
        end_time = time()
        print('Init prediction completed [%.1f s]: eval_loss = %.4f, NDCG@%d = %.4f'
              % (end_time - start_time, np.mean(eval_loss), parameters.top_k,
                 ndcgs[parameters.top_k]))
        displayResult("Precision", precisions)
        displayResult("Recall", recalls)
        displayResult("F1", F1s)
        displayResult("NDCG", ndcgs)
        displayResult("1-call", one_calls)
        print()

        best_prec, best_rec, best_f1, best_ndcg, best_1call, best_iter = \
            precisions, recalls, F1s, ndcgs, one_calls, -1
        best_ndcg_5 = ndcgs[parameters.top_k]
        patience_count = 0

        # Seeds can be set at the operation level or the graph level; this graph-level
        # seed makes all variables defined later generate the same random numbers
        # across sessions.
        tf.set_random_seed(1)

        for epoch in range(parameters.max_epoch_number):
            users_input, items_input, ratings_input = get_train_instances(
                data_set.train_pairs, parameters.negative_ratio, parameters.n,
                parameters.m, data_set.all_users, data_set.all_items)
            train_pairs_number = len(users_input)
            # np.random.permutation shuffles the order of the training pairs.
            shuffled_indexs = np.random.permutation(np.arange(train_pairs_number))
            users_input = users_input[shuffled_indexs]
            items_input = items_input[shuffled_indexs]
            ratings_input = ratings_input[shuffled_indexs]
            # Number of batches for the chosen batch size.
            batch_number = train_pairs_number // parameters.batch_size + 1
            losses = []  # record the train loss of each batch
            start_time = time()
            for i in range(batch_number):
                start = i * parameters.batch_size
                # The last batch may contain fewer elements than batch_size.
                end = np.min([train_pairs_number, (i + 1) * parameters.batch_size])
                users_batch = users_input[start:end]
                items_batch = items_input[start:end]
                ratings_batch = ratings_input[start:end]
                # The placeholders are declared with shape=[None, 1], so
                # [user1, user2, ...] must be reshaped to [[user1], [user2], ...]
                # via [:, None], otherwise feeding raises an error.
                _, batch_loss = sess.run(
                    [model.optimizer, model.loss],
                    feed_dict={model.user: users_batch[:, None],
                               model.item: items_batch[:, None],
                               model.rating: ratings_batch[:, None]})
                # batch_loss is the log loss over the ratings and outputs of the
                # batch (a single float); collect it for averaging.
                losses.append(batch_loss)
            end_time = time()
            train_time = end_time - start_time

            if epoch % parameters.verbose == 0:
                start_time = time()
                (precisions, recalls, F1s, ndcgs, one_calls, eval_loss) = evaluation(
                    model, sess, parameters.top_k, data_set.test_unobserved_dict,
                    data_set.test_dict, data_set.all_items)
                ndcgs = np.mean(np.array(ndcgs), axis=0)
                precisions = np.mean(np.array(precisions), axis=0)
                recalls = np.mean(np.array(recalls), axis=0)
                F1s = np.mean(np.array(F1s), axis=0)
                one_calls = np.mean(np.array(one_calls), axis=0)
                end_time = time()
                eval_time = end_time - start_time
                print('Iteration %d: train_loss = %.4f[%.1f s], eval_loss = %.4f[%.1f s], NDCG@%d = %.4f'
                      % (epoch + 1, np.mean(losses), train_time, np.mean(eval_loss),
                         eval_time, parameters.top_k, ndcgs[parameters.top_k]))
                displayResult("Precision", precisions)
                displayResult("Recall", recalls)
                displayResult("F1", F1s)
                displayResult("NDCG", ndcgs)
                displayResult("1-call", one_calls)
                print()
                # evaluation() indexes metrics from 1, e.g. ndcgs[5] is NDCG@5.
                if ndcgs[parameters.top_k] > best_ndcg_5:
                    best_prec, best_rec, best_f1, best_ndcg, best_1call, best_iter = \
                        precisions, recalls, F1s, ndcgs, one_calls, epoch + 1
                    best_ndcg_5 = ndcgs[parameters.top_k]
                    patience_count = 0
                else:
                    patience_count += 1
                    if patience_count > parameters.patience:
                        break

        print("End. Best Iteration %d: NDCG@%d = %.4f" %
              (best_iter, parameters.top_k, best_ndcg_5))
        displayResult("Precision", best_prec)
        displayResult("Recall", best_rec)
        displayResult("F1", best_f1)
        displayResult("NDCG", best_ndcg)
        displayResult("1-call", best_1call)
def train_and_val(model, train_dataloader, val_dataloader, criterion, optimizer, args, config):
    '''
    input:
        model: torch.nn.Module, the neural network
        train_dataloader: a torch.utils.data.DataLoader for training
        val_dataloader: a DataLoader for validation
        criterion: used to compute the loss
        args: parameters from the command line
        config: parameters specified inside
    Note: no metric is used during training and validation,
    while the loss is not used for testing.
    '''
    # restore weights here if starting in the middle? PENDING
    print("len(train_dataloader.dataset)", len(train_dataloader.dataset))
    print("len(val_dataloader.dataset)", len(val_dataloader.dataset))

    epoch_loss = {"train_loss": [], "val_loss": []}
    loss_batches = {"train_loss": [], "val_loss": []}
    reconstruction_loss = {"train_loss": [], "val_loss": []}
    KL_loss = {"train_loss": [], "val_loss": []}
    best_val_loss = 20000.0  # any very high number
    eval_metric = {"precision": [], "recall": []}

    for epoch in range(args.num_epochs):
        # run one epoch
        print("Epoch {}/{}".format(epoch + 1, args.num_epochs))
        # train() returns the loss for this epoch; the log directory is passed
        # so gradient checking can be done during training.
        train_epoch_loss, train_loss_batches, train_reconstruction_loss, train_KL_loss = train(
            model, train_dataloader, criterion, optimizer, config, epoch,
            config["exp_logs_dir"], args)
        epoch_loss["train_loss"].append(train_epoch_loss)
        loss_batches["train_loss"].append(train_loss_batches)
        reconstruction_loss["train_loss"].append(train_reconstruction_loss)
        KL_loss["train_loss"].append(train_KL_loss)

        # When to save the model is still not clear: during training the optimizer
        # keeps changing the weights, so save both the latest and the best weights.
        val_loss, val_loss_batches, val_reconstruction_loss, val_KL_loss = validation(
            model, val_dataloader, criterion, config, epoch, args)
        epoch_loss["val_loss"].append(val_loss)
        loss_batches["val_loss"].append(val_loss_batches)
        reconstruction_loss["val_loss"].append(val_reconstruction_loss)
        KL_loss["val_loss"].append(val_KL_loss)

        # check for the best model
        is_best = val_loss <= best_val_loss

        # Save weights, overwriting the last checkpoint and the best one
        print("save_checkpoint")
        utils.save_checkpoint({'epoch': epoch + 1,
                               'state_dict': model.state_dict(),
                               'optim_dict': optimizer.state_dict()},
                              is_best=is_best,
                              checkpoint=config["exp_save_models_dir"])
        if is_best:
            print("is_best")
            best_loss_json_file = os.path.join(config["exp_save_models_dir"],
                                               "best_loss_batches.json")
            utils.save_dict_to_json(loss_batches, best_loss_json_file)
            best_val_loss = val_loss

        # overwrite the last epoch losses
        last_loss_json_file = os.path.join(config["exp_save_models_dir"],
                                           "last_loss_batches.json")
        utils.save_dict_to_json(loss_batches, last_loss_json_file)

        # Is there a better way to wrap a large number of arguments?
        prec, rec, _ = evaluate.evaluation(model, args, config)
        eval_metric["precision"].append(prec)
        eval_metric["recall"].append(rec)
        eval_metric_json_file = os.path.join(config["exp_save_models_dir"],
                                             "val_eval_metric.json")
        utils.save_dict_to_json(eval_metric, eval_metric_json_file)

    all_epoch_loss_json_file = os.path.join(config["exp_save_models_dir"],
                                            "all_epoch_loss.json")
    utils.save_dict_to_json(epoch_loss, all_epoch_loss_json_file)

    # plot the stats
    graph_type = "epoch_loss"
    file_name = config["exp_logs_dir"] + graph_type + ".pdf"
    utils.plot_loss_stats(epoch_loss, "Total Epoch Loss", file_name)

    graph_type = "reconstruction_loss"
    file_name = config["exp_logs_dir"] + graph_type + ".pdf"
    utils.plot_loss_stats(reconstruction_loss, "Reconstruction Loss", file_name)
    # save the logs
    reconstruction_loss_json_file = os.path.join(config["exp_save_models_dir"],
                                                 "reconstruction_loss.json")
    utils.save_dict_to_json(reconstruction_loss, reconstruction_loss_json_file)

    graph_type = "kL_loss"
    file_name = config["exp_logs_dir"] + graph_type + ".pdf"
    utils.plot_loss_stats(KL_loss, "KL Loss", file_name)
    KL_loss_json_file = os.path.join(config["exp_save_models_dir"], "kL_loss.json")
    utils.save_dict_to_json(KL_loss, KL_loss_json_file)
# %% Accuracy Evaluation Example
import evaluate

targets = [[1, 1, 0, 0, 0, 0],
           [0, 0, 1, 1, 0, 0],
           [0, 0, 0, 0, 1, 1]]
outputs = [[0.1, 0.86, 0.2, 0.1, .02, 0.1],
           [0.4, 0.12, 0.768, 0.145, 0.1, 0.8],
           [0.454, 0.35, 0.21, 0.0, 0.89, 0.9999]]

eval = evaluate.evaluation(targets, outputs)

print('Confusion Results')
print('\tConfusion value = %0.2f' % eval.confusion.c)
print('Confusion Matrix')
for row in eval.confusion.cm:
    print('\t', end='')
    for col in row:
        print('%0.2f ' % col, end='')
    print()
print('Indices')
for row in eval.confusion.ind:
    for col in row:
        s = col
        if s == 0:
            print('\t[]')
        elif s == 1:
            print('\t[%d]' % col)
# userList = range(numOfUser)
userList = range(10000)
everyOneLike = behavior.groupby('TV_NAME').agg({"newUserID": "count"})
everyOneLike = everyOneLike.sort_values(by='newUserID', ascending=False).index.tolist()
for p in [0.01]:
    x, y, z = evaluation(userList, numOfUser, numOfItem, behavior, test,
                         userLatentFactor, userClassLatentFactor,
                         itemLatentFactor, itemClassLatentFactor,
                         userBelong, itemBelong, N=10,
                         everyOneLike=everyOneLike,
                         rateList=(p, p, 1 - 4 * p, 2 * p))
    print(x, y, z)

# Load the raw information
userHasPayHistory = load_pickle("./temp/userHasPayHistory.data")
itemNeedPay = load_pickle("./temp/itemNeedPay.data")
mediaid = load_pickle("./temp/mediaid.data")
userid = load_pickle("./temp/userid.data")
itemNeedPayList = [
    pred_result = np.array(pred_result)
    return pred_result


if __name__ == '__main__':
    # data = np.array([[1,1,1],[1,1,1],[1,0,-1],[0,1,-1],[0,1,-1]])
    # data = pd.read_csv('.\\my_train.csv', header=None).values
    k = 10
    data = pd.read_csv('.\\train.csv', header=None).values
    featurenumber = np.arange(data.shape[1] - 1)
    splited_data = split_dataset(data, k)
    cnt = 1
    eva_index = np.zeros((10, 4))
    for it in range(k):
        print('Iteration ' + str(cnt))
        cnt += 1
        if k != 1:
            train_set = np.delete(splited_data, it, axis=0).reshape(
                (data.shape[0] // k) * (k - 1), data.shape[1])
            valid_set = splited_data[it]  # use the held-out fold as the validation set
        else:
            train_set = data
        tree = dt_train(train_set, featurenumber, 'CART')
        if k != 1:
            result = dt_classification(tree, valid_set)
            eva_index[it, :] = evaluation(valid_set[:, -1], result)
            print(eva_index[it])
    np.savetxt('.//evaluation_indicators_for_CART.csv', eva_index, delimiter=',', fmt='%.6f')
    # createPlot(tree)
    # test_data = pd.read_csv('.\\test.csv', header=None).values
    # print(dt_classification(tree, test_data))
criterion = torch.nn.CrossEntropyLoss()
# If size_average were set to False, the losses would be summed over each minibatch
# criterion.size_average = False
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

# train the model
if args.exp_type == "train":
    train.train_and_val(model, train_dataloader, val_dataloader,
                        criterion, optimizer, args, model_config)

if args.exp_type == "evaluate":
    eval_config = {
        "num_items": num_items,
        "train_user_item_interaction_dict": train_user_item_interaction_dict,
        "test_user_item_interaction_dict": test_user_item_interaction_dict,
        "exp_save_models_dir": "./experiments/cvae/saved_models/analysis12/",
        "slate_size": 5  # for different slate sizes
    }
    # Is there a better way to wrap a large number of arguments?
    precision, recall, user_test_metric = evaluate.evaluation(model, args, eval_config)
    user_test_metric_json_file = os.path.join(eval_config["exp_save_models_dir"],
                                              "user_test_metric.json")
    utils.save_dict_to_json(user_test_metric, user_test_metric_json_file)