def main(argv):
    """Entry point: configure TF session/GPU options and dispatch on run mode.

    NOTE(review): ``argv`` is accepted (tf.app.run-style) but the run id and
    mode are read from ``sys.argv`` directly (sys.argv[1] = run id,
    sys.argv[2] = mode) — confirm callers always invoke via the CLI.
    """
    # Silence TF C++ logging; keep Python-side INFO logs.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf.logging.set_verbosity(tf.logging.INFO)
    # Cap per-process GPU memory at 90% and allow incremental growth.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=Hp.inter_op_parallelism_threads,
        intra_op_parallelism_threads=Hp.intra_op_parallelism_threads,
        allow_soft_placement=True,
        log_device_placement=False,
        gpu_options=gpu_options)
    session_config.gpu_options.allow_growth = True
    # Give every run its own log sub-directory, keyed by the run id.
    Hp.logdir = os.path.join(Hp.logdir, "test{}".format(sys.argv[1]))
    if sys.argv[2] == 'train':
        # Train branch (also runs periodic eval; see train.py and
        # Hyperparameter.py for details).
        print("Training Mode")
        train.train(session_config)
    elif sys.argv[2] == 'eval':
        print("Evaluation Mode")
        evaluation.eval(session_config)
    elif sys.argv[2] == 'synthes':
        print("Synthesize Mode")
        synthesize.synthesize(session_config)
    else:
        # BUGFIX: corrected the garbled user-facing error message
        # ("Uncognized mode! You need type mode chosen from ...").
        print("Unrecognized mode! You need to type a mode chosen from train/eval/synthes.")
def running_train(batches_train, batches_test, model, params):
    """Train `model` with Adam + BCE over `batches_train`, printing accuracy,
    precision and recall on both train and test batches after every epoch and
    saving a checkpoint per epoch.

    NOTE(review): the Adam learning rate is taken from params.l2_reg_lambda —
    presumably a reused hyperparameter slot; confirm this is intentional.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=params.l2_reg_lambda)
    steps = 0
    for epoch in range(1, params.num_epochs + 1):
        for batch in batches_train:
            pad_msg, pad_code, labels = batch
            # Move inputs to the GPU when available; labels become float either way.
            if torch.cuda.is_available():
                pad_msg, pad_code, labels = torch.tensor(
                    pad_msg).cuda(), torch.tensor(
                        pad_code).cuda(), torch.cuda.FloatTensor(labels)
            else:
                pad_msg, pad_code, labels = torch.tensor(pad_msg).long(
                ), torch.tensor(pad_code).long(), torch.tensor(labels).float()
            optimizer.zero_grad()
            predict = model.forward(pad_msg, pad_code)
            # Binary cross-entropy on the model's (presumably sigmoid) outputs.
            loss = nn.BCELoss()
            loss = loss(predict, labels)
            loss.backward()
            optimizer.step()
            steps += 1
            if steps % params.log_interval == 0:
                print('\rEpoch: {} step: {} - loss: {:.6f}'.format(
                    epoch, steps, loss.item()))
        # End-of-epoch metrics on the training and testing batches.
        print('Epoch: %i ---Training data' % (epoch))
        acc, prc, rc = eval(data=batches_train, model=model)
        print('Accuracy: %f -- Precision: %f -- Recall: %f' % (acc, prc, rc))
        print('Epoch: %i ---Testing data' % (epoch))
        acc, prc, rc = eval(data=batches_test, model=model)
        print('Accuracy: %f -- Precision: %f -- Recall: %f' % (acc, prc, rc))
        save(model, params.save_dir, 'epoch', epoch)
def test(self, dataloader, model_dir):
    """Restore the model from `model_dir` and decode the whole test set.

    Predicted ids are mapped back to words through a three-part id space:
    [0, vocab_size) -> vocabulary words;
    [vocab_size, vocab_size + max_q_len) -> words copied from the question;
    beyond that -> retrieved knowledge-base fact values.
    Collected predictions are finally passed to evaluation.eval.
    """
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as session:
        self.initilize(model_dir, session)
        gold_questions, pred_answers = list(), list()
        for ques, ques_lens, facts, resp, source, kbkb, modes, weights, ques_words, real_facts in \
                dataloader.get_batchs(data=dataloader.test_data, shuffle=False):
            feed = dict()
            feed[self.encoder_inputs] = ques
            feed[self.encoder_lengths] = ques_lens
            feed[self.facts_inputs] = facts
            feed[self.decoder_inputs] = resp
            feed[self.decoder_sources] = source
            feed[self.decoder_kbkbs] = kbkb
            feed[self.decoder_modes] = modes
            # NOTE(review): this overwrites the modes fed just above with all
            # ones (original comment: "all the same") — confirm intended.
            feed[self.decoder_modes] = np.ones(np.shape(modes), dtype=int)
            feed[self.decoder_weights] = weights
            decoder_predicts = session.run([self.predict_truths], feed)
            decoder_predicts = decoder_predicts[0]
            # Transpose from time-major to batch-major.
            predicts = np.transpose(decoder_predicts)
            gold_ouputs = np.transpose(resp)
            gold_inputs = np.transpose(ques)
            print 'vocab size: ', len(dataloader.vocab_list)
            print 'max ques len: ', dataloader.max_q_len
            for i in range(len(predicts)):
                input, output, predict = gold_inputs[i], gold_ouputs[
                    i], predicts[i]
                q_words, facts = ques_words[i], real_facts[i]
                words = list()
                for id in predict:
                    # Stop decoding at end-of-sequence or padding.
                    if id in [data_utils.EOS_ID, data_utils.PAD_ID]:
                        break
                    if id < dataloader.vocab_size:
                        words.append(dataloader.vocab_list[id])
                    elif id < dataloader.vocab_size + dataloader.max_q_len:
                        # Copy mechanism: id points into the question words.
                        q_id = id - dataloader.vocab_size
                        words.append(q_words[q_id])
                    else:
                        # Otherwise the id addresses a retrieved fact value.
                        f_id = id - dataloader.vocab_size - dataloader.max_q_len
                        words.append(facts[f_id][1])
                gold_questions.append(''.join(ques_words[i]))  # gold question
                pred_answers.append(''.join(words))  # predicted result
                print('question: %s\ngolden: %s\npredict: %s' %
                      (utils.ids_to_sentence(input.tolist(),
                                             dataloader.vocab_list,
                                             data_utils.EOS_ID, ''),
                       utils.ids_to_sentence(
                           output.tolist(), dataloader.vocab_list,
                           data_utils.EOS_ID, ''), ''.join(words)))
            # break
        # start evaluation
        import evaluation
        evaluation.eval(pred_answers)
def train(session_config):
    """Alternate training and evaluation phases for the configured number of
    train-step/eval cycles, resetting the default graph between phases."""
    num_cycles = Hp.train_step // Hp.train_step_per_eval
    #for _ in range(1):
    for _cycle in range(num_cycles):
        # One training phase, then rebuild the graph from scratch.
        train_per_eval(session_config)
        tf.reset_default_graph()
        # One evaluation phase, again followed by a graph reset.
        evaluation.eval(session_config)
        tf.reset_default_graph()
def train_predicted(train,test,method="l2r",fresh=False): pkl_file="features.pkl" if os.path.exists(pkl_file): features_train,names,features_test,names=pickle.load(open(pkl_file,'r')) else: import featureExtract features_train,names=featureExtract.getFeatureSofQA(train) features_test,names=featureExtract.getFeatureSofQA(test) pickle.dump((features_train,names,features_test,names),open(pkl_file,"w")) if method=="lr": x=features_train[names] y=features_train["flag"] test_x=features_test[names] clf = LinearRegression() # clf = tree.DecisionTreeRegressor() # clf = svm.SVR() clf.fit(x, y) print clf.coef_ predicted=clf.predict(test_x) print evaluation.eval(predicted,test) elif method=="l2r": write2file4L2r(features_train, names) write2file4L2r(features_test, names,flag="test") subprocess.call("java -jar lib/RankLib-2.7.jar -train train.LETOR -test test.LETOR -ranker 6 -kcv 5 -metric2t map -save mymodel.txt") elif method =="xgb": import xgboost as xgb # train,dev=splitByDf(train) features_train=features_train[names] # features_dev=getFeatureSofQA(dev) features_test =features_test[names] dtrain = xgb.DMatrix( features_train, label=train["flag"],weight = np.ones(len(train))) # ddev = xgb.DMatrix( features_dev, label=dev["flag"],weight = np.ones(len(dev))) param = {'bst:max_depth':2, 'bst:eta':0.3, 'silent':1, 'objective':'binary:logistic' } param['nthread'] = 4 plst = param.items() # plst += [('eval_metric', 'auc')] # Multiple evals can be handled in this way plst += [('eval_metric', 'map')] evallist = [(dtrain,'train')] num_round = 30 bst = xgb.train( plst, dtrain, num_round, evallist ) dtest = xgb.DMatrix( features_test, missing = -999.0 ) predicted = bst.predict( dtest) print evaluation.eval(predicted,test) else: print "no method"
def test(conf_name):
    """Restore the SD-GAR discriminator checkpoint named by `conf_name`'s
    config and print ranking metrics (P@k, NDCG@k, MRR) on the test split."""
    if conf_name not in conf:
        raise Exception("config name not in config.py")
    cfg = conf[conf_name]
    print("load data...")
    user_pos_train, user_pos_test = load_data(cfg)
    all_users = list(user_pos_train.keys())
    all_users.sort()
    print("load model...")
    discriminator = DIS(cfg)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg['CUDA_VISIBLE_DEVICES']
    # Let TF grab GPU memory incrementally instead of all at once.
    tf_config = tf.compat.v1.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    sess = tf.compat.v1.Session(config=tf_config)
    saver = tf.compat.v1.train.Saver(max_to_keep=1)
    saver.restore(sess, os.path.join(cfg['output_dir'], 'model/SD-GAR'))
    metric_names = [
        'p@3', 'P@5', 'P@10', 'P@50', 'NDCG@3', 'NDCG@5', 'NDCG@10',
        'NDCG@50', 'MRR'
    ]
    header = '\t'.join(["%7s" % name for name in metric_names])
    print("%s" % get_current_time(), header)
    scores = eval(sess, discriminator, user_pos_train, user_pos_test)
    row = '\t'.join(["%.5f" % score for score in scores])
    print("%s" % get_current_time(), row)
def eval(self, epoch):
    """Evaluate on the target-test and 'Real' loaders; the main process writes
    each prediction list to result/<epoch>_<name>.txt."""
    result_folder = os.path.join(cfg.ROOT_DIR, 'result')
    # Only the main process (non-distributed, or distributed rank 0) touches disk.
    is_main_proc = (self.distributed == False) or (dist.get_rank() == 0)
    if not os.path.exists(result_folder):
        if is_main_proc:
            os.mkdir(result_folder)
    eval_targets = [
        (cfg.DATA_LOADER.TARGET, self.trg_test_loader),
        ('Real', self.trg_real_loader),
    ]
    for name, loader in eval_targets:
        mean_acc, preds = evaluation.eval(epoch, name, loader, self.netG, self.netE)
        if mean_acc is None:
            continue
        if is_main_proc:
            #self.logger.info(mean_acc)
            out_path = os.path.join(result_folder, str(epoch) + '_' + name + '.txt')
            with open(out_path, 'w') as fid:
                for v in preds:
                    fid.write(str(v) + '\n')
def running_all_model_update(batches, model, params):
    """Load saved weights from params.file_model and continue training on
    `batches`, evaluating and checkpointing after every epoch.

    NOTE(review): the step log prints `num_epoch` (a second counter incremented
    at the bottom of the epoch loop) rather than `epoch` — the two track the
    same value, confirm the duplication is intentional.
    NOTE(review): the Adam learning rate comes from params.l2_reg_lambda —
    presumably a reused hyperparameter slot; verify intent.
    """
    # Restore weights; fall back to CPU mapping when CUDA is unavailable.
    if torch.cuda.is_available():
        model.load_state_dict(torch.load(params.file_model))
    else:
        model.load_state_dict(torch.load(params.file_model, map_location='cpu'))
    optimizer = torch.optim.Adam(model.parameters(), lr=params.l2_reg_lambda)
    steps, num_epoch = 0, 1
    for epoch in range(1, params.num_epochs + 1):
        for batch in batches:
            pad_msg, pad_added_code, pad_removed_code, labels = batch
            # Move tensors to GPU when available; labels become float either way.
            if torch.cuda.is_available():
                pad_msg, pad_added_code, pad_removed_code, labels = torch.tensor(pad_msg).cuda(), torch.tensor(
                    pad_added_code).cuda(), torch.tensor(pad_removed_code).cuda(), torch.cuda.FloatTensor(labels)
            else:
                pad_msg, pad_added_code, pad_removed_code, labels = torch.tensor(pad_msg).long(), torch.tensor(
                    pad_added_code).long(), torch.tensor(pad_removed_code).long(), torch.tensor(labels).float()
            optimizer.zero_grad()
            predict = model.forward(pad_msg, pad_added_code, pad_removed_code)
            # Binary cross-entropy on the model's outputs.
            loss = nn.BCELoss()
            loss = loss(predict, labels)
            loss.backward()
            optimizer.step()
            steps += 1
            if steps % params.log_interval == 0:
                print('\rEpoch: {} step: {} - loss: {:.6f}'.format(num_epoch, steps, loss.item()))
        # End-of-epoch metrics on the (training) batches, then checkpoint.
        print('Epoch: %i / %i ---Data' % (epoch, params.num_epochs))
        acc, prc, rc, f1, auc_ = eval(data=batches, model=model)
        print('Accuracy: %f -- Precision: %f -- Recall: %f -- F1: %f -- AUC: %f' % (acc, prc, rc, f1, auc_))
        save(model, params.save_dir, 'epoch', num_epoch)
        num_epoch += 1
def parse_files(base_path, model_name, model, trees, vocab, \
    max_edus, y_all, tag_to_ind_map, baseline, infiles_dir, gold_files_dir,
    pred_outdir="pred"):
    """Parse every tree's EDU file with the given model, write each predicted
    serialization into `pred_outdir`, then score predictions against
    `gold_files_dir`.

    NOTE(review): eval() is called with the literal "pred", not `pred_outdir`
    (or `path_to_out`) — a non-default pred_outdir would be evaluated from the
    wrong folder; confirm.
    """
    path_to_out = create_dir(base_path, pred_outdir)
    for tree in trees:
        # The EDU input file may use either the ".out.edus" or ".edus" suffix.
        fn = build_infile_name(tree._fname, base_path, infiles_dir, ["out.edus", "edus"])
        queue = Queue.read_file(fn)
        stack = Stack()
        root = parse_file(queue, stack, model_name, model, tree, \
            vocab, max_edus, y_all, tag_to_ind_map, baseline)
        # Prediction path: <path_to_out><SEP><tree filename>.
        predfn = path_to_out
        predfn += SEP
        predfn += tree._fname
        with open(predfn, "w") as ofh:
            print_serial_file(ofh, root, False)
    eval(gold_files_dir, "pred")
def train_predicted(train,test): train,names=featureExtract.getFeatureSofQA(train) x=train[names] y=train["flag"] clf = LinearRegression() clf.fit(x, y) print clf.coef_ test,names=featureExtract.getFeatureSofQA(test) test_x=test[names] predicted=clf.predict(test_x) print evaluation.eval(predicted,test) return predicted
def run_evaluation(eval_model, eval_sess, model_dir, input_eval_file,
                   output_eval_file, input_emb_weights, summary_writer):
    """Create-or-load the eval model, compute the dev loss over the eval files,
    log it as a 'dev_loss' summary, and return it."""
    with eval_model.graph.as_default():
        # Initialize the eval-graph variables in eval_sess, or restore them
        # from the latest checkpoint in model_dir.
        loaded_eval_model = model_helper.create_or_load_model(
            eval_model.model, eval_sess, "eval", model_dir, input_emb_weights)
        iterator_feed = {
            eval_model.input_file_placeholder: input_eval_file,
            eval_model.output_file_placeholder: output_eval_file,
        }
        dev_loss = evaluation.eval(loaded_eval_model, eval_sess,
                                   eval_model.iterator, iterator_feed)
        model_helper.add_summary(summary_writer, "dev_loss", dev_loss)
        return dev_loss
def validate(model, val_loader, epoch, measure='cosine', metric='mir'):
    """Encode the validation set, run text-to-video retrieval, print/log the
    metrics, and return the metric named by `metric` (default mir).

    NOTE(review): `writer` is a module-level TensorBoard writer, not a
    parameter — confirm it is in scope at import time.
    """
    # Compute embeddings for all validation videos and captions.
    vis_embs, txt_embs, vis_ids, txt_ids = evaluation.encode_data(
        model, val_loader)

    # Deduplicate videos, keeping the first occurrence of each id.
    # (set-based membership test replaces the original O(n^2) list scan)
    keep_vis_order = []
    keep_vis_ids = []
    seen = set()
    for i, vid in enumerate(vis_ids):
        if vid not in seen:
            seen.add(vid)
            keep_vis_order.append(i)
            keep_vis_ids.append(vid)
    vis_embs = vis_embs[keep_vis_order]
    vis_ids = keep_vis_ids

    # video retrieval
    txt2vis_sim = evaluation.compute_sim(txt_embs, vis_embs, measure)
    #(r1, r5, r10, medr, meanr, mir) = evaluation.eval_qry2retro(txt2vis_sim, n_qry=1)
    inds = np.argsort(txt2vis_sim, axis=1)
    label_matrix = np.zeros(inds.shape)
    for index in range(inds.shape[0]):
        ind = inds[index][::-1]
        # Caption id "vid#k" matches its video id before the '#'.
        label_matrix[index][np.where(
            np.array(vis_ids)[ind] == txt_ids[index].split('#')[0])[0]] = 1
    (r1, r5, r10, medr, meanr, mir, mAP) = evaluation.eval(label_matrix)
    sum_recall = r1 + r5 + r10
    print(" * Text to video:")
    print(" * r_1_5_10: {}".format([round(r1, 3), round(r5, 3), round(r10, 3)]))
    print(" * medr, meanr, mir: {}".format(
        [round(medr, 3), round(meanr, 3), round(mir, 3)]))
    print(" * mAP: {}".format(round(mAP, 3)))
    print(" * " + '-' * 10)
    writer.add_scalar('val/r1', r1, epoch)
    writer.add_scalar('val/r5', r5, epoch)
    writer.add_scalar('val/r10', r10, epoch)
    writer.add_scalar('val/medr', medr, epoch)
    writer.add_scalar('val/meanr', meanr, epoch)
    writer.add_scalar('val/mir', mir, epoch)
    writer.add_scalar('val/mAP', mAP, epoch)
    # BUGFIX: return from an explicit metric mapping instead of
    # locals().get(metric, mir), which silently exposed every local name
    # (e.g. metric='epoch' returned the epoch number).
    metrics = {'r1': r1, 'r5': r5, 'r10': r10, 'medr': medr, 'meanr': meanr,
               'mir': mir, 'mAP': mAP, 'sum_recall': sum_recall}
    return metrics.get(metric, mir)
def main(args):
    """Two-stage KoBART fine-tuning: first on (pseudo-)extractive summaries,
    then on abstractive summaries, followed by ROUGE evaluation on the
    abstractive test split."""
    # load reference data
    ext_data, abs_data = data_load()
    # strategy (make pseudo extractive data)
    if args.strategy == "textrank":
        ext_data = textrank()
    elif args.strategy == "lead_n":
        ext_data = lead_n()
    elif args.strategy == "principal":
        ext_data = principal()
    # data preprocessing
    ext_train_loader, ext_valid_loader, ext_test_loader = \
        data_preprocessing(ext_data)
    abs_train_loader, abs_valid_loader, abs_test_loader = \
        data_preprocessing(abs_data)
    # # load Pretrained model, tokenizer
    tokenizer = get_kobart_tokenizer()
    model = get_kobart_for_conditional_generation()
    model.to(device)
    a = args.ext_epochs  # number of extractive-summarization training epochs
    b = args.abs_epochs  # number of abstractive-summarization (k-a) training epochs
    # hyperparameter
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-6)
    # NOTE(review): seeding happens after model/optimizer construction —
    # confirm this is early enough for the reproducibility you need.
    torch.manual_seed(args.seed)
    # train
    # first fine-tuning stage (extractive)
    model = train(ext_train_loader, ext_valid_loader, a, model, tokenizer,
                  optimizer, device)
    # second fine-tuning stage (abstractive)
    model = train(abs_train_loader, abs_valid_loader, b, model, tokenizer,
                  optimizer, device)
    # evaluation
    rouge1_score, rouge2_score, rougel_score = eval(model, tokenizer,
                                                    abs_test_loader, device)
    # print performance
    print(rouge1_score, rouge2_score, rougel_score)
def train_adaboost(X, Y, k=10):
    """k-fold cross-validation of an AdaBoost classifier; prints the mean
    accuracy, AUC and F1 across the k folds."""
    splitter = model_selection.KFold(n_splits=k, shuffle=True)
    # One classifier instance is reused; fit() re-trains it for every fold.
    clf = AdaBoostClassifier()
    totals = [0, 0, 0]  # running sums of (acc, auc, f1)
    for train_idx, test_idx in splitter.split(X):
        clf.fit(X[train_idx], Y[train_idx])
        predict = clf.predict(X[test_idx])
        fold_scores = evaluation.eval(predict, Y[test_idx])
        totals = [total + score for total, score in zip(totals, fold_scores)]
    acc_all, auc_all, f1_all = (total / k for total in totals)
    print("acc: " + str(acc_all) + ' auc: ' + str(auc_all) + 'f1 ' + str(f1_all))
def train_svc(X, Y, k=10):
    """k-fold cross-validation of an SVM classifier; prints the mean accuracy,
    AUC and F1 across the k folds."""
    folds = model_selection.KFold(n_splits=k, shuffle=True)
    sum_acc = sum_auc = sum_f1 = 0
    for tr_idx, te_idx in folds.split(X):
        # A fresh SVC per fold (matches the original per-fold construction).
        clf = svm.SVC()
        clf.fit(X[tr_idx], Y[tr_idx].ravel())
        predictions = clf.predict(X[te_idx])
        fold_acc, fold_auc, fold_f1 = evaluation.eval(predictions, Y[te_idx])
        sum_acc += fold_acc
        sum_auc += fold_auc
        sum_f1 += fold_f1
    acc_all = sum_acc / k
    auc_all = sum_auc / k
    f1_all = sum_f1 / k
    print("acc: " + str(acc_all) + ' auc: ' + str(auc_all) + 'f1 ' + str(f1_all))
def train_lightgbm(X, Y, k=10):
    """k-fold cross-validated LightGBM binary classifier; prints the mean
    accuracy, AUC and F1 over the k folds."""
    k_fold = model_selection.KFold(n_splits=k, shuffle=True)
    acc_all, auc_all, f1_all = 0, 0, 0
    for train_idx, test_idx in k_fold.split(X):
        X_train, X_test = X[train_idx], X[test_idx]
        Y_train, Y_test = Y[train_idx], Y[test_idx]
        # free_raw_data=False keeps the raw arrays so the datasets can be reused.
        lgb_train = lgb.Dataset(X_train, Y_train, free_raw_data=False)
        lgb_test = lgb.Dataset(X_test, Y_test, reference=lgb_train,
                               free_raw_data=False)
        params = {
            'boosting_type': 'gbdt',
            'objective': 'binary',
            'metric': {'binary_logloss', 'auc', 'acc'},
            'num_leaves': 5,
            'max_depth': 6,
            'min_data_in_leaf': 450,
            'learning_rate': 0.1,
            'feature_fraction': 0.9,
            'bagging_fraction': 0.95,
            'bagging_freq': 5,
            'lambda_l1': 1,
            'lambda_l2': 0.001,
            'min_gain_to_split': 0.2,
            'verbose': 5,
            'is_unbalance': True  # compensate for class imbalance
        }
        # Large round budget with early stopping on the held-out fold.
        gbm = lgb.train(params,
                        lgb_train,
                        num_boost_round=10000,
                        valid_sets=lgb_test,
                        early_stopping_rounds=500)
        predict = gbm.predict(X_test, num_iteration=gbm.best_iteration)
        # Probabilities -> hard 0/1 labels.
        predict = np.round(predict)
        acc, auc, f1 = evaluation.eval(predict, Y_test)
        acc_all += acc
        auc_all += auc
        f1_all += f1
    acc_all /= k
    auc_all /= k
    f1_all /= k
    print("acc: " + str(acc_all) + ' auc: ' + str(auc_all) + 'f1 ' + str(f1_all))
def checkpoint_eval(model, loss_function, scheduler, optimizer, val_iter,
                    args, ignore_index, loss_tot, epoch, num_examples_seen):
    """Evaluate on the validation set, step the LR scheduler on the validation
    loss, and write a timestamped checkpoint.

    Returns the path of the saved checkpoint file.
    """
    logger = logging.getLogger('vivo_logger')
    mean_train_loss = loss_tot / num_examples_seen
    logger.info('Train Loss: {:.4f}'.format(mean_train_loss))
    logger.info('Starting evaluation.')
    evaluation_results = {
        'valid': evaluation.eval(model, loss_function, val_iter, args,
                                 ignore_index=ignore_index)
    }
    logger.info('\n' + pprint.pformat(evaluation_results))
    # Plateau-style scheduler keyed on the validation loss.
    scheduler.step(evaluation_results['valid']['loss'])
    # Checkpoint directory under log/.
    checkpoint_dir = os.path.join('log', args['checkpoint_path'])
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    # Timestamped checkpoint file name.
    stamp = datetime.now().strftime("%d-%m-%Y_%H:%M:%S")
    logger.info('Saving Checkpoint: {}'.format(stamp))
    out_path = os.path.join(checkpoint_dir, stamp + '.pt')
    torch.save(
        {
            'epoch': epoch,
            'num_examples_seen': num_examples_seen,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'src_vocab': model.src_vocab,
            'trg_vocab': model.trg_vocab,
            'train_loss': mean_train_loss,
            'evaluation_results': evaluation_results,
            'args': args
        }, out_path)
    return out_path
def main(trainfile, modelfile, prepareTraindata, flag_print=False):
    """Train CRF boundary ('b-') and class ('c-') models for entity tagging and
    evaluate them on the fixed jsd/ct test files.

    Pipeline: (optionally) regenerate tagged training data, generate tagged
    test data, train both CRF models per training file (via crfsuite), tag the
    test files, and run boundary/class/combined evaluations.
    """
    # Resolve the data/dic/models folders relative to the script location.
    root = sys.path[0]
    datafolder = os.path.join(root, 'data')
    tempdatafolder = os.path.join(datafolder, 'tempdata')
    if not os.path.exists(tempdatafolder):
        os.mkdir(tempdatafolder)
    tempfolder = os.path.join(root, 'temp')
    dicfolder = os.path.join(root, 'dic')
    modelfoler = os.path.join(root, 'models')
    dic_file = os.path.join(dicfolder, 'dic.txt')
    ebao_dic = tools.loadDic(dic_file)
    train_filenames = [(trainfile, 'un')]  # 'un' has one extra class: medicine
    train_length = len(train_filenames)
    # Per-training-file paths for the boundary/class tag files.
    train_file_list = []
    boundary4training_list = []
    class4training_list = []
    for i in range(train_length):
        train_file_list.append(os.path.join(datafolder, train_filenames[i][0]))
        boundary4training_list.append(
            os.path.join(tempdatafolder, 'boundary4training_' + str(i)))
        class4training_list.append(
            os.path.join(tempdatafolder, 'class4training_' + str(i)))
    # Fixed test corpora and their per-file tag/entity paths.
    test_filenames = [('jsd-test.txt', 'jsd'), ('ct-test.txt', 'un')]
    test_length = len(test_filenames)
    test_file_list = []
    boundary4testing_list = []
    class4testing_list = []
    sen_ent4testing_list = []
    for i in range(test_length):
        test_file_list.append(os.path.join(datafolder, test_filenames[i][0]))
        boundary4testing_list.append(
            os.path.join(tempdatafolder, 'boundary4testing_' + str(i)))
        class4testing_list.append(
            os.path.join(tempdatafolder, 'class4testing_' + str(i)))
        sen_ent4testing_list.append(
            os.path.join(tempdatafolder, 'sen_ent4testing_' + str(i)))
    sentence_list_test = []
    sen_tags_list_test = []
    # Regenerate the tagged training data only when explicitly requested.
    if prepareTraindata == 'prepareTrain':
        for i in range(train_length):
            processing.generateFullTagFile(train_file_list[i],
                                           boundary4training_list[i],
                                           class4training_list[i], '', '0',
                                           ebao_dic, 'train', '0',
                                           train_filenames[i][1])
        if flag_print:
            print 'Train data generated!'
    # Always regenerate the tagged test data (and keep sentences/tags in memory).
    for i in range(test_length):
        sentence_list, sen_tags_list = processing.generateFullTagFile(
            test_file_list[i], boundary4testing_list[i], class4testing_list[i],
            sen_ent4testing_list[i], '1', ebao_dic, 'train', '0',
            test_filenames[i][1])
        sentence_list_test.append(sentence_list)
        sen_tags_list_test.append(sen_tags_list)
    if flag_print:
        print 'Test data generated!'
    for k in range(train_length):
        if flag_print:
            print train_filenames[k][0]
        b_model = os.path.join(modelfoler, 'b-' + modelfile + '-' + str(k))
        c_model = os.path.join(modelfoler, 'c-' + modelfile + '-' + str(k))
        # NOTE(review): each training thread is joined immediately after
        # start(), so the two model trainings run sequentially — confirm
        # the threads are intentional.
        thread_b = threading.Thread(target=modelTraining,
                                    args=(b_model, boundary4training_list[k]))
        thread_b.start()
        thread_b.join()
        if flag_print:
            print train_filenames[k][0] + b_model + ' generated!'
        thread_c = threading.Thread(target=modelTraining,
                                    args=(c_model, class4training_list[k]))
        thread_c.start()
        thread_c.join()
        if flag_print:
            print train_filenames[k][0] + c_model + ' generated!'
        for j in range(test_length):
            if flag_print:
                print '\nTraindata: ' + train_filenames[k][0] + '\n'
                print '\nTestdata: ' + test_filenames[j][0] + '\n'
            # Tag the boundary test file with the trained model via crfsuite.
            boundary_result = boundary4testing_list[j] + '.result'
            os.system('crfsuite tag -m ' + b_model + ' ' +
                      boundary4testing_list[j] + ' > ' + boundary_result)
            if flag_print:
                print 'boundary test result generated!'
            evaluation.eval(boundary4testing_list[j], boundary_result,
                            'boundary', sen_ent4testing_list[j], ebao_dic)
            if flag_print:
                print test_filenames[j][0] + 'boundary test evaluated!'
            # Tag the class test file with the trained model via crfsuite.
            class_result = class4testing_list[j] + '.result'
            os.system('crfsuite tag -m ' + c_model + ' ' +
                      class4testing_list[j] + ' > ' + class_result)
            if flag_print:
                print 'class test result generated!'
            evaluation.eval(class4testing_list[j], class_result, 'class',
                            sen_ent4testing_list[j], ebao_dic)
            if flag_print:
                print test_filenames[j][0] + 'class test evaluated!'
            # Use the API to derive the class vectors for the data produced by
            # the boundary model, and predict a class for each entity in every
            # sentence (original comment translated from Chinese).
            post_processing = '1'
            processing.predictClassAfterBoundaryAndEval(
                boundary_result, sentence_list_test[j], sen_tags_list_test[j],
                c_model, ebao_dic, '0', test_filenames[j][1])
            processing.predictClassAfterBoundaryAndEval(
                boundary_result, sentence_list_test[j], sen_tags_list_test[j],
                c_model, ebao_dic, '1', test_filenames[j][1])
            if flag_print:
                print test_filenames[j][0] + 'combine develop evaluated!'
    print 'train end'
rbltemp = rblstack.pop() randombaseline.append(rbltemp) if len(rbltemp) > 1: randomsplit = random.randint(1, len(rbltemp) - 1) split1 = copy.deepcopy(rbltemp) split2 = set([]) for rbli in range(randomsplit): split2.add(split1.pop()) rblstack.append(split1) rblstack.append(split2) randombaseline = [x for x in randombaseline if len(x) > 1] # print(randombaseline) # Evaluate # Flat tree, random tree, test tree flateval = eval(goldlabeled, flatbaseline) aveprecision[0] += flateval[0] averecall[0] += flateval[1] avefscore[0] += flateval[2] randomeval = eval(goldlabeled, randombaseline) aveprecision[1] += randomeval[0] averecall[1] += randomeval[1] avefscore[1] += randomeval[2] testeval = eval(goldlabeled, testlabeled) aveprecision[2] += testeval[0] averecall[2] += testeval[1] avefscore[2] += testeval [2] # Print results to screen
def test2(data_path, config):
    """Thin wrapper that delegates straight to evaluation.eval."""
    evaluation.eval(data_path, config)
def evaluate(base_dir='D:/CMP/4th/GP/Test/Tom McKenzie - Directions/'):
    """Run evaluation.eval on the 'ref' and 'est' folders under `base_dir`.

    Generalized: the absolute Windows path was hard-coded three times; it is
    now a parameter whose default preserves the original behavior.
    `base_dir` must end with a path separator.
    """
    evaluation.eval(base_dir + 'ref', base_dir + 'est', base_dir)
def train_model_mini_batches_update(train, test, dictionary, params):
    #####################################################################################################
    # training model using 50% of positive and 50% of negative data in mini batch
    #####################################################################################################
    """Pad the commit message/code data, build a DefectNet, and train it with
    Adam + BCE; per-epoch metrics are printed for train and test batches and a
    checkpoint plus a log file are written every 5 epochs.

    NOTE(review): the Adam learning rate is taken from params.l2_reg_lambda —
    presumably a reused hyperparameter slot; confirm intent.
    """
    ids_train, labels_train, msg_train, code_train = train
    ids_test, labels_test, msg_test, code_test = test
    dict_msg, dict_code = dictionary
    print('Dictionary message: %i -- Dictionary code: %i' % (len(dict_msg), len(dict_code)))
    print('Training data')
    info_label(labels_train)
    # Pad messages and code to fixed lengths using the vocabularies.
    pad_msg_train = padding_data(data=msg_train, dictionary=dict_msg, params=params, type='msg')
    pad_code_train = padding_data(data=code_train, dictionary=dict_code, params=params, type='code')
    print('Testing data')
    info_label(labels_test)
    pad_msg_test = padding_data(data=msg_test, dictionary=dict_msg, params=params, type='msg')
    pad_code_test = padding_data(data=code_test, dictionary=dict_code, params=params, type='code')
    # set up parameters
    params.cuda = (not params.no_cuda) and torch.cuda.is_available()
    del params.no_cuda
    params.filter_sizes = [int(k) for k in params.filter_sizes.split(',')]
    # Each run checkpoints under a timestamped sub-directory.
    params.save_dir = os.path.join(
        params.save_dir, datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    params.vocab_msg, params.vocab_code = len(dict_msg), len(dict_code)
    # Single-label (1-D labels) vs multi-label output dimensionality.
    if len(labels_train.shape) == 1:
        params.class_num = 1
    else:
        params.class_num = labels_train.shape[1]
    params.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # create and train the defect model
    model = DefectNet(args=params)
    if torch.cuda.is_available():
        model = model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=params.l2_reg_lambda)
    steps = 0
    batches_test = mini_batches(X_msg=pad_msg_test, X_code=pad_code_test, Y=labels_test)
    write_log = list()
    for epoch in range(1, params.num_epochs + 1):
        # building batches for training model (re-sampled every epoch)
        batches_train = mini_batches_update(X_msg=pad_msg_train, X_code=pad_code_train, Y=labels_train)
        for batch in batches_train:
            pad_msg, pad_code, labels = batch
            # Move inputs to GPU when available; labels become float either way.
            if torch.cuda.is_available():
                pad_msg, pad_code, labels = torch.tensor(
                    pad_msg).cuda(), torch.tensor(
                        pad_code).cuda(), torch.cuda.FloatTensor(labels)
            else:
                pad_msg, pad_code, labels = torch.tensor(pad_msg).long(
                ), torch.tensor(pad_code).long(), torch.tensor(labels).float()
            optimizer.zero_grad()
            # The model returns (features, prediction); only the prediction
            # participates in the loss here.
            ftr, predict = model.forward(pad_msg, pad_code)
            loss = nn.BCELoss()
            loss = loss(predict, labels)
            loss.backward()
            optimizer.step()
            steps += 1
            if steps % params.log_interval == 0:
                print('\rEpoch: {} step: {} - loss: {:.6f}'.format(
                    epoch, steps, loss.item()))
        # End-of-epoch metrics on train and test batches; test metrics logged.
        print('Epoch: %i ---Training data' % (epoch))
        acc, prc, rc, f1, auc_ = eval(data=batches_train, model=model)
        print(
            'Accuracy: %f -- Precision: %f -- Recall: %f -- F1: %f -- AUC: %f'
            % (acc, prc, rc, f1, auc_))
        print('Epoch: %i ---Testing data' % (epoch))
        acc, prc, rc, f1, auc_ = eval(data=batches_test, model=model)
        print(
            'Accuracy: %f -- Precision: %f -- Recall: %f -- F1: %f -- AUC: %f'
            % (acc, prc, rc, f1, auc_))
        write_log.append(
            'Epoch - testing: %i --- Accuracy: %f -- Precision: %f -- Recall: %f -- F1: %f -- AUC: %f'
            % (epoch, acc, prc, rc, f1, auc_))
        # Checkpoint every 5 epochs and flush the metric log.
        if epoch % 5 == 0:
            save(model, params.save_dir, 'epoch', epoch)
            write_file(params.save_dir + '/log.txt', write_log)
def train_model(transformer, eventer, dataset, test_dataset, epochs, criterion,
                optimizer, SRC, TRG):
    """Jointly train the sentence `transformer` and the cross-sentence
    `eventer`: each document's sentences are encoded, the eventer mixes them,
    the root sentence's features condition the decoder, and generation loss is
    backpropagated through both models. Checkpoints are saved every epoch.

    NOTE(review): `SRC` and `TRG` are accepted but unused in this body —
    confirm whether they are needed. Shape comments below are inferred from
    the indexing, not declared anywhere — verify against the data loader.
    """
    print("training model...")
    # eval_loss1 = eval(transformer, eventer, test_dataset)
    # eval_loss2 = shuffle_eval(transformer, eventer, test_dataset)
    for epoch in range(epochs):
        transformer.train()
        eventer.train()
        cur_lr = get_lr(optimizer)
        print("Current lr ", cur_lr)
        total_loss = []
        for index, (srcs, srcs_len, trgs, trgs_len, mask_tok,
                    root) in enumerate(tqdm(dataset)):
            # srcs appears to be (batch, sent_num, ...) — sentence axis is dim 1.
            sent_num = srcs.size(1)
            srcs = srcs.cuda()
            mask_tok = mask_tok.cuda()
            root = root.cuda()  # B * S
            trgs = trgs.cuda()
            # Teacher forcing: decoder input drops the final target token.
            trg_input = trgs[:, :-1]
            trg_input = trg_input.cuda()
            # Build one source mask per sentence; trg_mask is recomputed each
            # iteration but identical, so the last value is used.
            src_masks = [None] * sent_num
            trg_mask = None
            for i in range(sent_num):
                src_masks[i], trg_mask = create_masks(srcs[:, i], trg_input)
            for i in range(sent_num):
                src_masks[i] = src_masks[i].squeeze().cuda()
            src_masks = torch.stack([m for m in src_masks], dim=1)
            # Flatten all sentence masks to word level and restrict the
            # token-token attention mask to valid words.
            src_word_masks = src_masks.view(src_masks.size(0), 1, -1)
            # print(src_word_masks.size())
            src_word_tok_masks = src_word_masks.repeat(1, src_word_masks.size(2), 1)
            # print("word_mask", src_word_tok_masks[0][0])
            # print("mask_tok", mask_tok[0][0])
            mask_tok = mask_tok * src_word_tok_masks.long()
            # print("mask_tok", mask_tok[0][0])
            trg_mask = trg_mask.cuda()
            # Encode each sentence independently with the shared encoder.
            events = [None] * sent_num
            for i in range(sent_num):
                events[i] = transformer.encoder(srcs[:, i],
                                                src_masks[:, i].unsqueeze(1))
                # print(events[i].size())
                # print(src_masks[0,i])
                # events[i] = pool(events[i], src_masks[:, i])
            # Concatenate sentence encodings and mix them with the eventer.
            eventers = torch.cat([e for e in events], dim=-2)
            eventers = eventer(eventers, mask_tok)
            eventers = eventers.view(eventers.size(0), sent_num, -1,
                                     eventers.size(2))
            # Gather the features and mask of the designated root sentence.
            feat_root = root.view(-1, 1, 1, 1).expand(-1, 1, eventers.size(2),
                                                      eventers.size(3))
            root_feat = torch.gather(eventers, 1, feat_root).squeeze(1)
            mask_root = root.view(-1, 1, 1).expand(-1, 1, src_masks.size(2))
            root_masks = torch.gather(src_masks, 1, mask_root)
            # print(root_feat.size(), root_masks.size())
            # pred = transformer.out(transformer.decoder(trg_input, sent_feats, mask_sent.unsqueeze(1), trg_mask)[0])
            pred = transformer.out(
                transformer.decoder(trg_input, root_feat, root_masks,
                                    trg_mask)[0])
            # Targets are the inputs shifted by one position.
            ys = trgs[:, 1:].contiguous().view(-1).cuda()
            optimizer.zero_grad()
            loss = criterion(pred.view(-1, pred.size(-1)), ys)
            loss.backward()
            # Clip gradients of both models before the shared optimizer step.
            torch.nn.utils.clip_grad_norm_(transformer.parameters(), 0.1)
            torch.nn.utils.clip_grad_norm_(eventer.parameters(), 0.1)
            optimizer.step()
            total_loss.append(loss.item())
        print(f"Epoch {epoch} training loss : ",
              sum(total_loss) / len(total_loss))
        # Plain and shuffled evaluation passes on the held-out set.
        eval_loss1 = eval(transformer, eventer, test_dataset)
        eval_loss2 = shuffle_eval(transformer, eventer, test_dataset)
        print(f"Epoch {epoch} evaluation loss : ", eval_loss1, eval_loss2)
        # Per-epoch checkpoints for both models.
        torch.save(transformer.state_dict(), f'models/transformer{epoch}.pth')
        torch.save(eventer.state_dict(), f'models/eventer{epoch}.pth')
def main(_):
    """Entry point of a BERT sequence-classification run (TF1 TPUEstimator).

    Driven entirely by FLAGS: selects a data processor by task name, builds
    a TPUEstimator (falls back to CPU/GPU when use_tpu is False), then runs
    any combination of training, evaluation and prediction.  Writes
    label_ids.txt, train/eval/predict TF records, eval_results.txt and
    test_results.tsv under FLAGS.output_dir.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    # Registry of supported tasks -> data processor classes.
    processors = {
        "cola": ColaProcessor,
        "mnli": MnliProcessor,
        "mrpc": MrpcProcessor,
        "xnli": XnliProcessor,
        "intent": IntentProcessor,
    }
    # Fail early if the cased/uncased flag contradicts the checkpoint name.
    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)
    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True."
        )
    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
    # BERT's position embeddings cap the usable sequence length.
    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))
    tf.gfile.MakeDirs(FLAGS.output_dir)
    task_name = FLAGS.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))
    processor = processors[task_name]()
    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)
    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))
    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        # Total steps = examples / batch size * epochs; warmup is a fraction.
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
    label_list = processor.get_labels()
    # Persist the label -> id mapping so predictions can be decoded later.
    with open(FLAGS.output_dir + '/label_ids.txt', 'w') as output_file:
        for ind, label in enumerate(label_list):
            output_file.write(label + '\t' + str(ind) + '\n')
    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=len(label_list),
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu)
    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)
    if FLAGS.do_train:
        # Serialize the training set as TF records once, then stream it.
        train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        file_based_convert_examples_to_features(train_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, train_file)
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num examples = %d", len(train_examples))
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on. These do NOT count towards the metric (all tf.metrics
            # support a per-instance weight, and these get a weight of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(PaddingInputExample())
        eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, eval_file)
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)
        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)
        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)
        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info(" %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(PaddingInputExample())
        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        file_based_convert_examples_to_features(predict_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, predict_file)
        tf.logging.info("***** Running prediction*****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)
        predict_drop_remainder = True if FLAGS.use_tpu else False
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder)
        result = estimator.predict(input_fn=predict_input_fn)
        # One tab-separated row of class probabilities per real example;
        # padding rows (index >= num_actual_predict_examples) are dropped.
        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "test_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as writer:
            num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            for (i, prediction) in enumerate(result):
                probabilities = prediction["probabilities"]
                if i >= num_actual_predict_examples:
                    break
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities) + "\n"
                writer.write(output_line)
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples
        # evaluation on test set
        # `eval` here is a project-local scorer (shadows the builtin); 'oos'
        # (out-of-scope) gets special treatment when present in the label set.
        oos_id = -1
        if 'oos' in label_list:
            oos_id = label_list.index('oos')
        eval(os.path.join(FLAGS.data_dir, "sentences.test.out"),
             output_predict_file,
             FLAGS.output_dir + '/label_ids.txt',
             oos_id=oos_id)
def train(config):
    """Minimax training loop for SD-GAR (discriminator vs. sampling generator).

    Loads interaction data, alternates discriminator SGD epochs with
    periodic generator (alias-table) updates, evaluates every 5 epochs and
    checkpoints the best model by NDCG@50 under config['output_dir'].

    Fixes over the previous version:
      * the generator-embedding pickle file is now closed via ``with``
        (it was previously opened and never closed);
      * local ``mretic_name`` typo renamed to ``metric_name``.
    """
    # load data
    user_pos_train, user_pos_test = load_data(config)
    all_users = list(user_pos_train.keys())
    all_users.sort()
    user_num = config['user_num']
    item_num = config['item_num']
    if not os.path.exists(config['output_dir']):
        os.mkdir(config['output_dir'])
    # Persist the run configuration next to the logs for reproducibility.
    with open(os.path.join(config['output_dir'], 'config.json'), 'w') as fout:
        print(json.dumps(config), file=fout)
    train_log = open(os.path.join(config['output_dir'], 'train_log.txt'), 'w')
    # build model
    generator = GEN(config)
    discriminator = DIS(config)
    saver = tf.compat.v1.train.Saver(max_to_keep=1)
    model_path = os.path.join(config['output_dir'], 'model/SD-GAR')
    os.environ["CUDA_VISIBLE_DEVICES"] = config['CUDA_VISIBLE_DEVICES']
    config_tf = tf.compat.v1.ConfigProto()
    config_tf.gpu_options.allow_growth = True
    sess = tf.compat.v1.Session(config=config_tf)
    # init variable
    sess.run(tf.compat.v1.global_variables_initializer())
    # init embeddings
    print("%s; initializing embeddings and constructing alias table..." %
          get_current_time())
    prob_users, prob_items = get_init_embeddings(config)
    alias_table = AliasTable(prob_users, prob_items)
    sampler_d = SamplerD(config, alias_table=alias_table,
                         generator=generator, sess=sess)
    print("%s; finished" % get_current_time())
    # minimax training
    best, best_gen = 0., 0.
    global Train_d_cnt, Gen_data_for_d_time, Train_d_time
    global Train_g_cnt, Gen_data_for_g_time, Train_g_time
    metric_name = [
        'P@3', 'P@5', 'P@10', 'P@50', 'NDCG@3', 'NDCG@5', 'NDCG@10',
        'NDCG@50', 'MRR'
    ]
    for epoch in range(76):
        # train discriminator (skipped at epoch 0 so the initial model is
        # evaluated before any update)
        if epoch > 0:
            Train_d_cnt += 1
            batch_num = 0
            # the loss incorporates four parts including total loss, positive
            # sample loss, negative sample loss and regularization
            loss_arr = np.array([0.] * 4)
            time_start = time.time()
            sampler_d.generate_data(user_pos_train,
                                    config['dis_sample_num'], shuffle=True)
            sampler_d.generate_neg_scores()
            Gen_data_for_d_time += time.time() - time_start
            data_len = len(sampler_d.data)
            time_start = time.time()
            index = 0
            while index < data_len:
                batch = sampler_d.get_next_batch(config['batch_size'])
                users = batch['users']
                pos_items = batch['pos_items']
                neg_items = batch['neg_items']
                neg_scores = batch['neg_scores']
                index += config['batch_size']
                _, batch_loss_list = sess.run(
                    [discriminator.get_update(), discriminator.get_loss()],
                    feed_dict={
                        discriminator.u: users,
                        discriminator.pos_i: pos_items,
                        discriminator.neg_i: neg_items,
                        discriminator.g_scores: neg_scores
                    })
                # Per-sample losses are averaged before accumulation.
                batch_loss_list[1] = np.mean(batch_loss_list[1])
                batch_loss_list[2] = np.mean(batch_loss_list[2])
                batch_loss_arr = np.array(batch_loss_list)
                loss_arr += batch_loss_arr
                batch_num += 1
            Train_d_time += time.time() - time_start
            loss_arr = loss_arr / batch_num
            curr_time = get_current_time()
            buf = "%s; epoch: %s; loss: %s; pos_loss: %s; neg_loss: %s; regular_loss: %s" % (
                curr_time, epoch, loss_arr[0], loss_arr[1], loss_arr[2],
                loss_arr[3])
            output_to_file(buf, train_log)
        # Periodic evaluation; checkpoint when NDCG@50 improves.
        if epoch % 5 == 0:
            result = eval(sess, discriminator, user_pos_train, user_pos_test)
            curr_time = get_current_time()
            buf = "\t%s; metrics: \t%s" % (curr_time, '\t'.join(
                ["%7s" % x for x in metric_name]))
            output_to_file(buf, train_log)
            buf = "\t%s; performance:\t%s" % (curr_time, '\t'.join(
                ["%.5f" % x for x in result]))
            output_to_file(buf, train_log)
            ndcg_50 = result[7]
            if ndcg_50 > best:
                buf = '\tbest ndcg@50, saving the current model'
                output_to_file(buf, train_log)
                best = ndcg_50
                saver.save(sess, model_path)
                # Close the handle deterministically (was leaked before).
                with open(
                        os.path.join(config['output_dir'],
                                     'gen_embeddings.txt'),
                        'wb') as f_gen_embeddings:
                    pickle.dump(
                        [alias_table.prob_users, alias_table.prob_items],
                        f_gen_embeddings)
        # Periodic generator update: recompute the partition function and
        # refresh both alias tables from the new embeddings.
        if epoch % 5 == 0:
            Train_g_cnt += 1
            print("%s; computing partition function..." % get_current_time())
            z_u, d_logits = get_partition_funciton(sess, discriminator,
                                                   generator, alias_table,
                                                   config)
            # update user embeddings
            print("%s; computing u..." % get_current_time())
            prob_users = get_new_user_embeddings(sess, discriminator,
                                                 generator, z_u, alias_table,
                                                 config)
            print("%s; update alias table u..." % get_current_time())
            time_start = time.time()
            # update user alias table
            alias_table.update_users(prob_users)
            Train_g_time += time.time() - time_start
            print("%s; finish updating..." % get_current_time())
            print("%s; computing v..." % get_current_time())
            # update item embeddings
            prob_items = get_new_item_embeddings(sess, discriminator,
                                                 generator, z_u, alias_table,
                                                 config)
            print("%s; update alias table v..." % get_current_time())
            # update item alias table
            time_start = time.time()
            alias_table.update_items(prob_items)
            Train_g_time += time.time() - time_start
            print("%s; finish updating..." % get_current_time())
    # Final timing summary (averages per discriminator/generator round).
    output_to_file(
        "cost on generating data for d: %s" %
        (Gen_data_for_d_time / Train_d_cnt), train_log)
    output_to_file("cost on training d: %s" % (Train_d_time / Train_d_cnt),
                   train_log)
    output_to_file(
        "cost on generating data for g: %s" %
        (Gen_data_for_g_time / Train_g_cnt), train_log)
    output_to_file("cost on training g: %s" % (Train_g_time / Train_g_cnt),
                   train_log)
    train_log.close()
def evaluate(self, n_samples=100): self.model.eval() evaluation.eval(self.kg_val, self.model, n_samples)
def train_model_loss_undersampling(project, train, test, dictionary, params):
    #####################################################################################################
    # training model using penalized classification technique (modify loss function) and under sampling technique
    #####################################################################################################
    """Train DefectNet with a per-project penalized loss and undersampling.

    train/test: (ids, labels, msg, code) tuples; dictionary: (dict_msg, dict_code).
    For 'openstack' a weighted custom loss is used; 'qt' is not yet supported
    (prints a message and exits); any other project falls back to BCE.
    Evaluates on train and test data each epoch and saves the model every
    5 epochs.

    Fixes over the previous version:
      * nn.BCELoss() is instantiated once instead of on every batch;
      * loss.backward()/optimizer.step() are hoisted out of the per-project
        branches instead of being duplicated in each one.
    """
    ids_train, labels_train, msg_train, code_train = train
    ids_test, labels_test, msg_test, code_test = test
    dict_msg, dict_code = dictionary
    print('Dictionary message: %i -- Dictionary code: %i' %
          (len(dict_msg), len(dict_code)))
    print('Training data')
    info_label(labels_train)
    pad_msg_train = padding_data(data=msg_train, dictionary=dict_msg,
                                 params=params, type='msg')
    pad_code_train = padding_data(data=code_train, dictionary=dict_code,
                                  params=params, type='code')
    print('Testing data')
    info_label(labels_test)
    pad_msg_test = padding_data(data=msg_test, dictionary=dict_msg,
                                params=params, type='msg')
    pad_code_test = padding_data(data=code_test, dictionary=dict_code,
                                 params=params, type='code')
    # set up parameters
    params.cuda = (not params.no_cuda) and torch.cuda.is_available()
    del params.no_cuda
    params.filter_sizes = [int(k) for k in params.filter_sizes.split(',')]
    params.save_dir = os.path.join(
        params.save_dir, datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    params.vocab_msg, params.vocab_code = len(dict_msg), len(dict_code)
    # Single-label vs multi-label output size.
    if len(labels_train.shape) == 1:
        params.class_num = 1
    else:
        params.class_num = labels_train.shape[1]
    params.device = torch.device(
        'cuda' if torch.cuda.is_available() else 'cpu')
    # create and train the defect model
    model = DefectNet(args=params)
    if torch.cuda.is_available():
        model = model.cuda()
    # NOTE(review): lr is set from l2_reg_lambda — the learning rate and the
    # L2 coefficient appear conflated; kept as-is for behavior compatibility.
    optimizer = torch.optim.Adam(model.parameters(), lr=params.l2_reg_lambda)
    criterion = nn.BCELoss()  # hoisted: one instance for all batches
    steps = 0
    batches_test = mini_batches(X_msg=pad_msg_test, X_code=pad_code_test,
                                Y=labels_test)
    for epoch in range(1, params.num_epochs + 1):
        # building batches for training model (fresh undersample each epoch)
        batches_train = mini_batches_undersampling(X_msg=pad_msg_train,
                                                   X_code=pad_code_train,
                                                   Y=labels_train)
        for batch in batches_train:
            pad_msg, pad_code, labels = batch
            if torch.cuda.is_available():
                pad_msg, pad_code, labels = torch.tensor(
                    pad_msg).cuda(), torch.tensor(
                        pad_code).cuda(), torch.cuda.FloatTensor(labels)
            else:
                pad_msg, pad_code, labels = torch.tensor(pad_msg).long(
                ), torch.tensor(pad_code).long(), torch.tensor(labels).float()
            optimizer.zero_grad()
            predict = model.forward(pad_msg, pad_code)
            # Per-project loss choice; the backward/step below is shared.
            if project == 'openstack':
                loss = custom_loss(y_pred=predict, y_true=labels,
                                   weights=[0.1, 1])
            elif project == 'qt':
                print(
                    'We need to find the weights for negative and positive labels later'
                )
                exit()
            else:
                loss = criterion(predict, labels)
            loss.backward()
            optimizer.step()
            steps += 1
            if steps % params.log_interval == 0:
                print('\rEpoch: {} step: {} - loss: {:.6f}'.format(
                    epoch, steps, loss.item()))
        print('Epoch: %i ---Training data' % (epoch))
        acc, prc, rc, f1, auc_ = eval(data=batches_train, model=model)
        print(
            'Accuracy: %f -- Precision: %f -- Recall: %f -- F1: %f -- AUC: %f'
            % (acc, prc, rc, f1, auc_))
        print('Epoch: %i ---Testing data' % (epoch))
        acc, prc, rc, f1, auc_ = eval(data=batches_test, model=model)
        print(
            'Accuracy: %f -- Precision: %f -- Recall: %f -- F1: %f -- AUC: %f'
            % (acc, prc, rc, f1, auc_))
        if epoch % 5 == 0:
            save(model, params.save_dir, 'epoch', epoch)
current_state[key] = netE_dict[key] trainer.netE.load_state_dict(current_state) if args.eval_domain == "target": eval_domain = trainer.target_val_loader_eval_mode name = cfg.DATA_LOADER.TARGET + "_VAL" elif args.eval_domain == "source": eval_domain = trainer.src_train_loader_eval_mode name = cfg.DATA_LOADER.SOURCE + "_TRAIN" else: print("Not valid domain: {}".format(args.eval_domain)) exit() if args.save_feature: mean_acc, preds, probs, features = evaluation.eval( 1, name, eval_domain, trainer.netG, trainer.netE, return_probs=True, return_features=True) else: mean_acc, preds, probs = evaluation.eval(1, name, eval_domain, trainer.netG, trainer.netE, return_probs=True, return_features=False) if mean_acc is not None: if (trainer.distributed == False) or (dist.get_rank() == 0): trainer.logger.info(mean_acc) if not os.path.exists(os.path.dirname(args.output_path)):
def main():
    """Evaluate a trained w2vvpp text-to-video retrieval model.

    Loads a checkpoint, encodes the test collection's video features once,
    then for each query set encodes the captions, ranks videos per caption,
    optionally computes recall/NDCG-style metrics (msrvtt10ktest only) and
    writes per-caption ranked score lists to the SimilarityIndex tree.
    """
    opt = parse_args()
    print(json.dumps(vars(opt), indent=2))
    rootpath = opt.rootpath
    testCollection = opt.testCollection
    resume_file = os.path.join(opt.model_path)
    if not os.path.exists(resume_file):
        logging.info(resume_file + ' not exists.')
        sys.exit(0)
    # Load checkpoint
    logger.info('loading model...')
    checkpoint = torch.load(resume_file)
    epoch = checkpoint['epoch']
    best_perf = checkpoint['best_perf']
    config = checkpoint['config']
    # Re-point the word2vec binary to this machine's feature path when the
    # config carries a text-to-vec w2v component.
    if hasattr(config, 't2v_w2v'):
        w2v_feature_file = os.path.join(rootpath, 'word2vec', 'flickr',
                                        'vec500flickr30m', 'feature.bin')
        config.t2v_w2v.w2v.binary_file = w2v_feature_file
    # Construct the model
    model = get_model('w2vvpp')(config)
    print(model.vis_net)
    print(model.txt_net)
    model.load_state_dict(checkpoint['model'])
    print("=> loaded checkpoint '{}' (epoch {}, best_perf {})".format(
        resume_file, epoch, best_perf))
    vis_feat_file = BigFile(
        os.path.join(rootpath, testCollection, 'FeatureData',
                     config.vid_feat))
    # NOTE(review): the file handle opened inside map() is never closed, and
    # vis_ids is a lazy map object until encode_vis replaces it — confirm
    # data.vis_provider materializes it before reuse.
    vis_ids = map(
        str.strip,
        open(
            os.path.join(rootpath, testCollection, 'VideoSets',
                         testCollection + '.txt')))
    vis_loader = data.vis_provider({
        'vis_feat': vis_feat_file,
        'vis_ids': vis_ids,
        'pin_memory': True,
        'batch_size': opt.batch_size,
        'num_workers': opt.num_workers
    })
    vis_embs = None
    for query_set in opt.query_sets.split(','):
        output_dir = os.path.join(rootpath, testCollection, 'SimilarityIndex',
                                  query_set, opt.sim_name)
        pred_result_file = os.path.join(output_dir, 'id.sent.score.txt')
        # Skip query sets whose results already exist (unless overwriting).
        if util.checkToSkip(pred_result_file, opt.overwrite):
            continue
        util.makedirs(output_dir)
        # Encode the videos lazily, only once across all query sets.
        if vis_embs is None:
            logger.info('Encoding videos')
            vis_embs, vis_ids = evaluation.encode_vis(model, vis_loader)
        capfile = os.path.join(rootpath, testCollection, 'TextData',
                               query_set)
        # load text data
        txt_loader = data.txt_provider({
            'capfile': capfile,
            'pin_memory': True,
            'batch_size': opt.batch_size,
            'num_workers': opt.num_workers
        })
        logger.info('Encoding %s captions' % query_set)
        txt_embs, txt_ids = evaluation.encode_txt(model, txt_loader)
        # Caption-by-video similarity matrix; argsort ascending, so ranked
        # lists below are read back-to-front ([::-1]) for best-first order.
        t2i_matrix = evaluation.compute_sim(txt_embs, vis_embs,
                                            measure=config.measure)
        inds = np.argsort(t2i_matrix, axis=1)
        if testCollection == 'msrvtt10ktest':
            # Ground truth: a video matches a caption when its id equals the
            # prefix of the caption id (before '#').
            label_matrix = np.zeros(inds.shape)
            for index in range(inds.shape[0]):
                ind = inds[index][::-1]
                label_matrix[index][np.where(
                    np.array(vis_ids)[ind] == txt_ids[index].split('#')[0])
                                    [0]] = 1
            (r1, r5, r10, medr, meanr, mir,
             mAP) = evaluation.eval(label_matrix)
            sum_recall = r1 + r5 + r10
            tempStr = " * Text to video:\n"
            tempStr += " * r_1_5_10: {}\n".format(
                [round(r1, 3), round(r5, 3), round(r10, 3)])
            tempStr += " * medr, meanr, mir: {}\n".format(
                [round(medr, 3), round(meanr, 3), round(mir, 3)])
            tempStr += " * mAP: {}\n".format(round(mAP, 3))
            tempStr += " * " + '-' * 10
            print(tempStr)
            # NOTE(review): this handle is never closed explicitly — relies
            # on garbage collection; consider a with-block.
            open(os.path.join(output_dir, 'perf.txt'), 'w').write(tempStr)
        start = time.time()
        # One line per caption: "<caption_id> <video_id> <score> ..." sorted
        # best-first.
        with open(pred_result_file, 'w') as fout:
            for index in range(inds.shape[0]):
                ind = inds[index][::-1]
                fout.write(txt_ids[index] + ' ' + ' '.join(
                    [vis_ids[i] + ' %s' % t2i_matrix[index][i]
                     for i in ind]) + '\n')
        print('writing result into file time: %.3f seconds\n' %
              (time.time() - start))
configuration_search_model_path, retention_configuration, LR_BERT=args.LR_BERT, ) elif args.EVAL_ONLY: ## Do the evaluation on the Dev data dev_x, dev_y = dataset_parser.get_dev_data() ## Obtain number of layers from the config file with open(args.BERT_CONFIG_PATH, 'r') as bc: bert_config = json.loads(bc.read()) num_layers = bert_config['num_hidden_layers'] loss, accuracy = eval(args, dev_x, dev_y, num_layers, num_classes, seq_len) with open(LOGFILE_PATH, 'a') as fp: fp.write("\nloss : " + str(loss)) fp.write("\naccuracy : " + str(accuracy * 100.0)) elif args.PREDICT_ONLY: ## Do the prediction on the test data test_x, test_y = dataset_parser.get_test_data() ## Obtain number of layers from the config file with open(args.BERT_CONFIG_PATH, 'r') as bc: bert_config = json.loads(bc.read()) num_layers = bert_config['num_hidden_layers']
def find_path_func(X): # COOL缩放过 COOL, laplace_w, direction_w, magnitude_w = X print(COOL, laplace_w, direction_w, magnitude_w) COOL = int(COOL * 100) img_G_path = 'gary.jpg' img_RGB_path = 'rbg.jpg' contours_g, img_rgb = findContours_g(img_G_path, img_RGB_path) # 高斯滤波后生成特征图 img_rgb_gaussian = cv2.GaussianBlur(img_rgb, (5, 5), 0) # 高斯滤波 scissors = Scissors(img_rgb_gaussian, laplace_w=laplace_w, direction_w=direction_w, magnitude_w=magnitude_w, use_dynamic_features=False) # 灰度图寻找较大边缘 contours_g_out = [] scissors_list = [] cool_list = [] for k in contours_g: if len(k) > 150: contours_g_out.append(k) scissors_list.append(scissors) cool_list.append(COOL) # # 单线进程方法!! # out_end = [] # for c in contours_g_out: # o = Intelligent_scissors(c, scissors, COOL) # out_end.append(o) # # 单线进程方法2!! Intelligent_scissors_par = partial(Intelligent_scissors, scissors=scissors, cool_number=COOL) print('map') out_end =list(map(Intelligent_scissors_par,contours_g_out)) print(out_end) # # 找边缘,多进程 # p = Pool() # out_end = p.map(Intelligent_scissors, contours_g_out, scissors_list, cool_list) # print(out_end) #找边缘,多进程,偏函数 # Intelligent_scissors_par=partial(Intelligent_scissors,scissors=scissors,cool_number=COOL) # p = Pool(14) # out_end = p.map(Intelligent_scissors_par, contours_g_out) # p.join() # print(out_end) # # 找边缘,多线程,偏函数 # Intelligent_scissors_par=partial(Intelligent_scissors,scissors=scissors,cool_number=COOL) # p =ThreadPool() # out_end = p.map(Intelligent_scissors_par, contours_g_out) # p.join() # print(out_end) # 创造一个遮罩 mask = np.zeros(img_rgb.shape).astype(img_rgb.dtype) mask_out = cv2.drawContours(mask, out_end, -1, (255, 255, 255), -1) # 画边缘 # 画 mask_out_path = str(COOL) + '_' + str(laplace_w) + '_' + str(direction_w) + '_' + str( magnitude_w) + '_' + img_RGB_path mask_out_path = os.path.join(r'/tmp/pycharm_project_836/img_out', mask_out_path) cv2.imwrite(mask_out_path, mask_out) # iou act_path = 'rbg_1.jpg' iou, pa = eval(mask_out_path, act_path) return -iou # 最优化取最小值!!取负数!!
def find_path_one_img(img_PATH, COOL, laplace_w, direction_w, magnitude_w):
    """Trace the edges of one image pair with intelligent scissors and score it.

    img_PATH: (gray_filename, rgb_filename) pair resolved against the fixed
    dataset directories.  Writes an overlay image and a filled mask into a
    parameter-named output directory, then returns -IoU of the mask against
    the annotated ground truth (negated for use with a minimizer).

    Fixes over the previous version:
      * the multiprocessing Pool is now managed with a ``with`` block so
        worker processes are always cleaned up (it was previously leaked);
      * directory creation uses os.makedirs(exist_ok=True) instead of the
        race-prone exists()+mkdir pattern;
      * dead commented-out code removed, comments translated to English.
    """
    img_G, img_RGB = img_PATH
    img_G_path = os.path.join(r'/home/zhny/pycharm01/gary1', img_G)
    img_RGB_path = os.path.join(r'/home/zhny/pycharm01/rbg1', img_RGB)
    img_mark_path = os.path.join(r'/home/zhny/pycharm01/mark_or', img_RGB)
    # Initial binarized contours and the RGB image itself.
    contours_g, img_rgb = findContours_g(img_G_path, img_RGB_path)
    # Gaussian blur before building the scissors feature maps.
    img_rgb_gaussian = cv2.GaussianBlur(img_rgb, (5, 5), 0)
    scissors = Scissors(img_rgb_gaussian, laplace_w=laplace_w,
                        direction_w=direction_w, magnitude_w=magnitude_w,
                        use_dynamic_features=False)
    # Keep only the larger edges found in the gray image (> 50 points).
    contours_g_out = [k for k in contours_g if len(k) > 50]
    # Overlay the seed contours (green) on the RGB image.
    img_rgb_c = cv2.drawContours(img_rgb, contours_g_out, -1, (0, 255, 0), 3)
    # Trace each contour with the scissors in parallel; the with-block
    # guarantees the worker processes are terminated.
    Intelligent_scissors_par = partial(Intelligent_scissors,
                                       scissors=scissors, cool_number=COOL)
    with Pool() as p2:
        out_end = p2.map(Intelligent_scissors_par, contours_g_out)
    # Overlay the traced edges (blue) and build a filled mask.
    img_out = cv2.drawContours(img_rgb_c, out_end, -1, (255, 0, 0), 3)
    mask = np.zeros(img_rgb.shape).astype(img_rgb.dtype)
    mask_out = cv2.drawContours(mask, out_end, -1, (255, 255, 255), -1)
    # Parameter-named output directories; create them if missing.
    path_mask = str(COOL) + '_' + str(laplace_w) + '_' + str(
        direction_w) + '_' + str(magnitude_w)
    mask_dire = os.path.join(r'/home/zhny/pycharm01/', path_mask)
    out_dire = os.path.join(mask_dire, 'out_rgb/')
    os.makedirs(mask_dire, exist_ok=True)
    os.makedirs(out_dire, exist_ok=True)
    img_out_path = os.path.join(out_dire, img_RGB)
    mask_out_path = os.path.join(mask_dire, img_RGB)
    cv2.imwrite(img_out_path, img_out)
    cv2.imwrite(mask_out_path, mask_out)
    # IoU against the annotated image (project-local eval; shadows the builtin).
    iou, pa = eval(mask_out_path, img_mark_path)
    return -iou  # optimizer minimizes, so negate to maximize IoU