def run_task(data_directory, task_id):
    """
    Parse data, build model, and run training and testing for a single task.

    :param data_directory: Path to train and test data.
    :param task_id: Task to evaluate
    """
    print("Train and test for task %d ..." % task_id)

    # Parse data
    train_files = glob.glob('%s/qa%d_*_train.txt' % (data_directory, task_id))
    test_files = glob.glob('%s/qa%d_*_test.txt' % (data_directory, task_id))

    dictionary = {"nil": 0}
    # Story shape:     (SENTENCE_SIZE, STORY_SIZE, NUM_STORIES)
    # Questions shape: (14 (see parser.py), NUM_SAMPLES)
    # QStory shape:    (SENTENCE_SIZE, NUM_SAMPLES)
    train_story, train_questions, train_qstory = parse_babi_task(train_files, dictionary, False)
    test_story, test_questions, test_qstory = parse_babi_task(test_files, dictionary, False)

    general_config = BabiConfig(train_story, train_questions, dictionary)
    memory, model, loss = build_model(general_config)

    if general_config.linear_start:
        train_linear_start(train_story, train_questions, train_qstory,
                           memory, model, loss, general_config)
    else:
        train(train_story, train_questions, train_qstory,
              memory, model, loss, general_config)

    test(test_story, test_questions, test_qstory, memory, model, loss, general_config)
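# Usage sketch for run_task, looping over all 20 bAbI tasks. The data
# directory below is an assumption (the layout of the standard bAbI
# release); point it at wherever the qa*_train.txt / qa*_test.txt files live.
if __name__ == '__main__':
    data_dir = 'data/tasks_1-20_v1-2/en'  # hypothetical path to the bAbI data
    for task_id in range(1, 21):          # bAbI defines tasks 1..20
        run_task(data_dir, task_id)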
def loadModel():
    global model, train_data, trainNameList

    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # recommended replacement for this kind of timing.
    start = time.perf_counter()
    model = mltest.load_model()
    end = time.perf_counter()
    print('loadModel time: %s Seconds' % (end - start))

    start = time.perf_counter()
    sample = train_data[trainNameList.index("75119")]
    mltest.test(model, FloorPlan(sample, train=True))
    end = time.perf_counter()
    print('test Model time: %s Seconds' % (end - start))
def train():
    graph = Graph(is_train=True)
    graph.create_model()
    sv = tf.train.Supervisor(logdir=get_path(args.logdir),
                             global_step=graph.global_step,
                             saver=graph.saver,
                             save_model_secs=600)
    sess = sv.PrepareSession()

    losses = []
    # Train until the supervisor requests a stop, logging mean loss every
    # 100 steps, then run evaluation.
    while not sv.should_stop():
        input_feed = get_data(graph.inputs_ph)
        fetches = [graph.train_op, graph.loss, graph.global_step]
        _, loss, step = sess.run(fetches, input_feed)
        losses.append(loss)
        if step % 100 == 0:
            print('Loss\t%s.' % np.mean(losses))
            losses = []

    from model.test import test
    test()
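# Hypothetical sketch of the get_data helper used above: it pairs each input
# placeholder with a batch to build the feed dict that sess.run expects.
# The names, shapes, and dummy random data are assumptions; the real
# batching logic is not shown in this snippet.
import numpy as np

def get_data(inputs_ph, batch_size=32):
    feed = {}
    for ph in inputs_ph:
        # Substitute batch_size for the unknown leading dimension.
        shape = [batch_size if d is None else d for d in ph.shape.as_list()]
        feed[ph] = np.random.rand(*shape).astype(np.float32)
    return feed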
    ), batch_size=1000, shuffle=True)  # tail of the train_loader construction (start truncated)

visualize_dataset_tensorboard(train_loader, writer)

lr = 0.01
epochs = 1

model = Net()
# model_state_dict = torch.load(str(URLs.RESULTS_PATH / 'model.pth'))
# model.load_state_dict(model_state_dict)
opt = optim.SGD(model.parameters(), lr, momentum=0.5)
# opt_state_dict = torch.load(str(URLs.RESULTS_PATH / 'optimizer.pth'))
# opt.load_state_dict(opt_state_dict)

train_losses = []
train_counter = []
test_losses = []
test_counter = [i * len(train_loader.dataset) for i in range(epochs + 1)]

device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device('cuda:0')
    model.cuda()

start_time = time.time()
test(model, test_loader, device, test_losses)
for epoch in range(1, epochs + 1):
    train(epoch, model, opt, train_loader, device, train_losses, train_counter, 10, writer)
    test(model, test_loader, device, test_losses)
writer.close()

print(f'Running time: {time.strftime("%M:%S", time.gmtime(time.time() - start_time))} sec')
plot_perf(train_counter, train_losses, test_counter, test_losses)
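# The commented-out torch.load calls above suggest checkpoints live under
# URLs.RESULTS_PATH. A minimal sketch of the matching save step; the exact
# save location and timing are assumptions, not shown in this script.
torch.save(model.state_dict(), str(URLs.RESULTS_PATH / 'model.pth'))
torch.save(opt.state_dict(), str(URLs.RESULTS_PATH / 'optimizer.pth'))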
def main():
    args, other = get_args()
    experiment = args.experiment
    anserini_path = args.anserini_path
    datasets_path = os.path.join(args.data_path, 'datasets')

    if not os.path.isdir('log'):
        os.mkdir('log')

    if args.mode == 'training':
        train(args)
    elif args.mode == 'check_performance':
        check_dev_performance(args)
    elif args.mode == 'inference':
        scores = test(args)
        print_scores(scores)
    else:
        folds_path = os.path.join(anserini_path, 'src', 'main', 'resources',
                                  'fine_tuning', args.folds_file)
        qrels_path = os.path.join(anserini_path, 'src', 'main', 'resources',
                                  'topics-and-qrels', args.qrels_file)
        topK = int(other[0])
        alpha = float(other[1])
        beta = float(other[2])
        gamma = float(other[3])
        test_folder_set = int(other[4])
        mode = other[5]

        # Divide topics according to fold parameters
        train_topics, test_topics, all_topics = [], [], []
        with open(folds_path) as f:
            folds = json.load(f)
        for i in range(0, len(folds)):
            all_topics.extend(folds[i])
            if i != test_folder_set:
                train_topics.extend(folds[i])
            else:
                test_topics.extend(folds[i])

        if args.interactive:
            sentid2text = query_sents(args)
            test(args)  # inference over each sentence

        collection_path = os.path.join(datasets_path, args.collection + '.csv') \
            if not args.interactive else args.interactive_path
        predictions_path = os.path.join(args.data_path, 'predictions', 'predict.' + experiment) \
            if not args.interactive else os.path.join(args.data_path, 'predictions', args.predict_path)

        top_doc_dict, doc_bm25_dict, sent_dict, q_dict, doc_label_dict = eval_bm25(collection_path)
        score_dict = load_bert_scores(predictions_path, q_dict, sent_dict)

        if args.interactive:
            top_rank_docs = visualize_scores(collection_path, score_dict)
            with open(os.path.join(args.data_path, 'query_sent_scores.csv'), 'w') as scores_file:
                for doc in top_rank_docs[:100]:
                    scores_file.write('{}\t{}\t{}\t{}\t{}\n'.format(
                        doc[0], sentid2text[doc[0]], doc[1], doc[2],
                        'BM25' if doc[3] > 0 else 'BERT'))
                for doc in top_rank_docs[-100:]:
                    scores_file.write('{}\t{}\t{}\t{}\t{}\n'.format(
                        doc[0], sentid2text[doc[0]], doc[1], doc[2],
                        'BM25' if doc[3] > 0 else 'BERT'))

        if not os.path.isdir('runs'):
            os.mkdir('runs')

        if mode == 'train':
            topics = train_topics if not args.interactive else list(q_dict.keys())
            # Grid search for best parameters
            for a in np.arange(0.0, alpha, 0.1):
                for b in np.arange(0.0, beta, 0.1):
                    for g in np.arange(0.0, gamma, 0.1):
                        calc_q_doc_bert(score_dict, 'run.' + experiment + '.cv.train',
                                        topics, top_doc_dict, doc_bm25_dict,
                                        topK, a, b, g)
                        base = 'runs/run.' + experiment + '.cv.train'
                        os.system('{}/eval/trec_eval.9.0.4/trec_eval -M1000 -m map {} {} > eval.base'
                                  .format(anserini_path, qrels_path, base))
                        with open('eval.base', 'r') as f:
                            for line in f:
                                metric, qid, score = line.split('\t')
                                map_score = float(score)
                                print(test_folder_set, round(a, 2), round(b, 2),
                                      round(g, 2), map_score)
        elif mode == 'test':
            topics = test_topics if not args.interactive else list(q_dict.keys())
            calc_q_doc_bert(score_dict,
                            'run.' + experiment + '.cv.test.' + str(test_folder_set),
                            topics, top_doc_dict, doc_bm25_dict,
                            topK, alpha, beta, gamma)
        else:
            topics = all_topics if not args.interactive else list(q_dict.keys())
            calc_q_doc_bert(score_dict, 'run.' + experiment + '.cv.all',
                            topics, top_doc_dict, doc_bm25_dict,
                            topK, alpha, beta, gamma)
def detect():
    """Graph based machine learning for bot detection"""
    data = request.get_json()
    print(data)
    return str(test(data))
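# Client-side usage sketch for the detect() handler above. The endpoint
# path '/detect', host, port, and payload are all assumptions; the route
# decorator is not shown in the snippet.
import requests

resp = requests.post('http://localhost:5000/detect',
                     json={'user_id': '12345'})  # hypothetical payload
print(resp.text)  # the string returned by test(data)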
def main():
    args, other = get_args()
    experiment = args.experiment
    anserini_path = args.anserini_path
    datasets_path = os.path.join(args.data_path, 'datasets')

    if args.mode == 'training':
        train(args)
    elif args.mode == 'inference':
        test(args)
    else:
        if args.interactive:
            # TODO: sync with HiCAL
            from utils.query import query_sents, visualize_scores
            sentid2text, hits = query_sents(args, K=10)
            test(args)
        else:
            folds_path = os.path.join(args.data_path, 'folds',
                                      '{}-folds.json'.format(args.collection))
            qrels_path = os.path.join(args.data_path, 'qrels',
                                      'qrels.{}.txt'.format(args.collection))
            topK = int(other[0])
            alpha = float(other[1])
            beta = float(other[2])
            gamma = float(other[3])
            test_folder_set = int(other[4])
            mode = other[5]

            # Divide topics according to fold parameters
            train_topics, test_topics, all_topics = [], [], []
            with open(folds_path) as f:
                folds = json.load(f)
            for i in range(0, len(folds)):
                all_topics.extend(folds[i])
                if i != test_folder_set:
                    train_topics.extend(folds[i])
                else:
                    test_topics.extend(folds[i])

            collection_path = os.path.join(datasets_path,
                                           '{}_sents.csv'.format(args.collection))
            predictions_path = os.path.join(args.data_path, 'predictions',
                                            'predict.' + experiment)
            top_doc_dict, doc_bm25_dict, sent_dict, q_dict, doc_label_dict = eval_bm25(collection_path)
            score_dict = load_bert_scores(predictions_path, q_dict, sent_dict)

            if not os.path.isdir('runs'):
                os.mkdir('runs')

            if mode == 'train':
                for a in np.arange(0.0, alpha, 0.1):
                    for b in np.arange(0.0, beta, 0.1):
                        for g in np.arange(0.0, gamma, 0.1):
                            calc_q_doc_bert(score_dict, 'run.' + experiment + '.cv.train',
                                            train_topics, top_doc_dict, doc_bm25_dict,
                                            topK, a, b, g)
                            base = 'runs/run.' + experiment + '.cv.train'
                            os.system('{}/eval/trec_eval.9.0.4/trec_eval -M1000 -m map {} {} > eval.base'
                                      .format(anserini_path, qrels_path, base))
                            with open('eval.base', 'r') as f:
                                for line in f:
                                    metric, qid, score = line.split('\t')
                                    map_score = float(score)
                                    print(test_folder_set, round(a, 2), round(b, 2),
                                          round(g, 2), map_score)
            elif mode == 'test':
                calc_q_doc_bert(score_dict,
                                'run.' + experiment + '.cv.test.' + str(test_folder_set),
                                test_topics, top_doc_dict, doc_bm25_dict,
                                topK, alpha, beta, gamma)
model = GMAN(SE, args, bn_decay=0.1)
loss_criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), args.learning_rate)
scheduler = optim.lr_scheduler.StepLR(optimizer,
                                      step_size=args.decay_epoch,
                                      gamma=0.9)
parameters = count_parameters(model)
log_string(log, 'trainable parameters: {:,}'.format(parameters))

if __name__ == '__main__':
    start = time.time()
    loss_train, loss_val = train(model, args, log, loss_criterion, optimizer, scheduler)
    plot_train_val_loss(loss_train, loss_val, 'figure/train_val_loss.png')
    trainPred, valPred, testPred = test(args, log)
    end = time.time()
    log_string(log, 'total time: %.1fmin' % ((end - start) / 60))
    log.close()

    trainPred_ = trainPred.numpy().reshape(-1, trainY.shape[-1])
    trainY_ = trainY.numpy().reshape(-1, trainY.shape[-1])
    valPred_ = valPred.numpy().reshape(-1, valY.shape[-1])
    valY_ = valY.numpy().reshape(-1, valY.shape[-1])
    testPred_ = testPred.numpy().reshape(-1, testY.shape[-1])
    testY_ = testY.numpy().reshape(-1, testY.shape[-1])

    # Save the training, validation, and test data to disk
    arrays = [trainPred_, trainY_, valPred_, valY_, testPred_, testY_]
    names = ['trainPred', 'trainY', 'valPred', 'valY', 'testPred', 'testY']
    for name, data in zip(names, arrays):
        np.savetxt('./figure/' + name + '.txt', data, fmt='%s')
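# Offline follow-up sketch: compute simple error metrics from the arrays
# saved above. Plain numpy on the saved text files; this is an assumption,
# not the repository's own evaluation code.
import numpy as np

pred = np.loadtxt('./figure/testPred.txt')
truth = np.loadtxt('./figure/testY.txt')
mae = np.mean(np.abs(pred - truth))           # mean absolute error
rmse = np.sqrt(np.mean((pred - truth) ** 2))  # root mean squared error
print('test MAE: %.4f, RMSE: %.4f' % (mae, rmse))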
models = to.load('./models', map_location=device)
for i in range(2, 11):
    test_classes = range(i)
    ds_test = tv.datasets.MNIST(root='./', train=False,
                                transform=tv.transforms.ToTensor(),
                                download=True)
    ds_test.targets, ds_test.data = filter_classes(ds_test, test_classes)
    dl_test = to.utils.data.DataLoader(ds_test, batch_size=256)

    model_name = "model_%d_" % i
    args = models[model_name + "args"]
    kwargs = models[model_name + "kwargs"]
    model = NN(*args, **kwargs).to(device)
    model.load_state_dict(models[model_name + "state_dict"])

    print("Testing model %d..." % i)
    acc, _ = test(model, dl_test, closed_set_accuracy, device)
    print("Testing accuracy %f" % acc)

if mode == "sample":
    batch_size = 1
    train_classes = range(9)
    ds = tv.datasets.MNIST(root='./', train=False,
                           transform=tv.transforms.ToTensor(),
                           download=True)
    ds.targets, ds.data = filter_classes(ds, train_classes)
    dl = to.utils.data.DataLoader(ds, batch_size=batch_size, shuffle=True)
    for x, y in islice(dl, 1):
        x = x.view(batch_size, -1).cpu().numpy()
        y = y.cpu().numpy()
        print(json.dumps(x[0].tolist()))
def createTest(hill, city):
    # vytvorenyTest: Czech for "created test"
    vytvorenyTest = test(1, hill, city)
    return vytvorenyTest
time_str = time.strftime("%m%d-%H%M", time.localtime(time.time()))
rootdir = "{}/{}/{}-semi-{}-fixed-{}-ratio-{}-lr-{}/".format(
    "/data/yangy/data_prepare/result", hp['dataname'], time_str,
    str(hp['semi']), str(hp['fixed']), str(hp['ratio']), str(args.lr))
os.makedirs(rootdir, exist_ok=True)
hp['rootdir'] = rootdir
np.save('{}parameter.npy'.format(rootdir), hp)

# Load the models
my_models = load_model(hp)
# Load the data
train_data, test_data = load_data(hp)
# Pre-train the models
# my_models = pre_train(hp, my_models, train_data, test_data)
# Pre-training results
# result = test(test_data, hp, my_models, 'pretrain')
# Train the models
my_models = train(hp, my_models, train_data)
# Save the models
save_model(my_models, rootdir)
# Test the models
result = test(test_data, hp, my_models, 'final')
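# Because hp is a plain dict saved with np.save, reloading it later needs
# allow_pickle=True and .item() to recover the dict. A small sketch; the
# '...' below stands in for whichever run directory was created above.
import numpy as np

hp_loaded = np.load('/data/yangy/data_prepare/result/.../parameter.npy',
                    allow_pickle=True).item()
print(hp_loaded['rootdir'])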
# Fix: slurm cannot load PYTHONPATH, so add the project root manually.
import sys
sys.path.insert(0, '/ihome/hdaqing/saz31/sanqiang/text_simplification')

import os
from model.test import test
from model.model_config import (SubTestWikiEightRefConfig,
                                SubTestWikiEightRefConfigV2,
                                SubTestWikiEightRefConfigV2Sing)
from util.arguments import get_args

args = get_args()

if __name__ == '__main__':
    mapper = {}
    path = '/zfs1/hdaqing/saz31/text_simplification/' + args.output_folder
    # Alternative roots: '/Users/zhaosanqiang916/git/acl',
    #                    '/zfs1/hdaqing/saz31/text_simplification/'
    for root, dirs, files in os.walk(path):
        for file in files:
            if 'model' in root and file.endswith('.index'):
                sid = file.index('ckpt-') + len('ckpt-')
                eid = file.rindex('.index')
                step = file[sid:eid]
                resultpath = root + '/../result/eightref_test/joshua_target_' + step + '.txt'
                if not os.path.exists(resultpath):
                    ckpt = root + '/' + file[:-len('.index')]
                    test(SubTestWikiEightRefConfig(), ckpt)
                    test(SubTestWikiEightRefConfigV2(), ckpt)
                    test(SubTestWikiEightRefConfigV2Sing(), ckpt)
def pre_train(hp, models, train_data, test_data):
    print("----------start pre-training models----------")
    view_num = len(models)
    par = []
    for i in range(view_num):
        models[i].cuda()
        models[i].train()
        par.append({'params': models[i].parameters()})
    optimizer = optim.Adam(par, lr=hp['pre_lr'])
    scheduler = StepLR(optimizer, step_size=10, gamma=0.5)
    batch_size = hp['pre_size']
    loss_func = nn.MSELoss()

    for epoch in range(hp['pre_epoch']):
        scheduler.step()
        running_loss = 0.0
        data_num = 0
        for i in range(view_num):
            models[i].train()
        for i in range(3):
            data = train_data[i]
            if data is None:
                continue
            bag_num = len(data)
            data_num += bag_num
            max_step = int(bag_num / batch_size)
            while max_step * batch_size < bag_num:
                max_step += 1
            for step in range(max_step):
                # Get a batch
                step_data = get_batch(
                    data,
                    list(range(step * batch_size, min((step + 1) * batch_size, bag_num))),
                    hp)
                x1, x2, bag1, bag2, y = step_data
                b_y = Variable(y).cuda()
                loss = 0
                if i == 0 or i == 2:
                    x_img = Variable(x1).cuda()
                    h1, _, _ = models[0](x_img, bag1)
                    loss += loss_func(h1, b_y)
                if i == 0 or i == 1:
                    x_text = Variable(x2).cuda()
                    h2, _, _ = models[1](x_text, bag2)
                    loss += loss_func(h2, b_y)
                running_loss += loss.data * x2.size(0)

                # Backward pass
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        # Epoch loss
        epoch_loss = running_loss / data_num
        print('epoch {}/{} | Loss: {:.9f}'.format(epoch, hp['pre_epoch'], epoch_loss))

        rootpath = "{}{}/".format(hp['modelpath'], str(epoch + 1))
        os.makedirs(rootpath, exist_ok=True)
        save_model(models, rootpath)
        hp['rootdir'] = rootpath
        result = test(test_data, hp, models, 'pretrain')

    print("----------end pre-training models----------")
    return models
# -*- coding:utf-8 -*-
import model.train as train
import model.test as test
import tensorflow as tf

if __name__ == '__main__':
    train.train()
    tf.reset_default_graph()  # Clear the training graph before building the test graph
    test.test()