def train(parameters):
    """Load the train/test splits, then build and run a Trainer.

    Expects `parameters` to carry data paths, embeddings, the seed, and
    the `save_pred` prefix used to name the model folder.
    """
    # Model output directory, e.g. './results/docred-dev/docred_basebert_full/'
    model_folder = setup_log(parameters, parameters['save_pred'] + '_train')
    set_seed(parameters['seed'])

    # ---------------- Data loading ----------------
    print('加载训练数据 ...')
    loader_tr = DataLoader(parameters['train_data'], parameters)
    loader_tr(embeds=parameters['embeds'], parameters=parameters)
    data_tr, _ = DocRelationDataset(loader_tr, 'train', parameters, loader_tr).__call__()

    print('\nLoading testing data ...')
    loader_te = DataLoader(parameters['test_data'], parameters, loader_tr)
    loader_te(parameters=parameters)
    data_te, prune_recall = DocRelationDataset(loader_te, 'test', parameters, loader_tr).__call__()

    # ---------------- Training ----------------
    trainer = Trainer(loader_tr,
                      parameters,
                      {'train': data_tr, 'test': data_te},
                      model_folder,
                      prune_recall)
    trainer.run()
def train(parameters):
    """Train a model end-to-end: load data, run the Trainer, then
    optionally plot the learning curve and persist the model."""
    model_folder = setup_log(parameters, 'train')
    set_seed(parameters['seed'])

    # ---------------- Data loading ----------------
    print('Loading training data ...')
    tr_loader = DataLoader(parameters['train_data'], parameters)
    tr_loader(embeds=parameters['embeds'])
    tr_data = DocRelationDataset(tr_loader, 'train', parameters, tr_loader).__call__()

    print('\nLoading testing data ...')
    te_loader = DataLoader(parameters['test_data'], parameters)
    te_loader()
    te_data = DocRelationDataset(te_loader, 'test', parameters, tr_loader).__call__()

    # ---------------- Training ----------------
    trainer = Trainer(tr_loader,
                      parameters,
                      {'train': tr_data, 'test': te_data},
                      model_folder)
    trainer.run()

    # Optional post-training artifacts, driven by config flags.
    if parameters['plot']:
        plot_learning_curve(trainer, model_folder)
    if parameters['save_model']:
        save_model(model_folder, trainer, tr_loader)
def train(parameters):
    """Train a model, with optional on-disk pickle caching of the
    preprocessed datasets under `parameters['remodelfile']`.

    The cache branch is currently disabled via `use_cache = False`; flip
    it to True to reuse artifacts pickled by a previous run.
    """
    model_folder = setup_log(parameters, parameters['save_pred'] + '_train')
    set_seed(parameters['seed'])

    # ---------------- Data loading ----------------
    use_cache = False  # deliberate off-switch (original `flag`); set True to reuse pickles
    cache_dir = parameters['remodelfile']

    if use_cache and os.path.exists(os.path.join(cache_dir, 'train_loader.pkl')):
        # Reuse preprocessed artifacts from an earlier run.
        with open(os.path.join(cache_dir, 'train_loader.pkl'), 'rb') as f:
            train_loader = pkl.load(f)
        with open(os.path.join(cache_dir, 'train_data.pkl'), 'rb') as f:
            train_data = pkl.load(f)
        with open(os.path.join(cache_dir, 'test_data.pkl'), 'rb') as f:
            test_data = pkl.load(f)
        with open(os.path.join(cache_dir, 'prune_recall.pkl'), 'rb') as f:
            prune_recall = pkl.load(f)
    else:
        train_loader = DataLoader(parameters['train_data'], parameters)
        train_loader(embeds=parameters['embeds'], parameters=parameters)
        train_data, _ = DocRelationDataset(train_loader, 'train', parameters, train_loader).__call__()

        print('\nLoading testing data ...')
        test_loader = DataLoader(parameters['test_data'], parameters, train_loader)
        test_loader(parameters=parameters)
        test_data, prune_recall = DocRelationDataset(test_loader, 'test', parameters, train_loader).__call__()

        # Persist everything so a later run can skip preprocessing.
        for fname, obj in (('train_loader.pkl', train_loader),
                           ('train_data.pkl', train_data),
                           ('test_data.pkl', test_data),
                           ('prune_recall.pkl', prune_recall)):
            with open(os.path.join(cache_dir, fname), 'wb') as f:
                pkl.dump(obj, f, pkl.HIGHEST_PROTOCOL)

    # ---------------- Training ----------------
    trainer = Trainer(train_loader,
                      parameters,
                      {'train': train_data, 'test': test_data},
                      model_folder,
                      prune_recall)
    trainer.run()
    write_metrics(trainer, model_folder)

    if parameters['plot']:
        plot_learning_curve(trainer, model_folder)
        plot_P_R(trainer, model_folder)
def _test(parameters):
    """Evaluate a saved model on the test split and write the precision
    and recall arrays to `p.txt` / `r.txt` under `remodelfile`.

    Test data is cached as pickles (enabled here via `use_cache = True`).
    """
    model_folder = setup_log(parameters, parameters['save_pred'] + '_test')

    print('\nLoading mappings ...')
    train_loader = load_mappings(parameters['remodelfile'])

    use_cache = True  # original `flag`: reuse pickled test data when present
    print('\nLoading testing data ...')
    cache_dir = parameters['remodelfile']
    if use_cache and os.path.exists(os.path.join(cache_dir, 'test_test_data.pkl')):
        with open(os.path.join(cache_dir, 'test_test_data.pkl'), 'rb') as f:
            test_data = pkl.load(f)
        with open(os.path.join(cache_dir, 'test_prune_recall.pkl'), 'rb') as f:
            prune_recall = pkl.load(f)
    else:
        test_loader = DataLoader(parameters['test_data'], parameters, train_loader)
        test_loader(parameters=parameters)
        test_data, prune_recall = DocRelationDataset(test_loader, 'test', parameters, train_loader).__call__()
        with open(os.path.join(cache_dir, 'test_test_data.pkl'), 'wb') as f:
            pkl.dump(test_data, f, pkl.HIGHEST_PROTOCOL)
        with open(os.path.join(cache_dir, 'test_prune_recall.pkl'), 'wb') as f:
            pkl.dump(prune_recall, f, pkl.HIGHEST_PROTOCOL)

    # Build a skeleton Trainer, then restore the trained weights into it.
    skeleton = Trainer(train_loader, parameters,
                       {'train': [], 'test': test_data},
                       model_folder, prune_recall)
    trainer = load_model(parameters['remodelfile'], skeleton)
    _, _, _, p, r = trainer.eval_epoch(final=True, save_predictions=True)

    print('Saving test metrics ... ', end="")
    np.savetxt(parameters['remodelfile'] + "/p.txt", p)
    np.savetxt(parameters['remodelfile'] + "/r.txt", r)
    print('DONE')
def train(parameters):
    """Train a sentence-level relation model and run a final evaluation.

    Loads train/test data via RelationDataset, runs the Trainer, then
    evaluates with predictions saved and optionally plots the curve.
    """
    model_folder = setup_log(parameters, 'train')
    # BUG FIX: the seed was hard-coded to 0, silently ignoring the
    # configured seed; every sibling variant uses parameters['seed'].
    # .get keeps backward compatibility if 'seed' is absent from config.
    set_seed(parameters.get('seed', 0))

    ###################################
    # Data Loading
    ###################################
    print('\nLoading training data ...')
    train_loader = DataLoader(parameters['train_data'], parameters)
    train_loader(embeds=parameters['embeds'])
    train_data = RelationDataset(train_loader, 'train', parameters['unk_w_prob'], train_loader).__call__()

    print('\nLoading testing data ...')
    test_loader = DataLoader(parameters['test_data'], parameters)
    test_loader()
    test_data = RelationDataset(test_loader, 'test', parameters['unk_w_prob'], train_loader).__call__()

    ###################################
    # TRAINING
    ###################################
    trainer = Trainer({'train': train_data, 'test': test_data},
                      parameters, train_loader, model_folder)
    trainer.run()
    trainer.eval_epoch(final=True, save_predictions=True)

    if parameters['plot']:
        plot_learning_curve(trainer, model_folder)
def test(parameters):
    """Restore a trained model from `model_folder` and evaluate it on
    the test split, saving predictions."""
    model_folder = setup_log(parameters, 'test')

    print('\nLoading mappings ...')
    mappings = load_mappings(model_folder)

    print('\nLoading testing data ...')
    loader = DataLoader(parameters['test_data'], parameters)
    loader()
    eval_data = DocRelationDataset(loader, 'test', parameters, mappings).__call__()

    # Skeleton Trainer with an empty train split; weights come from disk.
    skeleton = Trainer(mappings, parameters,
                       {'train': [], 'test': eval_data},
                       model_folder)
    trainer = load_model(model_folder, skeleton)
    trainer.eval_epoch(final=True, save_predictions=True)
def _test(parameters):
    """Evaluate a previously trained model (restored from
    `parameters['remodelfile']`) on the test split."""
    model_folder = setup_log(parameters, parameters['save_pred'] + '_test')

    print('\nLoading mappings ...')
    mappings = load_mappings(parameters['remodelfile'])

    print('\nLoading testing data ...')
    loader = DataLoader(parameters['test_data'], parameters, mappings)
    loader(parameters=parameters)
    eval_data, prune_recall = DocRelationDataset(loader, 'test', parameters, mappings).__call__()

    # Empty train split: this Trainer only exists to host the restored model.
    skeleton = Trainer(mappings,
                       parameters,
                       {'train': [], 'test': eval_data},
                       model_folder,
                       prune_recall)
    trainer = load_model(parameters['remodelfile'], skeleton)
    trainer.eval_epoch(final=True, save_predictions=True)
def train(self):
    """Run training for this experiment object and return the best score.

    Builds a Trainer from this instance's prepared data, loaders and
    config, runs training, evaluates a final epoch with predictions
    saved, persists the model, and optionally plots the learning curve.

    Returns:
        float: the Trainer's best validation score.
    """
    ###################################
    # TRAINING
    ###################################
    trainer = Trainer({'train': self.train_data, 'test': self.test_data},
                      self.parameters, self.train_loader, self.model_folder)
    trainer.run()
    trainer.eval_epoch(final=True, save_predictions=True)

    # BUG FIX: the original referenced bare `model_folder`, `train_loader`
    # and `parameters`, which are undefined in this scope (NameError at
    # runtime) — they are attributes of `self`, as used above.
    save_model(self.model_folder, trainer.model, self.train_loader)
    if self.parameters['plot']:
        plot_learning_curve(trainer, self.model_folder)

    return float(trainer.best_score)
def test(parameters):
    """Restore a trained model and evaluate it on the test split,
    saving its predictions."""
    print('*** Testing Model ***')
    model_folder = setup_log(parameters, 'test')

    print('Loading mappings ...')
    mappings_path = os.path.join(model_folder, 'mappings.pkl')
    with open(mappings_path, 'rb') as f:
        loader = pkl.load(f)

    print('Loading testing data ...')
    test_loader = DataLoader(parameters['test_data'], parameters)
    test_loader.__call__()
    eval_data = RelationDataset(test_loader, 'test',
                                parameters['unk_w_prob'], loader).__call__()

    # Skeleton Trainer (empty train split); trained weights are loaded from disk.
    skeleton = Trainer({'train': [], 'test': eval_data},
                       parameters, loader, model_folder)
    trainer = load_model(model_folder, skeleton)
    trainer.eval_epoch(final=True, save_predictions=True)