def __print_epoch_results(self, ep_no, all_ep, train_tasks, valid_tasks):
    """Print a one-line summary of per-task train/validation loss and F1.

    Tasks with no processed instances (size == 0) are skipped. valid_tasks
    may be None, in which case only training metrics are printed.
    """
    parts = ['epoch: {}/{}> '.format(ep_no, all_ep)]
    for task_name, state in train_tasks.items():
        if state.size > 0:
            parts.append('|T: {}, tr-loss: {:.3f}, tr-f1: {:.3f} '.format(
                task_name, state.loss, state.f1))
    if valid_tasks is not None:
        for task_name, state in valid_tasks.items():
            if state.size > 0:
                parts.append('|T: {}, va-loss: {:.3f}, va-f1: {:.3f} '.format(
                    task_name, state.loss, state.f1))
    # trailing timestamp, tab-separated like the rest of the log output
    parts.append('\t' + ELib.get_time())
    print(''.join(parts))
def test(self, test_bundle, return_output_vecs=False, weighted_instance_loss=False,
         print_perf=True, title=None, report_number_of_intervals=20,
         return_output_vecs_get_details=True):
    """Run inference over a single-task bundle and compute evaluation metrics.

    Returns a tuple (predicted_labels, logits, [output_vecs, output_vecs_detail], perf)
    where perf is ELib.calculate_metrics(...) output (F1/Pre/Rec by the print below),
    or None if the bundle has more than one task, or four empty lists if the
    bundle contains no instances. Moves the classifier to the configured device
    for the duration of the call and back to CPU before returning.
    """
    # only single-task bundles are supported by this evaluation path
    if len(test_bundle.task_list) > 1:
        print('only one task is allowed for testing')
        return None
    # nothing to label
    if len(test_bundle.tws) == 0:
        return list(), list(), list(), list()
    if title is None:
        title = ''
    else:
        title += ' '
    # prepare model for inference on the configured device
    self.bert_classifier.to(self.config.device)
    self.bert_classifier.zero_grad()
    self.bert_classifier.eval()
    self.setup_objective(weighted_instance_loss)
    test_dt = EBertDataset(test_bundle, self.tokenizer, self.config.max_seq)
    # sequential, unshuffled batches for deterministic evaluation
    batches = self.generate_batches([test_dt], self.config, False, False, 0,
                                    EInputListMode.sequential)
    result_vecs = list()
    result_vecs_detail = list()
    # per-task running state (loss, labels, logits) keyed by the single task name
    tasks = {test_bundle.task_list[0]: ETaskState(test_bundle.task_list[0])}
    print(title + 'labeling ', end=' ', flush=True)
    with torch.no_grad():
        for ba_ind, cur_batch in enumerate(batches):
            outcome = self.bert_classifier(cur_batch, False)
            # accumulates loss/labels into `tasks`; False => not training
            self.__process_loss(outcome, cur_batch, tasks, False, weighted_instance_loss)
            if return_output_vecs:
                result_vecs.extend(self.bert_classifier.output_vecs)
                # NOTE(review): detail collection nested under return_output_vecs —
                # inferred from the collapsed original; confirm intended nesting
                if self.bert_classifier.output_vecs_detail is not None and return_output_vecs_get_details:
                    result_vecs_detail.extend(self.bert_classifier.output_vecs_detail)
            # periodic progress percentage (report_number_of_intervals ticks total)
            if ELib.progress_made(ba_ind, cur_batch['batch_count'], report_number_of_intervals):
                print(ELib.progress_percent(ba_ind, cur_batch['batch_count']),
                      end=' ', flush=True)
            # free GPU memory for this batch before the next iteration
            self.delete_batch_from_gpu(cur_batch, EInputListMode.sequential)
            del cur_batch, outcome
    print()
    task_out = tasks[test_bundle.task_list[0]]
    # mean loss over all processed instances (size > 0 guaranteed by early return)
    task_out.loss /= task_out.size
    perf = ELib.calculate_metrics(task_out.lbl_true, task_out.lbl_pred)
    if print_perf:
        print('Test Results L1> Loss: {:.3f} F1: {:.3f} Pre: {:.3f} Rec: {:.3f}'.format(
            task_out.loss, perf[0], perf[1], perf[2]) + '\t\t' + ELib.get_time())
    # release GPU memory held by the model
    self.bert_classifier.cpu()
    return task_out.lbl_pred, task_out.logits, [result_vecs, result_vecs_detail], perf
def main():
    """Parse command-line arguments, resolve compute devices, and dispatch.

    For a --cmd starting with 'bert', runs EPretrainProj.run() for --itr
    iterations, writing each iteration to '<output_dir>_<i>' and advancing
    the seed by a fixed stride between iterations.
    """
    parser = argparse.ArgumentParser()
    # general params
    parser.add_argument("--cmd", default=None, type=str, required=True, help='')
    parser.add_argument("--TEMP_DIR", default=temp_dir, type=str, required=False, help='')
    # pretrain params
    parser.add_argument("--itr", default=1, type=int, required=False, help='')
    parser.add_argument("--model_path", default=None, type=str, required=True, help='')
    parser.add_argument("--train_path", default=None, type=str, required=True, help='')
    parser.add_argument("--test_path", default=None, type=str, required=True, help='')
    parser.add_argument("--unlabeled_path", default=None, type=str, required=True, help='')
    parser.add_argument("--output_dir", default=None, type=str, required=True, help='')
    parser.add_argument("--device", default=None, type=int, required=True, help='')
    parser.add_argument("--seed", default=None, type=int, required=True, help='')
    parser.add_argument("--train_sample", default=None, type=int, required=True, help='')
    parser.add_argument("--unlabeled_sample", default=None, type=int, required=True, help='')
    ## ignore these!
    # NOTE(review): argparse `type=bool` treats ANY non-empty string (including
    # "False") as True; left as-is since the flag is marked "ignore it".
    parser.add_argument("--per_query", default=False, type=bool, required=False, help='')  # ignore it
    parser.add_argument("--model_path_2", default=None, type=str, required=False, help='')  # ignore it
    parser.add_argument("--lm_model_path", default=None, type=str, required=False, help='')  # ignore it
    parser.add_argument("--t_lbl_path_1", default=None, type=str, required=False, help='')  # ignore it
    parser.add_argument("--t_lbl_path_2", default=None, type=str, required=False, help='')  # ignore it
    parser.add_argument("--valid_path", default=None, type=str, required=False, help='')  # ignore it
    parser.add_argument("--device_2", default=None, type=int, required=False, help='')  # ignore it
    args, unknown = parser.parse_known_args()
    # resolve the primary device; a negative --device means CPU
    device = 'cpu'
    device_name = device
    if args.device >= 0:
        device = 'cuda:' + str(args.device)
        device_name = torch.cuda.get_device_name(args.device)
    # resolve the optional secondary device
    device_2 = 'cpu'
    if 'device_2' in args and (args.device_2 is not None and args.device_2 >= 0):
        device_2 = 'cuda:' + str(args.device_2)
        # BUG FIX: originally called get_device_name(args.device), which
        # reported the primary device's name twice instead of the secondary's.
        device_name = device_name + ', ' + torch.cuda.get_device_name(args.device_2)
    print('setup:', '| python>', platform.python_version(),
          '| numpy>', np.__version__, '| pytorch>', torch.__version__,
          '| device>', device_name)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    if args.cmd.startswith('bert'):
        seed = args.seed
        for cur_itr in range(args.itr):
            print('------------------------------------')
            print('iteration ' + str(cur_itr + 1) + ' began with seed=\'' +
                  str(seed) + '\' at ' + ELib.get_time())
            # NOTE(review): `cur_itr >= 0` is always true; kept to preserve the
            # original control flow (possibly a leftover debug switch).
            if cur_itr >= 0:
                output_dir = args.output_dir + '_' + str(cur_itr)
                EPretrainProj.run(args.cmd, args.per_query, args.train_path,
                                  args.valid_path, args.test_path, args.unlabeled_path,
                                  args.model_path, args.model_path_2, args.lm_model_path,
                                  args.t_lbl_path_1, args.t_lbl_path_2, output_dir,
                                  device, device_2, seed,
                                  args.train_sample, args.unlabeled_sample)
            # distinct seed per iteration
            seed += 1230
            ELib.PASS()
    ELib.PASS()
# NOTE(review): this region originally repeated the tail of main() verbatim
# (the setup print, output-dir creation, and the whole 'bert' dispatch loop),
# starting mid-expression — a copy/paste duplication that is dead/invalid
# outside the function. Removed; only the script entry guard is kept.
if __name__ == "__main__":
    print("Started at", ELib.get_time())
    main()
    print("\nDone at", ELib.get_time())
    pass