total_score_history = []
best_ckpt_score = None

ckpt_file = os.path.join(models_folder, "summarizer_" + args.experiment + "_ckpt.bin")
ckpt_optimizer_file = os.path.join(models_folder, "summarizer_optimizer_" + args.experiment + "_ckpt.bin")

learning_rate = 2e-5
n_epochs = args.n_epochs

# Tee all prints into a dated log file as well as stdout
utils_hdf5.DoublePrint("printlog_summarizer_" + args.experiment + "_" + datetime.now().strftime("%Y-%m-%d") + ".log", "a")

if args.device == "cuda":
    print("Training on GPU " + str(freer_gpu))

bert_tokenizer = utils_tokenizer.BERTCacheTokenizer()
print("---------------")

summarizer = GeneTransformer(max_output_length=args.max_output_length, device=args.device,
                             tokenizer_type='gpt2', starter_model=args.model_start)
print("Summarizer loaded")

def collate_func(inps):
    # SQLite (.db) datasets yield dict rows with a 'body' field; HDF5 datasets
    # yield byte-string (document, summary) pairs that must be decoded.
    if ".db" in args.dataset_file:
        return [a['body'] for a in inps]
    else:
        return [inp[0].decode() for inp in inps], [inp[1].decode() for inp in inps]

param_optimizer = list(summarizer.model.named_parameters())
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
# Weight decay applies to all parameters except biases and LayerNorm weights
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]

logplot_file = os.path.join(args.log_folder, "gpt2_unsumm_" + args.experiment + ".log")
logplot = LogPlot(logplot_file)
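# Sketch, not in the original script: parameter groups like the ones above are
# normally handed to an AdamW-style optimizer, so that weight decay skips the
# `no_decay` parameters. torch's built-in AdamW is assumed here; the original
# may use a different optimizer class or a learning-rate schedule.
import torch

optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=learning_rate)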
os.environ["CUDA_VISIBLE_DEVICES"] = "" + str(freer_gpu) args.experiment += "_" + freer_gpu learning_rate = 2e-5 n_epochs = args.n_epochs utils_hdf5.DoublePrint("printlog_generator_" + args.experiment + "_" + datetime.now().strftime("%Y-%m-%d") + ".log", "a") ## << Wooh bpe_model = "" if args.tokenizer == "bpecap": bpe_model = os.path.join(models_folder, "m.model") model = GeneTransformer(tokenizer_type=args.tokenizer, max_output_length=args.max_output_length, device=args.device, bpe_model=bpe_model) if len(args.starter_model) > 0: model.reload(os.path.join(models_folder, args.starter_model)) print("Model loaded") def collate_func(inps): return [inp[0] for inp in inps], [inp[1] for inp in inps] dataset = utils_hdf5.HDF5Dataset(args.dataset_file, collection_name="name") N = len(dataset) N_dev = 500