def main(args):
    print_args(args, 'args')
    conf = Config(args.exp)

    ### build model
    npz_config = read_json(conf.npz_config_path)
    scope = fluid.Scope()
    with fluid.scope_guard(scope):
        with fluid.unique_name.guard():
            if args.model == 'UniRNN':
                model = RLUniRNN(conf, npz_config, candidate_encode=args.candidate_encode)
            elif args.model == 'PointerNet':
                model = RLPointerNet(conf, npz_config, candidate_encode=args.candidate_encode)
            algorithm = RLAlgorithm(model,
                                    optimizer=conf.optimizer,
                                    lr=conf.lr,
                                    gpu_id=(0 if args.use_cuda == 1 else -1),
                                    gamma=args.gamma)
            td_ct = RLComputationTask(algorithm,
                                      model_dir=conf.model_dir,
                                      mode=args.train_mode,
                                      scope=scope)

    # get eval model
    eval_args = copy.deepcopy(args)
    eval_args.exp = args.eval_exp
    eval_args.model = args.eval_model
    eval_args.task = 'eval'
    eval_td_ct = eval_entry_func(eval_args)

    ### other tasks
    if args.task == 'eps_greedy_sampling':
        eps_greedy_sampling(td_ct, eval_td_ct, args, conf, None, td_ct.ckp_step)
        exit()
    elif args.task == 'evaluate':
        evaluate(td_ct, eval_td_ct, args, conf, td_ct.ckp_step)
        exit()

    ### start training
    memory_size = 1000
    replay_memory = collections.deque(maxlen=memory_size)
    summary_writer = tf.summary.FileWriter(conf.summary_dir)
    for epoch_id in range(td_ct.ckp_step + 1, conf.max_train_steps):
        if args.log_reward == 1:
            log_train(td_ct, args, conf, summary_writer, replay_memory, epoch_id)
        else:
            train(td_ct, eval_td_ct, args, conf, summary_writer, replay_memory, epoch_id)
        td_ct.save_model(conf.model_dir, epoch_id)
        eps_greedy_sampling(td_ct, eval_td_ct, args, conf, summary_writer, epoch_id)
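# ---------------------------------------------------------------------------
# Hedged sketch (standard library only, not part of the training code above):
# replay_memory is a bounded FIFO buffer. collections.deque with maxlen drops
# the oldest entries automatically once memory_size items have been appended,
# so the training loop never has to trim it by hand.
import collections
import random

replay_demo = collections.deque(maxlen=3)
for step in range(5):
    replay_demo.append(step)        # after 5 appends only the newest 3 remain
print(list(replay_demo))            # [2, 3, 4]
minibatch = random.sample(list(replay_demo), k=2)   # typical minibatch draw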
def main(args):
    print_args(args, 'args')
    conf = Config(args.exp)

    ### build model
    npz_config = read_json(conf.npz_config_path)
    scope = fluid.Scope()
    with fluid.scope_guard(scope):
        with fluid.unique_name.guard():
            if args.model == 'DNN':
                model = DNN(conf, npz_config)
            elif args.model == 'UniRNN':
                model = UniRNN(conf, npz_config)
            algorithm = GenAlgorithm(model,
                                     optimizer=conf.optimizer,
                                     lr=conf.lr,
                                     gpu_id=(0 if args.use_cuda == 1 else -1))
            td_ct = GenComputationTask(algorithm,
                                       model_dir=conf.model_dir,
                                       mode=args.train_mode,
                                       scope=scope)

    # get eval model
    eval_args = copy.deepcopy(args)
    eval_args.exp = args.eval_exp
    eval_args.model = args.eval_model
    eval_args.task = 'eval'
    eval_td_ct = eval_entry_func(eval_args)

    ### other tasks
    if args.task == 'test':
        test(td_ct, args, conf, None, td_ct.ckp_step)
        exit()
    elif args.task == 'eps_greedy_sampling':
        eps_greedy_sampling(td_ct, eval_td_ct, args, conf, None, td_ct.ckp_step)
        exit()
    elif args.task == 'evaluate':
        evaluate(td_ct, eval_td_ct, args, conf, td_ct.ckp_step)
        exit()

    ### start training
    summary_writer = tf.summary.FileWriter(conf.summary_dir)
    for epoch_id in range(td_ct.ckp_step + 1, conf.max_train_steps):
        train(td_ct, args, conf, summary_writer, epoch_id)
        td_ct.save_model(conf.model_dir, epoch_id)
        test(td_ct, args, conf, summary_writer, epoch_id)
        eps_greedy_sampling(td_ct, eval_td_ct, args, conf, summary_writer, epoch_id)
def main(): arg_parser = argparse.ArgumentParser(description="Predict") arg_parser = add_data_reader_arguments(arg_parser) arg_parser = add_prediction_arguments(arg_parser) args = arg_parser.parse_args() args.data_parts = args.data_parts.split(",") print_args(args) reader = DataReader(**vars(args)) reader.prc(is_save=False) model = get_model(reader, args) predict(reader, model, args.save_preds_path, 0, num_beams=args.num_beams, do_sample=bool(args.do_sample))
def main(args):
    print_args(args, 'args')
    conf = Config(args.exp)

    ### build model
    npz_config = read_json(conf.npz_config_path)
    scope = fluid.Scope()
    with fluid.scope_guard(scope):
        with fluid.unique_name.guard():
            if args.model == 'BiRNN':
                model = BiRNN(conf, npz_config)
            elif args.model == 'Trans':
                model = Transformer(conf, npz_config, num_blocks=2, num_head=4)
            algorithm = EvalAlgorithm(model,
                                      optimizer=conf.optimizer,
                                      lr=conf.lr,
                                      gpu_id=(0 if args.use_cuda == 1 else -1))
            td_ct = EvalComputationTask(algorithm,
                                        model_dir=conf.model_dir,
                                        mode=args.train_mode,
                                        scope=scope)

    ### other tasks
    if args.task == 'test':
        test(td_ct, args, conf, None, td_ct.ckp_step)
        exit()
    elif args.task == 'debug':
        debug(td_ct, args, conf, None, td_ct.ckp_step)
        exit()
    elif args.task == 'eval':
        return td_ct

    ### start training
    summary_writer = tf.summary.FileWriter(conf.summary_dir)
    for epoch_id in range(td_ct.ckp_step + 1, conf.max_train_steps):
        train(td_ct, args, conf, summary_writer, epoch_id)
        td_ct.save_model(conf.model_dir, epoch_id)
        test(td_ct, args, conf, summary_writer, epoch_id)
def main(args):
    print_args(args, 'args')
    ct = get_ct_sim(args.exp,
                    args.use_cuda == 1,
                    args.train_mode,
                    args.cell_type,
                    args.output_dim)
    conf = ct.alg.model.conf

    ###########
    ### other tasks
    ###########
    if args.task == 'test':
        test(ct, args, conf, None, ct.ckp_step)
        exit()
    elif args.task == 'eval_list':
        return eval_list(ct, args, conf, ct.ckp_step, args.eval_npz_list)

    ##################
    ### start training
    ##################
    summary_writer = tf.summary.FileWriter(conf.summary_dir)
    for epoch_id in range(ct.ckp_step + 1, conf.max_train_steps + 1):
        train(ct, args, conf, summary_writer, epoch_id)
        ct.save_model(epoch_id)
        test(ct, args, conf, summary_writer, epoch_id)
                              embed_dim=256, GoogleEmbedding=False)
else:
    print("**word2vec Embeddings!")

args = parser.parse_args()
random.seed(0)
torch.manual_seed(6)

now = datetime.datetime.now()
args.experiment_folder = args.experiment_path + \
    f"{now.year}_{now.month}_{now.day}_{now.hour}_{now.minute}/"
if not os.path.exists(args.experiment_folder) and args.save_model:
    os.makedirs(args.experiment_folder)

utils.print_args(args)

# vocabs contain all vocab + <pad>, <bos>, <eos>, <unk>
args.vocabs = utils.load_file(args.vocab_path, file_type='json')
args.n_vocabs = len(args.vocabs)
args.word2idx = {tok: i for i, tok in enumerate(args.vocabs)}
args.idx2word = {i: tok for i, tok in enumerate(args.vocabs)}
args.padding_idx = args.word2idx[args.padding_symbol]

batch_gen_train, batch_gen_test = data_load.create_batch_generators(args)
batcher = lm_model.TokenBatcher(args)

# Sentence encoder
sentence_encoder = model.SentenceEmbeddingModel(args).to(args.device)
# Convolution layer for extracting global coherence patterns
global_feature_extractor = model.LightweightConvolution(args).to(args.device)
# Bilinear layer for modeling inter-sentence relation
def main(): """Main training program.""" # Disable CuDNN. torch.backends.cudnn.enabled = False # Timer. timers = Timers() # Arguments. args = get_args() # if args.load_huggingface: # args.make_vocab_size_divisible_by = 1 # Pytorch distributed. initialize_distributed(args) if torch.distributed.get_rank() == 0: print('Pretrain GPT3 model') print_args(args) # Random seeds for reproducability. set_random_seed(args.seed) # Data stuff. train_data, val_data, test_data, args.vocab_size, args.eod_token, tokenizer = get_train_val_test_data(args) # Model, optimizer, and learning rate. model, optimizer, lr_scheduler = setup_model_and_optimizer(args) # Resume data loader if necessary. if args.resume_dataloader: if train_data is not None: train_data.batch_sampler.start_iter = args.iteration % len(train_data) print_rank_0(f"Resume train set from iteration {train_data.batch_sampler.start_iter}") if val_data is not None: start_iter_val = (args.train_iters // args.save_interval) * args.eval_interval val_data.batch_sampler.start_iter = start_iter_val % len(val_data) if train_data is not None: train_data_iterator = iter(train_data) else: train_data_iterator = None iteration = 0 if args.train_iters > 0: if args.do_train: iteration, skipped = train(model, optimizer, lr_scheduler, train_data_iterator, val_data, timers, args, tokenizer) if args.do_valid: prefix = 'the end of training for val data' # val_loss, val_ppl _ = evaluate_and_print_results(prefix, iter(val_data) if val_data else None, model, args, timers, False) if args.save and iteration != 0: save_checkpoint(iteration, model, optimizer, lr_scheduler, args, deepspeed=DEEPSPEED_WRAP and args.deepspeed) if args.do_test: # Run on test data. prefix = 'the end of training for test data' evaluate_and_print_results(prefix, iter(test_data) if test_data else None, model, args, timers, True)
args.cuda = args.cuda and torch.cuda.is_available()

out_dir = args.out_dir_path.strip('\r\n')
model_save = os.path.join(out_dir, 'models/modelbgrepproper.pt')
U.mkdir_p(out_dir + '/preds')
U.mkdir_p(out_dir + '/models/')
U.mkdir_p(out_dir + '/logs/')
configure(os.path.join(out_dir, 'logs/' + args.nm), flush_secs=5)
U.set_logger(out_dir)
U.print_args(args)

DEFAULT_COMPRESSED_DATASET = 'datasets-pickled.pkl'

np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)

if args.compressed_datasets == '':
    # train
    train_dataset = ASAPDataset(args.train_path, maxlen=args.maxlen,
                                vocab_size=args.vocab_size,
                                vocab_file=out_dir + '/vocab.pkl',
                                pos=args.pos,
                                read_vocab=(args.vocab_path is not None))
    vocab = train_dataset.vocab
    train_dataset.make_scores_model_friendly()

    # test
    test_dataset = ASAPDataset(args.test_path, maxlen=args.maxlen,
                               vocab=vocab, pos=args.pos)
parser.add_argument("--batch_size", default=10, type=int, help="batch size for gumbel-softmax samples") parser.add_argument("--attack_target", default="premise", type=str, choices=["premise", "hypothesis"], help="attack either the premise or hypothesis for MNLI") parser.add_argument("--initial_coeff", default=15, type=int, help="initial log coefficients") parser.add_argument("--adv_loss", default="cw", type=str, choices=["cw", "ce"], help="adversarial loss") parser.add_argument("--constraint", default="bertscore_idf", type=str, choices=["cosine", "bertscore", "bertscore_idf"], help="constraint function") parser.add_argument("--lr", default=3e-1, type=float, help="learning rate") parser.add_argument("--kappa", default=5, type=float, help="CW loss margin") parser.add_argument("--embed_layer", default=-1, type=int, help="which layer of LM to extract embeddings from") parser.add_argument("--lam_sim", default=1, type=float, help="embedding similarity regularizer") parser.add_argument("--lam_perp", default=1, type=float, help="(log) perplexity regularizer") parser.add_argument("--print_every", default=10, type=int, help="print loss every x iterations") parser.add_argument("--gumbel_samples", default=100, type=int, help="number of gumbel samples; if 0, use argmax") args = parser.parse_args() print_args(args) main(args)