def main(args):
    """Entry point: dispatch to training, testing, or interactive prediction.

    Exactly one of args.do_train / args.do_test / args.do_interactive is
    expected to be set by the caller.
    """
    init_logger()
    set_seeds()
    tokenizer = load_tokenizer(args)
    if args.do_train:
        if args.model_4:
            # Three sequential stages: first trainer, then a classifier pass,
            # then an MMD-based trainer.
            first = TrainerFirst(args, tokenizer)
            first.train()
            second = SecondClassifier(args, tokenizer)
            second.classifier()
            third = Trainermmd(args, tokenizer)
            third.train()
        elif args.new_model_4:
            # Same three-stage pipeline with the "new" model variants (_n).
            first = FirstTrainer(args, tokenizer)
            first.train()
            second = SecondClassifier_n(args, tokenizer)
            second.classifier()
            third = Trainermmd_n(args, tokenizer)
            third.train()
        else:
            # Plain single-stage training.
            trainer = Trainer(args, tokenizer)
            trainer.train()
    elif args.do_test:
        if args.model_4 or args.new_model_4:
            tester = Predictor(args, tokenizer)
            tester.predict()
        else:
            tester = Tester(args, tokenizer)
            tester.test()
    elif args.do_interactive:
        interactive_predict(args)
def __init__(self, vocab_size, pretrain_file, device, data_parallel,
             n_top_layers=4, hidden_dim=768, max_sents_len=512,
             hidden_dropout_rate=0.1, attn_dropout_rate=0.1, n_heads=12,
             n_layers=12, variance_epsilon=1e-12):
    """Load a pretrained BertAverage model for inference.

    Args:
        vocab_size: tokenizer vocabulary size.
        pretrain_file: checkpoint path the weights are loaded from.
        device: torch device to run the model on.
        data_parallel: wrap the model in DataParallel when True.
        n_top_layers, hidden_dim, max_sents_len, hidden_dropout_rate,
        attn_dropout_rate, n_heads, n_layers, variance_epsilon:
            hyperparameters forwarded to BertAverage.
    """
    super().__init__()
    self.pretrain_file = pretrain_file
    self.data_parallel = data_parallel
    self.device = device
    set_seeds(42)  # fixed seed for reproducible behaviour
    self.model = BertAverage(vocab_size, n_top_layers, hidden_dim,
                             max_sents_len, hidden_dropout_rate,
                             attn_dropout_rate, n_heads, n_layers,
                             variance_epsilon)
    self.model.eval()  # inference only — disable dropout
    self.load(self.model, self.pretrain_file)
    # Run in double precision on the target device.
    self.model.double().to(self.device)
    if self.data_parallel:
        self.model = DataParallel(self.model)
    # Lightweight record type for the items this wrapper processes.
    self.Instance = namedtuple('Instance', ['id', 'sentence', 'label'])
def main(task='mrpc',
         train_cfg='config/train_mrpc.json',
         model_cfg='config/bert_base.json',
         data_file='../glue/MRPC/train.tsv',
         model_file=None,
         pretrain_file='../uncased_L-12_H-768_A-12/bert_model.ckpt',
         data_parallel=True,
         vocab='../uncased_L-12_H-768_A-12/vocab.txt',
         save_dir='../exp/bert/mrpc',
         max_len=128,
         mode='train'):
    """Fine-tune ('train') or evaluate ('eval') a BERT classifier on a GLUE task."""
    cfg = train.Config.from_json(train_cfg)
    model_cfg = models.Config.from_json(model_cfg)

    set_seeds(cfg.seed)

    tokenizer = tokenization.FullTokenizer(vocab_file=vocab, do_lower_case=True)
    TaskDataset = dataset_class(task)  # task dataset class according to the task
    # Preprocessing pipeline: tokenize -> add [CLS]/[SEP] with truncation -> ids.
    pipeline = [Tokenizing(tokenizer.convert_to_unicode, tokenizer.tokenize),
                AddSpecialTokensWithTruncation(max_len),
                TokenIndexing(tokenizer.convert_tokens_to_ids,
                              TaskDataset.labels, max_len)]
    dataset = TaskDataset(data_file, pipeline)
    data_iter = DataLoader(dataset, batch_size=cfg.batch_size, shuffle=True)

    model = Classifier(model_cfg, len(TaskDataset.labels))
    criterion = nn.CrossEntropyLoss()

    trainer = train.Trainer(cfg, model, data_iter,
                            optim.optim4GPU(cfg, model), save_dir, get_device())

    if mode == 'train':
        def get_loss(model, batch, global_step):  # make sure loss is a scalar tensor
            input_ids, segment_ids, input_mask, label_id = batch
            logits = model(input_ids, segment_ids, input_mask)
            loss = criterion(logits, label_id)
            return loss

        trainer.train(get_loss, model_file, pretrain_file, data_parallel)

    elif mode == 'eval':
        def evaluate(model, batch):
            # Returns per-example correctness so accuracy can be averaged later.
            input_ids, segment_ids, input_mask, label_id = batch
            logits = model(input_ids, segment_ids, input_mask)
            _, label_pred = logits.max(1)
            result = (label_pred == label_id).float()  # .cpu().numpy()
            accuracy = result.mean()
            return accuracy, result

        results = trainer.eval(evaluate, model_file, data_parallel)
        total_accuracy = torch.cat(results).mean().item()
        print('Accuracy:', total_accuracy)
def __init__(self, args):
    """Build the masked-LM pre-training pipeline: tokenizer, loader, model, trainer.

    `args` carries file paths, masking hyperparameters and logging locations.
    """
    self.args = args
    cfg = train.Config.from_json(args.train_cfg)
    model_cfg = models.Config.from_json(args.model_cfg)
    set_seeds(cfg.seed)

    tokenizer = tokenization.FullTokenizer(vocab_file=args.vocab, do_lower_case=True)
    tokenize = lambda x: tokenizer.tokenize(tokenizer.convert_to_unicode(x))

    # Masking / preprocessing for the masked-LM objective.
    pipeline = [Preprocess4Pretrain(args.max_pred, args.mask_prob,
                                    list(tokenizer.vocab.keys()),
                                    tokenizer.convert_tokens_to_ids,
                                    model_cfg.max_len, args.mask_alpha,
                                    args.mask_beta, args.max_gram)]
    data_iter = DataLoader(SentPairDataset(args.data_file, cfg.batch_size,
                                           tokenize, model_cfg.max_len,
                                           pipeline=pipeline),
                           batch_size=cfg.batch_size,
                           collate_fn=seq_collate,
                           num_workers=mp.cpu_count())

    model = Generator(model_cfg)
    self.optimizer = optim.optim4GPU(cfg, model)
    self.trainer = train.MLMTrainer(cfg, model, data_iter, self.optimizer,
                                    args.save_dir, get_device())

    # Per-experiment tensorboard directory: <log_dir>/<name>.
    os.makedirs(os.path.join(args.log_dir, args.name), exist_ok=True)
    self.writer = SummaryWriter(log_dir=os.path.join(args.log_dir, args.name))  # for tensorboardX
def main(config='config/finetune/agnews/train.json'):
    """Fine-tune ('train') or evaluate ('eval') a Transformer classifier.

    All sub-configs (data / model / optimizer) are referenced from the top-level
    JSON config; the mode is read from cfg.mode.
    """
    cfg = Config(**json.load(open(config, "r")))
    cfg_data = data.Config(**json.load(open(cfg.cfg_data, "r")))
    cfg_model = models.Config(**json.load(open(cfg.cfg_model, "r")))
    cfg_optim = trainer.Config(**json.load(open(cfg.cfg_optim, "r")))

    set_seeds(cfg.seed)

    TaskDataset = data.get_class(cfg_data.task)  # task dataset class according to the task
    tokenizer = tokenization.FullTokenizer(vocab_file=cfg_data.vocab_file,
                                           do_lower_case=True)
    # Preprocessing: strip symbols -> tokenize -> special tokens -> token ids.
    dataset = TaskDataset(cfg_data.data_file[cfg.mode], pipelines=[
        data.RemoveSymbols('\\'),
        data.Tokenizing(tokenizer.convert_to_unicode, tokenizer.tokenize),
        data.AddSpecialTokensWithTruncation(cfg_data.max_len),
        data.TokenIndexing(tokenizer.convert_tokens_to_ids,
                           TaskDataset.labels, cfg_data.max_len)
    ], n_data=None)
    dataset = TensorDataset(*dataset.get_tensors())  # To Tensors
    data_iter = DataLoader(dataset, batch_size=cfg_optim.batch_size, shuffle=True)

    classifier = models.Classifier4Transformer(cfg_model, len(TaskDataset.labels))
    optimizer = optim.optim4GPU(cfg_optim, classifier)

    train_loop = trainer.TrainLoop(cfg_optim, classifier, data_iter, optimizer,
                                   cfg.save_dir, get_device())

    # Fix: build the loss module once instead of re-instantiating
    # nn.CrossEntropyLoss() on every training step inside get_loss.
    criterion = nn.CrossEntropyLoss()

    def get_loss(model, batch, global_step):  # make sure loss is a scalar tensor
        input_ids, segment_ids, input_mask, label_id = batch
        logits = model(input_ids, segment_ids, input_mask)
        loss = criterion(logits, label_id)
        return loss

    def evaluate(model, batch):
        # Per-example correctness; accuracies are aggregated by the caller.
        input_ids, segment_ids, input_mask, label_id = batch
        logits = model(input_ids, segment_ids, input_mask)
        _, label_pred = logits.max(1)
        result = (label_pred == label_id).float()  # .cpu().numpy()
        accuracy = result.mean()
        return accuracy, result

    if cfg.mode == "train":
        train_loop.train(get_loss, cfg.model_file, cfg.pretrain_file)
        print("Training has been done properly.")
    elif cfg.mode == "eval":
        results = train_loop.eval(evaluate, cfg.model_file)
        total_accuracy = torch.cat(results).mean().item()
        print(f"Accuracy: {total_accuracy}")
def main():
    """Evaluate, render, or record (as a GIF) trained agents from a history file."""
    args = parse_args()
    set_seeds(args.seed)

    # Because I keep forgetting to select the correct env
    assert args.env in args.history_file

    env = create_env(args.env)
    print("Loading environment", args.env)
    print("Action space:", env.action_space)
    print("Observation space:", env.observation_space)

    history = pickle_load(args.history_file)

    # Select which agent(s) to evaluate; ensembles take up to 3 policies.
    if args.best_agent:
        agents = get_best_agents_dedup(history, 1)
        eval_outfile = 'eval_best_agent'
    elif args.best_agent_ensemble:
        agents = get_best_agents_dedup(history, 3)
        eval_outfile = 'eval_best_agent_ensemble'
    elif args.last_agent:
        agents = [history['agents'][-1][0]]
        eval_outfile = 'eval_last_agent'
    elif args.last_agent_ensemble:
        agents = history['agents'][-1][:3]
        eval_outfile = 'eval_last_agent_ensemble'
    else:
        assert False  # exactly one selection flag must be given

    dim_in, dim_out = get_input_output_dim(env)
    policies = [a.get_policy(dim_in, dim_out) for a in agents]
    policy = EnsemblePolicy(policies)

    if args.evaluate:
        # Run args.evaluate episodes with distinct seeds; save rewards to disk.
        total_rew = []
        for i in range(args.evaluate):
            env.seed(args.seed + i)
            ep_rew, ep_len = run_episode(env, policy)
            total_rew.append(ep_rew)
            print(f"ep = {i + 1}/{args.evaluate} reward = {ep_rew:.2f} len = {ep_len}")
        print(f"mean_reward = {np.mean(total_rew):.2f} +- {np.std(total_rew):.2f}")
        np.save(Path(args.history_file).with_name(eval_outfile), total_rew)
    elif args.render:
        # Render episodes indefinitely (until interrupted).
        for i in count():
            env.seed(args.seed + i)
            ep_rew, ep_len = run_episode(env, policy, render_human)
            print(f"ep = {i + 1}/inf reward = {ep_rew:.2f} len = {ep_len}")
    elif args.gif:
        render_gif = RenderGif()
        env.seed(args.seed)
        run_episode(env, policy, render_gif)
        render_gif.save(args.gif)
def main(train_cfg='config/bert_pretrain.json',
         model_cfg='config/bert_base.json',
         data_file='../tbc/books_large_all.txt',
         model_file=None,
         data_parallel=True,
         vocab='../uncased_L-12_H-768_A-12/vocab.txt',
         save_dir='../exp/bert/pretrain',
         log_dir='../exp/bert/pretrain/runs',
         max_len=512,
         max_pred=20,
         mask_prob=0.15):
    """Pre-train BERT (masked LM + next-sentence prediction) with tensorboard logging.

    max_pred: max number of masked tokens per sequence; mask_prob: masking rate.
    """
    train_cfg = BertTrainConfig.from_json(train_cfg)
    model_cfg = BertModelConfig.from_json(model_cfg)

    set_seeds(train_cfg.seed)

    tokenizer = tokenization.FullTokenizer(vocab_file=vocab, do_lower_case=True)
    tokenize = lambda x: tokenizer.tokenize(tokenizer.convert_to_unicode(x))
    pipeline = [Preprocess4Pretrain(max_pred, mask_prob,
                                    list(tokenizer.vocab.keys()),
                                    tokenizer.convert_tokens_to_ids, max_len)]
    data_iter = SentPairDataLoader(data_file, train_cfg.batch_size, tokenize,
                                   max_len, pipeline=pipeline)

    model = BertModel4Pretrain(model_cfg)
    criterion1 = nn.CrossEntropyLoss(reduction='none')  # per-token masked-LM loss
    criterion2 = nn.CrossEntropyLoss()  # next-sentence classification loss

    optimizer = optim.optim4GPU(train_cfg, model)
    trainer = train.Trainer(train_cfg, model_cfg, model, data_iter, optimizer,
                            save_dir, get_device())
    writer = SummaryWriter(log_dir=log_dir)  # for tensorboardX

    def get_loss(model, batch, global_step, train_cfg, model_cfg):  # make sure loss is tensor
        input_ids, segment_ids, input_mask, masked_ids, masked_pos, masked_weights, is_next = batch
        logits_lm, logits_clsf = model(input_ids, segment_ids, input_mask, masked_pos)
        loss_lm = criterion1(logits_lm.transpose(1, 2), masked_ids)  # for masked LM
        # Zero-out non-masked (padding) positions before averaging.
        loss_lm = (loss_lm * masked_weights.float()).mean()
        loss_clsf = criterion2(logits_clsf, is_next)  # for sentence classification
        writer.add_scalars('data/scalar_group',
                           {'loss_lm': loss_lm.item(),
                            'loss_clsf': loss_clsf.item(),
                            'loss_total': (loss_lm + loss_clsf).item(),
                            'lr': optimizer.get_lr()[0],
                            },
                           global_step)
        return loss_lm + loss_clsf

    trainer.train(get_loss, model_file, None, data_parallel)
def main(task='mrpc',
         base_train_cfg='config/QDElectra_pretrain.json',
         train_cfg='config/train_mrpc.json',
         model_cfg='config/QDElectra_base.json',
         data_file='../glue/MRPC/train.tsv',
         model_file=None,
         data_parallel=True,
         vocab='../uncased_L-12_H-768_A-12/vocab.txt',
         log_dir='../exp/electra/pretrain/runs',
         save_dir='../exp/bert/mrpc',
         mode='train',
         pred_distill=True):
    """Train or evaluate a quantized-distilled ELECTRA sequence classifier."""
    # Base pre-train config, overridden by the task-specific training config.
    train_cfg_dict = json.load(open(base_train_cfg, "r"))
    train_cfg_dict.update(json.load(open(train_cfg, "r")))
    train_cfg = ElectraConfig().from_dict(train_cfg_dict)
    # train_cfg = ElectraConfig().from_json_file(train_cfg)
    model_cfg = ElectraConfig().from_json_file(model_cfg)
    output_mode, train_cfg.n_epochs, max_len = get_task_params(task)
    set_seeds(train_cfg.seed)

    tokenizer = tokenization.FullTokenizer(vocab_file=vocab, do_lower_case=True)
    TaskDataset = dataset_class(task)  # task dataset class according to the task
    num_labels = len(TaskDataset.labels)
    pipeline = [Tokenizing(tokenizer.convert_to_unicode, tokenizer.tokenize),
                AddSpecialTokensWithTruncation(max_len),
                TokenIndexing(tokenizer.convert_tokens_to_ids,
                              TaskDataset.labels, output_mode, max_len)]
    data_set = TaskDataset(data_file, pipeline)
    data_iter = DataLoader(data_set, batch_size=train_cfg.batch_size, shuffle=True)

    # Teacher: full-precision base discriminator; student: quantized small one.
    t_discriminator = ElectraForSequenceClassification.from_pretrained(
        'google/electra-base-discriminator'
    )
    s_discriminator = QuantizedElectraForSequenceClassification.from_pretrained(
        'google/electra-small-discriminator', config=model_cfg
    )
    model = DistillElectraForSequenceClassification(t_discriminator,
                                                    s_discriminator, model_cfg)

    optimizer = optim.optim4GPU(train_cfg, model)
    writer = SummaryWriter(log_dir=log_dir)  # for tensorboardX
    base_trainer_args = (train_cfg, model_cfg, model, data_iter, optimizer,
                         save_dir, get_device())
    trainer = QuantizedDistillElectraTrainer(writer, *base_trainer_args)

    if mode == 'train':
        trainer.train(model_file, None, data_parallel)
    elif mode == 'eval':
        # NOTE(review): this branch appears garbled — TokenIndexing(...) is a
        # pipeline stage, not a 4-tuple, so the unpacking below cannot succeed
        # as written. Preserved verbatim; needs reconstruction from upstream.
        input_ids, attention_mask, token_type_ids, label_ids \
            = TokenIndexing(tokenizer.convert_tokens_to_ids,
                            TaskDataset.labels, output_mode, max_len)
        _, eval_labels = get_tensor_data(output_mode, input_ids, attention_mask,
                                         token_type_ids, label_ids)
        results = trainer.eval(model_file, output_mode, eval_labels, num_labels,
                               data_parallel)
        total_accuracy = torch.cat(results).mean().item()
        print('Accuracy:', total_accuracy)
def main(train_cfg='config/electra_pretrain.json',
         model_cfg='config/electra_small.json',
         data_file='../tbc/books_large_all.txt',
         model_file=None,
         data_parallel=True,
         vocab='../uncased_L-12_H-768_A-12/vocab.txt',
         log_dir='../exp/electra/pretrain/runs',
         save_dir='../exp/electra/pretrain',
         max_len=128,
         max_pred=20,
         mask_prob=0.15,
         quantize=False):
    """Distillation pre-training of ELECTRA, optionally with a quantized student."""
    check_dirs_exist([log_dir, save_dir])

    train_cfg = ElectraConfig().from_json_file(train_cfg)
    model_cfg = ElectraConfig().from_json_file(model_cfg)

    set_seeds(train_cfg.seed)

    tokenizer = tokenization.FullTokenizer(vocab_file=vocab, do_lower_case=True)
    tokenize = lambda x: tokenizer.tokenize(tokenizer.convert_to_unicode(x))
    pipeline = [Preprocess4Pretrain(max_pred, mask_prob,
                                    list(tokenizer.vocab.keys()),
                                    tokenizer.convert_tokens_to_ids, max_len)]
    data_iter = SentPairDataLoader(data_file, train_cfg.batch_size, tokenize,
                                   max_len, pipeline=pipeline)

    # Get distilled-electra and quantized-distilled-electra
    generator = ElectraForMaskedLM.from_pretrained(
        'google/electra-small-generator')
    t_discriminator = ElectraForPreTraining.from_pretrained(
        'google/electra-base-discriminator')
    # NOTE(review): when quantize is True this is an *instance*, otherwise a
    # *class*; from_pretrained is then called on either one — confirm both
    # paths behave as intended.
    s_discriminator = QuantizedElectraForPreTraining(
        model_cfg) if quantize else ElectraForPreTraining
    s_discriminator = s_discriminator.from_pretrained(
        'google/electra-small-discriminator', config=model_cfg)  # model
    # config is used for model "QuantizedElectraForPreTraining"
    model = DistillElectraForPreTraining(generator, t_discriminator,
                                         s_discriminator, model_cfg)

    optimizer = optim.optim4GPU(train_cfg, model)
    writer = SummaryWriter(log_dir=log_dir)  # for tensorboardX
    base_trainer_args = (train_cfg, model_cfg, model, data_iter, None,
                         optimizer, save_dir, get_device())
    trainer = QuantizedDistillElectraTrainer(writer, *base_trainer_args)
    trainer.train(model_file, None, data_parallel)
    trainer._eval()
def test_random_gridworld_generation(self):
    """A seeded 8x8 random gridworld must have a fixed, reproducible layout."""
    set_seeds(314159)
    mdp = GridworldMdp.generate_random(8, 8, 0, 0)
    self.assertEqual(mdp.height, 8)
    self.assertEqual(mdp.width, 8)
    rendered = str(mdp)
    # Character census of the rendered grid: walls, free cells, agent, reward.
    for char, expected in (('X', 28), (' ', 34), ('A', 1), ('3', 1)):
        self.assertEqual(rendered.count(char), expected)
def main(opt):
    """Load config.json, set up GPU/logging/seeds, and train when requested."""
    with open('config.json', 'r', encoding='utf-8') as cfg_file:
        args = AttrDict(json.load(cfg_file))
    # Pin the visible GPUs before anything touches CUDA.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda
    init_logger()
    set_seeds()
    tokenizer = load_tokenizer(args)
    if not opt.train:
        return
    Trainer(args, tokenizer).train()
def main(args):
    """Pre-training with masked LM + sentence-order prediction (SOP) objectives."""
    cfg = train.Config.from_json(args.train_cfg)
    model_cfg = models.Config.from_json(args.model_cfg)

    set_seeds(cfg.seed)

    tokenizer = tokenization.FullTokenizer(vocab_file=args.vocab, do_lower_case=True)
    tokenize = lambda x: tokenizer.tokenize(tokenizer.convert_to_unicode(x))

    pipeline = [Preprocess4Pretrain(args.max_pred, args.mask_prob,
                                    list(tokenizer.vocab.keys()),
                                    tokenizer.convert_tokens_to_ids,
                                    model_cfg.max_len, args.mask_alpha,
                                    args.mask_beta, args.max_gram)]
    data_iter = SentPairDataLoader(args.data_file, cfg.batch_size, tokenize,
                                   model_cfg.max_len, pipeline=pipeline)

    model = BertModel4Pretrain(model_cfg)
    criterion1 = nn.CrossEntropyLoss(reduction='none')  # per-token masked-LM loss
    criterion2 = nn.CrossEntropyLoss()  # sentence-order (SOP) loss

    optimizer = optim.optim4GPU(cfg, model)
    trainer = train.Trainer(cfg, model, data_iter, optimizer, args.save_dir,
                            get_device())
    writer = SummaryWriter(log_dir=args.log_dir)  # for tensorboardX

    def get_loss(model, batch, global_step):  # make sure loss is tensor
        input_ids, segment_ids, input_mask, masked_ids, masked_pos, masked_weights, is_next = batch
        logits_lm, logits_clsf = model(input_ids, segment_ids, input_mask, masked_pos)
        loss_lm = criterion1(logits_lm.transpose(1, 2), masked_ids)  # for masked LM
        # Mask out padding positions before averaging.
        loss_lm = (loss_lm * masked_weights.float()).mean()
        loss_sop = criterion2(logits_clsf, is_next)  # for sentence classification
        writer.add_scalars(
            'data/scalar_group', {
                'loss_lm': loss_lm.item(),
                'loss_sop': loss_sop.item(),
                'loss_total': (loss_lm + loss_sop).item(),
                'lr': optimizer.get_lr()[0],
            }, global_step)
        return loss_lm + loss_sop

    trainer.train(get_loss, model_file=None, data_parallel=True)
def main():
    """Supervised pre-training of the policy/value network, then a final test run."""
    parser = argparse.ArgumentParser(description='Pretraining argument parser')
    parser = load_pretrain_args(parser)
    parser = load_test_args(parser)
    args = parser.parse_args()

    set_seeds(args.seed)

    train_data = get_train_data()
    valid_data = get_valid_data()
    test_data = get_test_data()

    nnet = create_nnet(train_data, args)

    optimizer = Adam(nnet.parameters(), lr=args.lr)
    ce_loss = nn.CrossEntropyLoss()
    mse_loss = nn.MSELoss()
    action_space = ActionSpace()
    tb = SummaryWriter()

    best_score = 0
    for epoch in range(1, args.update_epochs + 1):
        print(f'Epoch {epoch}')
        for indice in random_batch(len(train_data), args.train_batch_size):
            batch = train_data[indice]
            input_batch = to_input_batch(batch, torch.device('cuda'))

            policies, values = nnet(input_batch)

            target_policies = get_target_policies(batch, action_space).cuda()
            target_values = get_target_values(batch).cuda()

            # Joint objective: cross-entropy on policies + MSE on values.
            policy_loss = ce_loss(policies, target_policies)
            value_loss = mse_loss(values, target_values)
            loss = policy_loss + value_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Checkpoint only when validation accuracy improves.
        accuracy = test(valid_data, nnet, args, tb, epoch)
        if accuracy > best_score:
            best_score = accuracy
            torch.save(nnet.module.state_dict(), 'models/pretrained.pt')

    # NOTE(review): assumes at least one epoch improved on 0 so that
    # models/pretrained.pt exists before this final load — confirm.
    nnet.module.load_state_dict(torch.load('models/pretrained.pt'))
    test(test_data, nnet, args, tb, args.update_epochs + 1)
def compare_agents(self, name, agent1, agent2, places=7, print_mdp=False):
    """Assert two planner implementations agree (to `places` decimals) on every
    Q-value of a randomly generated connected gridworld."""
    print('Comparing {0} agents'.format(name))
    set_seeds(314159)  # fixed seed so both agents see the same MDP
    mdp = GridworldMdp.generate_random_connected(16, 16, 5, 0.2)
    if print_mdp:
        print(mdp)
    env = Mdp(mdp)
    # self.time runs the thunk under a labelled timer (presumably reporting
    # how long each planner takes to ingest the MDP).
    self.time(lambda: agent1.set_mdp(mdp), "Python planner")
    self.time(lambda: agent2.set_mdp(mdp), "Numpy/Tensorflow planner")
    # Exhaustively compare Q-values over all state/action pairs.
    for s in mdp.get_states():
        for a in mdp.get_actions(s):
            mu = agent1.extend_state_to_mu(s)
            qval1, qval2 = agent1.qvalue(mu, a), agent2.qvalue(mu, a)
            self.assertAlmostEqual(qval1, qval2, places=places)
def main(): args = parse_args() # Fix the random seeds. set_seeds(args.random_seed) # Default torch settings. torch.set_default_dtype(torch.float64) if torch.cuda.is_available(): torch.set_default_tensor_type(torch.cuda.DoubleTensor) # load data gpdists, g = load_graph_pdists(args.input_graph, cache_dir='.cached_pdists') n_nodes = g.number_of_nodes() ds = GraphDataset(gpdists) fp = FastPrecision(g) # run hyp2 hyp = Lorentz(3) emb = ManifoldEmbedding(n_nodes, [hyp] * args.n_factors) for i in range(args.n_factors): emb.scales[i] = torch.nn.Parameter(torch.tensor(2.0)) man_name = '_'.join('hyp2' for _ in range(args.n_factors)) save_dir = os.path.join(args.save_dir, man_name) if args.hyp_snapshot or args.hyp_pretrained: logging.info('Loading embedding for %s', man_name) load_embedding(emb, save_dir) if not args.hyp_pretrained: train(ds, fp, emb, args.n_epochs, save_dir) # map it to SPD spd = SPD(2 * args.n_factors) spd_emb = ManifoldEmbedding(n_nodes, [spd]) save_dir = os.path.join(args.save_dir, 'spd{}'.format(spd.dim)) if args.spd_snapshot: logging.info('Loading embedding for SPD%d', spd.dim) load_embedding(spd_emb, save_dir) else: with torch.no_grad(): spd_emb.xs[0] = ManifoldParameter(block_diag([ h2_to_sspd2(emb.xs[i].mul(math.sqrt(2))) for i in range(args.n_factors) ]), manifold=spd) hyp_dists = emb.to('cpu').compute_dists(None) spd_dists = spd_emb.compute_dists(None).to('cpu') assert torch.allclose(hyp_dists, spd_dists, atol=1e-4) # run spd2 train(ds, fp, spd_emb, args.n_epochs, save_dir, args.n_epochs)
def __init__(self, model, loaders, criterion, optim, scheduler, val_step,
             is_tensorboard, verbose, device, random_seeds=42):
    """Training harness: moves model and criterion to `device` and fixes RNG seeds."""
    # Device-bound components.
    self.device = device
    self.model = model.to(device)
    self.criterion = criterion.to(device)
    # Training machinery.
    self.loaders = loaders
    self.optim = optim
    self.scheduler = scheduler
    self.val_step = val_step
    self.checkpoint_save_step = 100  # save a checkpoint every 100 steps
    # Reporting options.
    self.verbose = verbose
    self.is_tensorboard = is_tensorboard
    # Reproducibility.
    set_seeds(random_seeds)
def main():
    """Evaluate a saved policy network on the test set and report wall time."""
    parser = load_test_args(argparse.ArgumentParser(description='Test argument parser'))
    args = parser.parse_args()
    set_seeds(args.seed)

    test_data = get_test_data()
    net = create_nnet(test_data, args)
    # Checkpoints store the wrapped module's weights.
    net.module.load_state_dict(torch.load(f'models/{args.load}'))

    writer = SummaryWriter()
    t0 = time()
    policy_test(test_data, net, args, writer, epoch=0)
    print(f'test time: {time() - t0:.3f} sec.')
def main():
    """AlphaZero-style training loop: self-play simulation, network update, validation."""
    mp.set_start_method('spawn')
    mpp.Pool.istarmap = istarmap  # for tqdm

    parser = argparse.ArgumentParser(description='Training argument parser')
    parser = load_train_args(parser)
    parser = load_test_args(parser)
    args = parser.parse_args()

    set_seeds(args.seed)

    train_data = get_train_data()
    valid_data = get_valid_data()

    nnet = create_nnet(train_data, args)
    nnet.module.load_state_dict(torch.load(f'models/{args.load}'))
    # One network copy per GPU for parallel self-play.
    nnets = create_nnets(train_data, args, n_nnets=torch.cuda.device_count())

    optimizer = Adam(nnet.parameters(), lr=args.lr)
    policy_loss_fn = nn.KLDivLoss(reduction='batchmean')
    value_loss_fn = nn.MSELoss()
    action_space = ActionSpace()

    # Sliding replay buffer of recent self-play examples.
    train_examples = deque(maxlen=args.examples_len)
    tb = SummaryWriter()  # tensorboard writer

    epoch = 0
    while True:  # NOTE(review): no stop condition — runs until interrupted
        for indice in random_batch(len(train_data), args.train_batch_size):
            epoch += 1
            print(f'Epoch {epoch}')

            copy_nnet(nnet, nnets)  # nnet -> nnets
            curr_examples = simulate(train_data[indice], nnets, action_space,
                                     args)
            train_examples.extend(curr_examples)

            update_net(train_examples, nnet, optimizer, policy_loss_fn,
                       value_loss_fn, args, tb, epoch)
            test(valid_data, nnet, args, tb, epoch)
def __init__(self, opts):
    """Set up MetaSim training: model, task network, data, renderer, MMD, optimizer.

    `opts` is a nested dict of experiment options (devices, paths, optim params).
    """
    self.opts = opts
    self.device = opts['device']

    # Logdir
    self.logdir = os.path.join(opts['logdir'], opts['exp_name'],
                               opts['variant_name'])
    io.makedirs(self.logdir)

    # Set seeds
    rn = utils.set_seeds(opts['seed'])  # NOTE(review): rn is never used

    self.model = MetaSim(opts).to(self.device)
    self.generator = self.model.generator

    # Task network may live on a different device than the generator.
    tasknet_class = get_tasknet(opts['dataset'])
    self.tasknet = tasknet_class(opts['task']).to(
        self.opts['task']['device'])

    # Data
    sgl = get_scene_graph_loader(opts['dataset'])
    self.scene_graph_dataset = sgl(self.generator,
                                   self.opts['epoch_length'])

    # Rendering layer
    self.renderer = RenderLayer(self.generator, self.device)

    # MMD
    self.mmd = MMDInception(device=self.device,
                            resize_input=self.opts['mmd_resize_input'],
                            include_image=False,
                            dims=self.opts['mmd_dims'])
    dl = get_loader(opts['dataset'])
    self.target_dataset = dl(self.opts['task']['val_root'])
    # In the paper, this is different
    # than the data used to get task net acc.
    # Keeping it the same here for simplicity to
    # reduce memory overhead. To do this correctly,
    # generate another copy of the target data
    # and use it for MMD computation.

    # Optimizer
    self.optimizer = torch.optim.Adam(
        self.model.parameters(),
        lr=opts['optim']['lr'],
        weight_decay=opts['optim']['weight_decay'])

    # LR scheduler
    self.lr_sched = torch.optim.lr_scheduler.StepLR(
        self.optimizer,
        step_size=opts['optim']['lr_decay'],
        gamma=opts['optim']['lr_decay_gamma'])
def generate(cfg):
    """Generate the infection datasets described by `cfg` and save them as .pt files."""
    from tqdm import trange
    from utils import set_seeds

    cfg.setdefault('seed', 0)
    set_seeds(cfg.seed)
    print(f'Random seed: {cfg.seed}')

    folder = Path(cfg.folder).expanduser().resolve()
    folder.mkdir(parents=True, exist_ok=True)
    print(f'Saving datasets in: {folder}')

    # Keep a copy of the generating configuration next to the datasets.
    with open(folder / 'datasets.yaml', 'w') as f:
        f.write(cfg.toYAML())

    for name, spec in cfg.datasets.items():
        # 'num_samples' drives generation; everything else goes to the constructor.
        ctor_kwargs = {key: val for key, val in spec.items() if key != 'num_samples'}
        dataset = InfectionDataset(**ctor_kwargs)
        progress = trange(spec.num_samples, desc=name.capitalize(),
                          unit='samples', leave=True)
        dataset.samples = [dataset.random_sample() for _ in progress]
        out_path = folder.joinpath(name).with_suffix('.pt')
        torch.save(dataset, out_path)
        print(f'{name.capitalize()}: saved {len(dataset)} samples in: {out_path}')
def main(_): print("Model Architecture: {}".format(FLAGS.model_architecture)) # Adjust some parameters if FLAGS.debug: FLAGS.small_label_set = False print("RUNNING IN DEBUG MODE") FLAGS.num_classes = utils.get_num_classes(FLAGS) X_train, y_train = data_utils.load_dataset_tf(FLAGS, mode="train") X_val, y_val = data_utils.load_dataset_tf(FLAGS, mode="val") # comet_ml experiment logging (https://www.comet.ml/) experiment = Experiment(api_key="J55UNlgtffTDmziKUlszSMW2w", log_code=False) experiment.log_multiple_params(utils.gather_params(FLAGS)) experiment.set_num_of_epocs(FLAGS.epochs) experiment.log_dataset_hash(X_train) tf.logging.set_verbosity(tf.logging.INFO) # Start a new, DEFAULT TensorFlow session. sess = tf.InteractiveSession() utils.set_seeds() # Get deterministic behavior? model = models.create_model(FLAGS) fw = framework.Framework(sess, model, experiment, FLAGS) num_params = int(utils.get_number_of_params()) model_size = num_params * 4 print("\nNumber of trainable parameters: {}".format(num_params)) print("Model is ~ {} bytes out of max 5000000 bytes\n".format(model_size)) experiment.log_parameter("num_params", num_params) experiment.log_parameter("approx_model_size", model_size) fw.optimize(X_train, y_train, X_val, y_val)
def main(): args = parse_args() # Fix the random seeds. set_seeds(args.random_seed) # Default torch settings. torch.set_default_dtype(torch.float64) if torch.cuda.is_available(): torch.set_default_tensor_type(torch.cuda.DoubleTensor) # load data gpdists, g = load_graph_pdists(args.input_graph, cache_dir='.cached_pdists') n_nodes = g.number_of_nodes() ds = GraphDataset(gpdists) fp = FastPrecision(g) # run hyp2 emb = ManifoldEmbedding(n_nodes, [Lorentz(3)]) path = os.path.join(args.save_dir, 'hyp2') train(ds, fp, emb, args.n_epochs, path) curvature_sq = 1 / emb.scales[0] # map it to SSPD sspd_emb = ManifoldEmbedding(n_nodes, [SPD(2)]) sspd_emb.xs[0] = ManifoldParameter(h2_to_sspd2(emb.xs[0] / curvature_sq.sqrt()), manifold=sspd_emb.manifolds[0]) sspd_emb.scales[0] = torch.nn.Parameter(1 / curvature_sq / 2) assert torch.allclose(emb.compute_dists(None), sspd_emb.compute_dists(None), atol=1e-4) # run spd2 path = os.path.join(args.save_dir, 'spd2') train(ds, fp, sspd_emb, args.n_epochs, path, args.n_epochs)
import tensorflow as tf import numpy as np from utils import set_seeds, Data, create_normal_variable set_seeds(43) def model(X, weights, biases, dropout_prob): n_layers = len(weights) output = tf.add(tf.matmul(X, weights['input']), biases['input']) output = tf.nn.dropout(tf.nn.relu(output), dropout_prob) for i in xrange(2, n_layers): output = tf.add(tf.matmul(output, weights['h%i' % i]), biases['b%i' % i]) output = tf.nn.relu(output) output = tf.add(tf.matmul(output, weights['output']), biases['output']) return output, weights, biases def inference(X, weights, biases): return model(X, weights, biases, 0.5) def loss(y_hat, y): return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(y_hat, y)) def train(loss): return tf.train.RMSPropOptimizer(learning_rate=0.0003,
def run_no_baseline(discount_factors, learn_rates, hidden_dims, init_temps,
                    stochasticity, n_runs, n_episodes):
    """Grid-search REINFORCE (no baseline) hyperparameters on CartPole.

    Writes per-setting results to a CSV and pickles the best settings found.
    Each setting is averaged over n_runs runs of n_episodes episodes.
    """
    # no baseline
    best_result = 0
    best_settings = dict()
    results_file = f'results/s{stochasticity}_no_baseline.csv'
    best_settings_file = f'results/s{stochasticity}_no_baseline_best_settings.pkl'
    with open(results_file, 'w') as f:
        f.write('discount_factor,learn_rate,hidden_dim,init_temp,result' + '\n')
    for discount_factor in discount_factors:
        for learn_rate in learn_rates:
            for hidden_dim in hidden_dims:
                for init_temp in init_temps:
                    print('#' * 30)
                    print('#' * 9 + ' NEW SEARCH ' + '#' * 9)
                    print('#' * 30)
                    print()
                    st = time()
                    # change this for learned baseline
                    print(
                        f'Search settings: baseline=run_episodes_no_baseline, discount_factor={discount_factor}, learn_rate={learn_rate}, hidden_dim={hidden_dim}, init_temp={init_temp}'
                    )
                    # initialize the environment
                    env = gym.make('CartPole-v1')  # <---------- change this!
                    result = 0
                    for i in range(n_runs):
                        start_time = time()
                        policy_model = PolicyNetwork(
                            input_dim=4, hidden_dim=hidden_dim, output_dim=2
                        )  # change input_ and output_dim for gridworld env
                        # Distinct but reproducible seed per run.
                        seed = 40 + i
                        set_seeds(env, seed)
                        episode_durations, _ = run_episodes_no_baseline(
                            policy_model, env, n_episodes, discount_factor,
                            learn_rate, init_temp, stochasticity)
                        result += np.mean(episode_durations)
                        del policy_model
                        end_time = time()
                        h, m, s = get_running_time(end_time - start_time)
                        print(
                            f'Done with run {i+1}/{n_runs} in {f"{h} hours, " if h else ""}{f"{m} minutes and " if m else ""}{s} seconds'
                        )
                    env.close()
                    result /= n_runs  # mean episode duration over runs
                    with open(results_file, 'a') as f:
                        f.write(
                            f'{discount_factor},{learn_rate},{hidden_dim},{init_temp},{result}'
                            + '\n')
                    et = time()
                    h, m, s = get_running_time(et - st)
                    print(
                        f'Done with search in {f"{h} hours, " if h else ""}{f"{m} minutes and " if m else ""}{s} seconds'
                    )
                    print(f'Average number of steps per episode: {result}')
                    # Track and persist the best hyperparameter setting so far.
                    if result > best_result:
                        best_result = result
                        best_settings['discount_factor'] = discount_factor
                        best_settings['learn_rate'] = learn_rate
                        best_settings['hidden_dim'] = hidden_dim
                        best_settings['init_temp'] = init_temp
                        best_settings['result'] = best_result
                        pkl.dump(best_settings, open(best_settings_file, 'wb'))
                        print(f'New best result!: {result}')
                        print(f'New best settings!: {best_settings}')
                    print()
                    print()
                    print()
    print(f'Best settings after completing grid search: {best_settings}')


# Choose what to run by uncommenting
#run_no_baseline(discount_factors, learn_rates, hidden_dims, init_temps, stochasticity, n_runs, n_episodes)
#run_learned_baseline(discount_factors, learn_rates, hidden_dims, init_temps, stochasticity, n_runs, n_episodes)
#run_selfcritic_baseline(discount_factors, learn_rates, hidden_dims, init_temps, stochasticity, n_runs, n_episodes)
def main(conf):
    """Fine-tune BERT on the Korean hate-speech dataset (hate and/or bias labels)."""
    # Prepare data
    train_dev = koco.load_dataset("korean-hate-speech", mode="train_dev")
    train, valid = train_dev["train"], train_dev["dev"]

    # Prepare tokenizer
    tokenizer = (
        get_tokenizer()
        if "kobert" in conf.pretrained_model
        else AutoTokenizer.from_pretrained(conf.pretrained_model)
    )
    if conf.tokenizer.register_names:
        # Register entertainer names as dedicated vocabulary tokens.
        names = pd.read_csv("entertainement_biographical_db.tsv", sep="\t")[
            "name_wo_parenthesis"
        ].tolist()
        tokenizer.add_tokens(names)

    # Mapping string y_label to integer label
    if conf.label.hate:
        train, label2idx = map_label2idx(train, "hate")
        valid, _ = map_label2idx(valid, "hate")
    elif conf.label.bias:
        train, label2idx = map_label2idx(train, "bias")
        valid, _ = map_label2idx(valid, "bias")

    # Use bias as an additional context for predicting hate
    if conf.label.hate and conf.label.bias:
        biases = ["gender", "others", "none"]
        tokenizer.add_tokens([f"<{label}>" for label in biases])

    # Prepare DataLoader
    train_dataset = KoreanHateSpeechDataset(train)
    valid_dataset = KoreanHateSpeechDataset(valid)
    collator = KoreanHateSpeechCollator(
        tokenizer,
        predict_hate_with_bias=(conf.label.hate and conf.label.bias)
    )
    train_loader = DataLoader(
        train_dataset,
        batch_size=conf.train_hparams.batch_size,
        shuffle=True,
        collate_fn=collator.collate,
    )
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=conf.train_hparams.batch_size,
        shuffle=False,
        collate_fn=collator.collate,
    )

    # Prepare model
    set_seeds(conf.train_hparams.seed)
    model = BertForSequenceClassification.from_pretrained(
        conf.pretrained_model, num_labels=len(label2idx)
    )
    # Resize the embedding matrix if new tokens were added above.
    if conf.tokenizer.register_names:
        model.resize_token_embeddings(len(tokenizer))
    elif conf.label.hate and conf.label.bias:
        model.resize_token_embeddings(len(tokenizer))
    model = model.to(device)

    # Prepare optimizer and scheduler
    # Standard BERT fine-tuning: no weight decay for biases / LayerNorm weights.
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.01,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]
    optimizer = optim.AdamW(
        optimizer_grouped_parameters,
        lr=conf.train_hparams.lr,
        eps=conf.train_hparams.adam_epsilon,
    )
    n_total_iterations = len(train_loader) * conf.train_hparams.n_epochs
    n_warmup_steps = int(n_total_iterations * conf.train_hparams.warmup_ratio)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, n_warmup_steps, n_total_iterations
    )
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)

    # Train!
    trainer = BertTrainer(conf.train_hparams)
    model = trainer.train(
        model, criterion, optimizer, scheduler, train_loader, valid_loader
    )

    # Persist model weights plus everything needed to reload the experiment.
    makedirs(conf.checkpoint_dir)
    makedirs(conf.log_dir)
    checkpoint_path = f"{conf.checkpoint_dir}/{conf.model_name}.pt"
    log_path = f"{conf.log_dir}/{conf.model_name}.log"
    torch.save({"model": model.state_dict()}, checkpoint_path)
    torch.save({"config": conf, "classes": label2idx, "tokenizer": tokenizer},
               log_path)
# optimize and save results = grid_optimization(nb_run, nb_processes, **kwargs) utils.save_json(os.path.join(log_path, 'results.json'), results) if __name__ == '__main__': # parse arguments config = parse_args() # load json default arguments args = utils.load_json(config['config_file']) # seed for reproducibility utils.set_seeds(args['seed']) # logging model_name, logdir = args.pop('model_name'), args.pop('logdir') v = utils.get_version(name=model_name, logdir=logdir) log_path = os.path.join(logdir, f'{model_name}-v{v}') exp = config.pop('experiment') if exp == 'run': run(log_path, **args) elif exp == 'grid': nb_processes = config.pop('workers') nb_run = config.pop('average') grid(nb_run, nb_processes, log_path, **args)
def run_selfcritic_baseline(stochasticity, n_runs, n_episodes): # self-critic baseline dir_path = os.path.dirname(os.path.realpath(__file__)) best_settings_file = dir_path + f'/cart_pole_parameter_search/s{stochasticity}_SC_baseline_best_settings.pkl' eval_file = f'cart_evals/s{stochasticity}_SC_baseline.pkl' with open(best_settings_file, 'rb') as pickle_file: best_settings = pkl.load(pickle_file) discount_factor = best_settings['discount_factor'] learn_rate = best_settings['learn_rate'] hidden_dim = best_settings['hidden_dim'] init_temp = best_settings['init_temp'] st = time() # change this for learned baseline print( f'Run settings: baseline=run_episodes_with_SC_baseline, discount_factor={discount_factor}, learn_rate={learn_rate}, hidden_dim={hidden_dim}, init_temp={init_temp}' ) # initialize the environment env = gym.make('CartPole-v1') episode_durations_list = [] reinforce_loss_list = [] for i in range(n_runs): start_time = time() policy_model = PolicyNetwork( input_dim=4, hidden_dim=hidden_dim, output_dim=2) # change input_ and output_dim for gridworld env seed = 40 + i set_seeds(env, seed) episode_durations, reinforce_loss = run_episodes_with_SC_baseline( policy_model, env, n_episodes, discount_factor, learn_rate, init_temp, stochasticity) episode_durations_list.append(episode_durations) reinforce_loss_list.append(reinforce_loss) del policy_model end_time = time() h, m, s = get_running_time(end_time - start_time) print( f'Done with run {i+1}/{n_runs} in {f"{h} hours, " if h else ""}{f"{m} minutes and " if m else ""}{s} seconds' ) env.close() et = time() h, m, s = get_running_time(et - st) evals = {} evals['episode_durations'] = episode_durations_list evals['reinforce_loss'] = reinforce_loss_list pkl.dump(evals, open(eval_file, 'wb')) print( f'Done with runs in {f"{h} hours, " if h else ""}{f"{m} minutes and " if m else ""}{s} seconds' )
opt.zero_grad() loss.backward() torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip) opt.step() def parse_args(): # Adam: ~55% test accuracy, RMSprop: ~60% test accuracy with RnnModel1 (single layer) parser = argparse.ArgumentParser() parser.add_argument('--dataroot', required=True) parser.add_argument('--batch-size', type=int, default=256) parser.add_argument('--iterations', type=int, default=int(1e4)) parser.add_argument('--test-interval', type=int, default=100) parser.add_argument('--lr', type=float, default=1e-4) parser.add_argument('--clip', type=float, default=1) parser.add_argument('--seed', default=1, type=int) parser.add_argument('--device', default='cuda' if torch.cuda.is_available() else 'cpu') parser.add_argument('--out', default='results') args = parser.parse_args() args.out = os.path.join(args.out, unique_string()) return args if __name__ == '__main__': args = parse_args() print(args) set_seeds(args.seed) main(args)
def main(config='config/finetune/agnews/train.json'):
    """Distill a fine-tuned transformer teacher into a BlendCNN student.

    In "train" mode the teacher's logits are pre-computed over the whole
    dataset and packed into the DataLoader alongside the inputs, then the
    student is trained on a mix of hard-label cross-entropy and soft-label
    KL distillation loss. In "eval" mode the saved student is evaluated.
    """
    # Load the four nested JSON configs (run, data, model, optimizer).
    cfg = Config(**json.load(open(config, "r")))
    cfg_data = data.Config(**json.load(open(cfg.cfg_data, "r")))
    cfg_model = models.Config(**json.load(open(cfg.cfg_model, "r")))
    cfg_optim = trainer.Config(**json.load(open(cfg.cfg_optim, "r")))

    set_seeds(cfg.seed)

    ### Prepare Dataset and Preprocessing ###
    TaskDataset = data.get_class(cfg_data.task)  # task dataset class according to the task
    tokenizer = tokenization.FullTokenizer(vocab_file=cfg_data.vocab_file,
                                           do_lower_case=True)
    dataset = TaskDataset(cfg_data.data_file[cfg.mode], pipelines=[
        data.RemoveSymbols('\\'),
        data.Tokenizing(tokenizer.convert_to_unicode, tokenizer.tokenize),
        data.AddSpecialTokensWithTruncation(cfg_data.max_len),
        data.TokenIndexing(tokenizer.convert_tokens_to_ids,
                           TaskDataset.labels,
                           cfg_data.max_len)
    ], n_data=None)
    tensors = TensorDataset(*dataset.get_tensors())  # To Tensors
    # shuffle=False: order must stay fixed so teacher logits (fetched below)
    # stay aligned with their examples.
    data_iter = DataLoader(tensors, batch_size=cfg_optim.batch_size, shuffle=False)

    ### Fetch Teacher's output and put it into the dataset ###
    def fetch_logits(model):
        # Run the teacher over the (unshuffled) data and concatenate logits.
        def get_logits(model, batch):
            input_ids, segment_ids, input_mask, label_id = batch
            logits = model(input_ids, segment_ids, input_mask)
            # eval() expects an (accuracy, result) pair; 0.0 is a dummy.
            return 0.0, logits

        train_loop = trainer.TrainLoop(cfg_optim, model, data_iter, None, None,
                                       get_device())
        results = torch.cat(train_loop.eval(get_logits, cfg.model_file))
        return results

    if cfg.mode == "train":
        print("Fetching teacher's output...")
        teacher = models.Classifier4Transformer(cfg_model, len(TaskDataset.labels))
        teacher.load_state_dict(torch.load(cfg.model_file))  # use trained model
        with torch.no_grad():
            teacher_logits = fetch_logits(teacher)
        # Rebuild the dataset with teacher logits prepended to each example.
        tensors = TensorDataset(teacher_logits, *dataset.get_tensors())  # To Tensors
        data_iter = DataLoader(tensors, batch_size=cfg_optim.batch_size, shuffle=False)

    ### Models ###
    model = models.BlendCNN(cfg_model, len(TaskDataset.labels))
    # Student reuses the teacher's (pretrained) token embeddings.
    checkpoint.load_embedding(model.embed, cfg.pretrain_file)

    optimizer = optim.optim4GPU(cfg_optim, model)
    train_loop = trainer.TrainLoop(
        cfg_optim, model, data_iter, optimizer, cfg.save_dir, get_device()
    )

    def get_loss(model, batch, global_step):  # make sure loss is a scalar tensor
        teacher_logits, input_ids, segment_ids, input_mask, label_id = batch
        # Distillation temperature; T=1.0 leaves the softmax unscaled.
        T = 1.0
        logits = model(input_ids, segment_ids, input_mask)
        # 0.1 * hard-label CE + 0.9 * soft-label KL against the teacher.
        # NOTE(review): nn.KLDivLoss() uses its default reduction here
        # (elementwise mean, not 'batchmean') — presumably intentional since
        # training was tuned with it; confirm before changing.
        loss = 0.1*nn.CrossEntropyLoss()(logits, label_id)
        loss += 0.9*nn.KLDivLoss()(
            F.log_softmax(logits/T, dim=1),
            F.softmax(teacher_logits/T, dim=1)
        )
        #loss = 0.9*nn.MSELoss()(logits, teacher_logits)
        return loss

    def evaluate(model, batch):
        # Batch accuracy plus the per-example correctness vector.
        input_ids, segment_ids, input_mask, label_id = batch
        logits = model(input_ids, segment_ids, input_mask)
        _, label_pred = logits.max(1)
        result = (label_pred == label_id).float()  #.cpu().numpy()
        accuracy = result.mean()
        return accuracy, result

    if cfg.mode == "train":
        train_loop.train(get_loss, None, None)  # not use pretrain file
        print("Training has been done properly.")
    elif cfg.mode == "eval":
        results = train_loop.eval(evaluate, cfg.model_file)
        total_accuracy = torch.cat(results).mean().item()
        print(f"Accuracy: {total_accuracy}")
sort_dict( experiment, ['name', 'tags', 'epoch', 'samples', 'model', 'optimizer', 'sessions']) sort_dict(session, [ 'epochs', 'batch_size', 'losses', 'seed', 'cpus', 'device', 'samples', 'status', 'datetime_started', 'datetime_completed', 'data', 'log', 'checkpoint', 'git', 'gpus' ]) experiment.sessions.append(session) pyaml.pprint(experiment, sort_dicts=False, width=200) del session # endregion # region Building phase # Seeds (set them after the random run id is generated) set_seeds(experiment.session.seed) # Model model: torch.nn.Module = import_(experiment.model.fn)( *experiment.model.args, **experiment.model.kwargs) if 'state_dict' in experiment.model: model.load_state_dict(torch.load(experiment.model.state_dict)) model.to(experiment.session.device) # Optimizer optimizer: torch.optim.Optimizer = import_( experiment.optimizer.fn)(model.parameters(), *experiment.optimizer.args, **experiment.optimizer.kwargs) if 'state_dict' in experiment.optimizer: optimizer.load_state_dict(torch.load(experiment.optimizer.state_dict))