def main(parser):
    tasks = [
        'SQuAD', 'TriviaQA-web', 'SearchQA', 'NewsQA',
        'NaturalQuestionsShort', 'HotpotQA'
    ]
    task = tasks[parser.idx]

    model = 'bert-base-uncased'
    do_lower_case = True

    data_dir = parser.data_dir
    cache_dir = parser.cache_dir
    log_dir = parser.log_dir

    tokenizer = transformers.AutoTokenizer.from_pretrained(
        model, do_lower_case=do_lower_case)

    log_name = os.path.join(
        log_dir,
        '{}_{}_cache_data_{}.log'.format(task, model,
                                         dt.now().strftime("%Y%m%d_%H%M")))
    log.basicConfig(filename=log_name,
                    format='%(asctime)s | %(name)s -- %(message)s',
                    level=log.DEBUG)

    max_seq_length = 384
    doc_stride = 128
    max_query_length = 64

    IO = myio.IO(data_dir,
                 cache_dir,
                 tokenizer,
                 max_seq_length,
                 doc_stride,
                 max_query_length,
                 batch_size=32,
                 shuffle=True,
                 cache=True)

    start = time.time()

    log.info("=" * 40 + " Loading {} {} ".format(task, 'train') + "=" * 40)
    _, _, _ = IO.load_and_cache_task(task, 'train')

    log.info("=" * 40 + " Loading {} {} ".format(task, 'dev') + "=" * 40)
    _, _, _ = IO.load_and_cache_task(task, 'dev')

    log.info("Task {} took {:.6f}s".format(task, time.time() - start))

    # release logs from Python
    handlers = log.getLogger().handlers
    for handler in handlers:
        handler.close()
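# ---------------------------------------------------------------------------
# NOTE (illustrative only): main(parser) above expects an argparse-style
# namespace exposing at least `idx`, `data_dir`, `cache_dir`, and `log_dir`.
# The project's own `args` module (not shown here) presumably builds this;
# the block below is a minimal, assumed stand-in showing one way the script
# could be driven from the command line. The flag names are hypothetical.
if __name__ == '__main__':
    import argparse

    cli = argparse.ArgumentParser(description="Cache QA task data.")
    cli.add_argument('--idx', type=int, default=0,
                     help='index into the task list defined in main()')
    cli.add_argument('--data_dir', type=str, required=True,
                     help='directory holding the raw task data')
    cli.add_argument('--cache_dir', type=str, required=True,
                     help='directory for cached features')
    cli.add_argument('--log_dir', type=str, required=True,
                     help='directory for log files')
    main(cli.parse_args())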
def main():
    # parse arguments
    parser = args.parse_args()

    # set up logger
    log_path = os.path.join(parser.save_dir, "logs")
    if not os.path.exists(log_path):
        os.mkdir(log_path)
    log_fname = os.path.join(
        log_path,
        "{}_log_{}.log".format(parser.exp_name,
                               dt.now().strftime("%Y%m%d_%H%M")))
    log.basicConfig(filename=log_fname,
                    format='%(asctime)s: %(name)s || %(message)s',
                    level=log.INFO)

    # =========================================================================
    # start
    # =========================================================================
    log.info("=" * 40 + " Start Program " + "=" * 40)

    # =========================================================================
    # misc stuff
    # =========================================================================
    # set device to GPU if available, else CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    log.info("Device is {}".format(device))

    # set random seeds
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)

    # set data directory
    log.info("Data Directory is {}.".format(parser.data_dir))

    # =========================================================================
    # import data
    # =========================================================================
    task_names = parser.data_name.split(',')
    content_headers = parser.content.split(',')

    tokenizer = transformers.AutoTokenizer.from_pretrained(
        parser.model, do_lower_case=parser.do_lower_case)

    data_handler = myio.IO(
        data_dir=parser.data_dir,
        model_name=parser.model,
        task_names=task_names,
        tokenizer=tokenizer,
        max_length=parser.input_length,
        content=content_headers,
        review_key=parser.review_key,
        label_name=parser.label_name,
        val_split=parser.val_split,
        test_split=parser.test_split,
        batch_size=parser.batch_size,
        shuffle=not parser.no_shuffle,
        cache=not parser.no_cache,
    )
    data_handler.read_task()

    # =========================================================================
    # define model
    # =========================================================================
    log.info("=" * 40 + " Defining Model " + "=" * 40)

    config = transformers.AutoConfig.from_pretrained(parser.model)
    classifier = model.Model(
        model=parser.model,
        config=config,
        n_others=parser.n_others,
        n_hidden=parser.n_class_hidden,
        n_flag=parser.n_labels,
        load=parser.preload_emb,
        load_name=parser.preload_emb_name,
    )

    # =========================================================================
    # define trainer
    # =========================================================================
    log.info("Save Directory is {}.".format(parser.save_dir))
    log.info("=" * 40 + " Defining Trainer " + "=" * 40)

    # create trainer object
    trainer = learner.Learner(
        model=classifier,
        device=device,
        myio=data_handler,
        max_epochs=parser.max_epochs,
        save_path=parser.save_dir,
        lr=parser.lr,
        weight_decay=parser.weight_decay,
        pct_start=parser.pct_start,
        anneal_strategy=parser.anneal_strategy,
        cycle_momentum=parser.cycle_momentum,
        log_int=parser.log_int,
        buffer_break=not parser.no_early_stop,
        break_int=parser.patience,
        accumulate_int=parser.grad_accum,
        max_grad_norm=parser.max_grad_norm,
        n_others=parser.n_others,
        batch_size=parser.batch_size,
        check_int=parser.check_int,
        save=parser.save,
        test=parser.test,
    )

    # train model
    best = trainer.learn(
        model_name=parser.model,
        task_name=task_names[0],
        early_check=parser.early_check,
        debug=parser.debug,
    )
    best['experiment'] = parser.exp_name

    # write results to "results.jsonl"
    if not os.path.exists(parser.save_dir):
        os.mkdir(parser.save_dir)
    results_name = os.path.join(parser.save_dir, "results.jsonl")
    with open(results_name, 'a') as f:
        f.write(json.dumps(best) + "\n")

    log.info("=" * 40 + " Program Complete " + "=" * 40)
    log.info("=" * 40 + " Results written to {} ".format(results_name) + "=" * 40)
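# ---------------------------------------------------------------------------
# NOTE (illustrative only): each run appends one JSON object per line to
# results.jsonl, so results from repeated experiments can be collected with a
# plain line-by-line read. The helper below is an assumed sketch, not part of
# the training code.
def read_results(results_path):
    """Return a list of result dicts from a JSON-lines results file."""
    import json

    results = []
    with open(results_path) as f:
        for line in f:
            line = line.strip()
            if line:
                results.append(json.loads(line))
    return results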
# set up a root logger that also streams to the console
wd = os.getcwd()
root = log.getLogger()
log.basicConfig(format='%(asctime)s - %(name)s - %(message)s',
                level=log.INFO)
root.addHandler(log.StreamHandler())

log.info('Start')

# set parameters for IO object
data_dir = os.path.join(wd, r'cleaned')
task_names = ['tester']
tokenizer = transformers.AutoTokenizer.from_pretrained('albert-base-v2')
max_length = 512

# read in 'tester' data from both the train and dev directories,
# using a batch_size of 2 for quick testing
data_handler = myio.IO(data_dir,
                       task_names,
                       tokenizer,
                       max_length,
                       batch_size=2)
data_handler.read_task()

# see that it works (disabled by default)
if False:
    for use in ['train', 'dev']:
        # get the DataLoader for this split
        dl = data_handler.tasks.get('tester').get(use)
        for i, (data, labels) in enumerate(dl):
            print(r'{} batch {} data size is: {}'.format(use, i, data.size()))
            print(r'{} batch {} data is: {}'.format(use, i, data))
            for k, obs in enumerate(data):
                print(r'{} batch {} obs {} decoded: {}'.format(
                    use, i, k, tokenizer.decode(obs.tolist())))
            print(r'{} batch {} size is: {}'.format(use, i, labels.size()))
def main():
    # parse arguments
    parser = args.args

    # get working directory
    wd = os.getcwd()

    # set up logger
    log_fname = os.path.join(
        wd, "logs", "log_{}.log".format(dt.now().strftime("%Y%m%d_%H%M")))
    root = log.getLogger()
    while len(root.handlers):
        root.removeHandler(root.handlers[0])
    log.basicConfig(filename=log_fname,
                    format='%(asctime)s: %(name)s || %(message)s',
                    level=log.INFO)
    root.addHandler(log.StreamHandler())

    # =========================================================================
    # start
    # =========================================================================
    log.info("=" * 40 + " Start Program " + "=" * 40)

    # =========================================================================
    # misc stuff
    # =========================================================================
    label_order = {'rating': 0, 'flagged': 1}

    # set device to GPU if available, else CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    log.info("Device is {}".format(device))

    # set random seeds
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if device.type == "cuda":
        torch.cuda.manual_seed(parser.seed)
        torch.cuda.manual_seed_all(parser.seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    # set data directory
    if os.path.isdir(parser.data_dir):
        data_path = parser.data_dir
    else:
        data_path = os.path.join(wd, parser.data_dir)
    log.info("Data Directory is {}.".format(data_path))

    # =========================================================================
    # import data
    # =========================================================================
    task_names = [parser.data_name]
    tokenizer = transformers.AutoTokenizer.from_pretrained(parser.model)
    label_names = parser.label_names.split(',')

    data_handler = myio.IO(data_dir=data_path,
                           task_names=task_names,
                           tokenizer=tokenizer,
                           max_length=parser.input_length,
                           val_split=parser.val_split,
                           test_split=parser.test_split,
                           batch_size=parser.batch_size,
                           label_names=label_names)
    data_handler.read_task()

    # =========================================================================
    # define model
    # =========================================================================
    log.info("=" * 40 + " Defining Model " + "=" * 40)
    number_labels = [int(n) for n in parser.label_numbers.split(',')]

    config = transformers.AutoConfig.from_pretrained(parser.model)
    classifier = model.Model(config=config,
                             nrating=number_labels[label_order.get('rating')],
                             nflag=number_labels[label_order.get('flagged')])

    # =========================================================================
    # define trainer
    # =========================================================================
    weights = [float(w) for w in parser.label_weights.split(',')]

    train_data = data_handler.tasks.get(parser.data_name).get('train')
    val_data = data_handler.tasks.get(parser.data_name).get('dev')
    test_data = data_handler.tasks.get(parser.data_name).get('test')

    if os.path.isdir(parser.save_dir):
        save_path = parser.save_dir
    else:
        save_path = os.path.join(wd, parser.save_dir)
    log.info("Save Directory is {}.".format(save_path))

    log.info("=" * 40 + " Defining Trainer " + "=" * 40)

    # create trainer object
    trainer = learner.Learner(model=classifier,
                              device=device,
                              train_data=train_data,
                              val_data=val_data,
                              test_data=test_data,
                              rating_w=weights[label_order.get('rating')],
                              flag_w=weights[label_order.get('flagged')],
                              max_epochs=parser.max_epochs,
                              save_path=save_path,
                              lr=parser.lr,
                              buffer_break=(parser.early_stop == 'True'),
                              break_int=parser.patience)

    # train model
    best_path, best_emb_path = trainer.learn(model_name=parser.model,
                                             verbose=True,
                                             early_check=parser.early_stop)

    log.info("=" * 40 + " Program Complete " + "=" * 40)
    log.info("Best Total Weights in {}".format(best_path))
    log.info("Best Embedding Weights in {}".format(best_emb_path))

    # release log handlers
    for handler in log.getLogger().handlers:
        handler.close()

    # exit python
    sys.exit(0)
def main():
    """
    Main method for meta-learning.
    """
    start = time.time()
    repository = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    parser = args.parse_args()

    # run some checks on arguments
    check_args(parser)

    # format logging
    log_name = os.path.join(
        parser.run_log,
        '{}_meta_run_log_{}.log'.format(parser.experiment,
                                        dt.now().strftime("%Y%m%d_%H%M")))
    log.basicConfig(filename=log_name,
                    format='%(asctime)s | %(name)s -- %(message)s',
                    level=log.INFO)
    os.chmod(log_name, parser.access_mode)

    # set device to GPU if available, else CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    log.info("Device is {}".format(device))

    # set seed for replication
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)

    log.info("Starting experiment {} meta learning on {} with model {}".format(
        parser.experiment, device, parser.model))

    # set tokenizer and config from Huggingface
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        parser.model, do_lower_case=parser.do_lower_case)
    config = transformers.AutoConfig.from_pretrained(parser.model)

    # create IO object and import data
    cache_head = os.path.join(parser.save_dir, 'cached_data')
    cache_dir = os.path.join(cache_head, parser.model)
    if not os.path.exists(cache_head):
        os.mkdir(cache_head)
    if not os.path.exists(cache_dir):
        os.mkdir(cache_dir)

    data_handler = myio.IO(parser.data_dir,
                           cache_dir,
                           tokenizer,
                           parser.max_seq_length,
                           parser.doc_stride,
                           parser.max_query_length,
                           batch_size=parser.batch_size,
                           shuffle=True,
                           cache=True)

    # set up OML meta-learner
    oml = meta_learner.MetaLearningClassification(
        update_lr=parser.meta_update_lr,
        meta_lr=parser.meta_meta_lr,
        hf_model_name=parser.model,
        config=config,
        myio=data_handler,
        max_grad_norm=parser.max_grad_norm,
        device=device)

    # keep a copy of the initial RLN (BERT encoder) weights for sanity checks
    if isinstance(oml.net, nn.DataParallel):
        rln = oml.net.module.model.bert
    else:
        rln = oml.net.model.bert
    old_weights = copy.deepcopy(rln)

    # freeze RLN layers
    oml.freeze_rln()

    # do meta-learning
    meta_tasks = parser.meta_tasks.split(',')

    # create save path
    meta_RLN_head = os.path.join(parser.save_dir, "meta_weights")
    if not os.path.exists(meta_RLN_head):
        os.mkdir(meta_RLN_head)
    meta_RLN_weights = os.path.join(meta_RLN_head,
                                    parser.experiment + "_meta_weights.pt")

    meta_steps = trange(0, parser.meta_steps, desc='Meta Outer', mininterval=30)
    running_loss = 0
    for step in meta_steps:
        # sample tasks
        sample_tasks = np.random.choice(meta_tasks,
                                        parser.n_meta_tasks,
                                        replace=False)

        # sample trajectory
        d_traj = []
        d_rand = []
        for task in sample_tasks:
            task_traj, task_rand = data_handler.sample_dl(
                task=task, samples=parser.n_meta_task_samples, use='train')
            d_traj += task_traj
            d_rand += task_rand

        loss = oml(d_traj, d_rand)
        running_loss += loss

        if step % parser.verbose_steps == 0:
            log.info(
                f"OML Loss is {loss} | Step {step} | Average is {running_loss/max(1, step)}"
            )

        # check that RLN weights are changing
        changed = False
        if isinstance(oml.net, nn.DataParallel):
            rln = oml.net.module.model.bert
        else:
            rln = oml.net.model.bert
        for old, new in zip(old_weights.parameters(), rln.parameters()):
            if not old.equal(new):
                changed = True
                break
        assert changed, "Weights are the same"

        # save every meta step (unwrap the module for multi-GPU runs)
        if isinstance(oml.net, nn.DataParallel):
            weights = oml.net.module.model.bert.state_dict()
        else:
            weights = oml.net.model.bert.state_dict()
        torch.save(weights, meta_RLN_weights)

    log.info(
        f"Meta loss is {loss} | Step {step} | Average is {running_loss/(step + 1)}"
    )
    log.info(f"Changed weights: {changed}")
    log.info("Saved meta weights at {}".format(meta_RLN_weights))
    log.info("Total time is: {} min : {} s".format((time.time() - start) // 60,
                                                   (time.time() - start) % 60))
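# ---------------------------------------------------------------------------
# NOTE (illustrative only): the meta-trained RLN weights saved above are a
# plain state_dict() of the underlying BERT encoder. The baseline script in
# this repository restores them through model.QAModel(..., load_rln=True,
# rln_weights=...); the helper below is only a minimal, assumed sketch of the
# same idea using standard PyTorch/Huggingface calls.
def load_meta_rln(weights_path, model_name='bert-base-uncased'):
    """Load meta-learned encoder weights into a fresh Huggingface encoder."""
    import torch
    import transformers

    encoder = transformers.AutoModel.from_pretrained(model_name)
    state_dict = torch.load(weights_path, map_location='cpu')
    # strict=False tolerates harmless head/pooler mismatches between checkpoints
    encoder.load_state_dict(state_dict, strict=False)
    return encoder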
import myio
import transformers

data_handler = myio.IO(
    data_dir="../../dataset/",
    task_names=["Test"],
    tokenizer=transformers.AutoTokenizer.from_pretrained('albert-base-v2'),
    max_length=100000)
data_handler.read_task()
# ========================= Testing Data Loading ==========================
if False:
    data_dir = 'data'
    task_names = ['tester']
    tokenizer = transformers.AutoTokenizer.from_pretrained('bert-base-uncased')
    max_seq_length = 384
    doc_stride = 128
    max_query_length = 64
    threads = 1

    # create IO object with batch size 2 for testing
    data_handler = myio.IO(task_names,
                           tokenizer,
                           max_seq_length,
                           doc_stride,
                           max_query_length,
                           threads,
                           batch_size=2,
                           data_dir=data_dir)
    data_handler.read_tasks()

    dl_train = data_handler.tasks.get('tester').get('train').get('data')
    dl_dev = data_handler.tasks.get('tester').get('dev').get('data')

    data_handler2 = myio.IO(task_names,
                            tokenizer,
                            max_seq_length,
                            doc_stride,
                            max_query_length,
                            threads,
                            batch_size=2,
                            data_dir=data_dir)
def main():
    """
    Main method for experiment.
    """
    start = time.time()
    repository = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    parser = args.parse_args()

    # run some checks on arguments
    check_args(parser)

    # format logging
    log_name = os.path.join(
        parser.run_log,
        '{}_run_log_{}.log'.format(parser.experiment,
                                   dt.now().strftime("%Y%m%d_%H%M")))
    log.basicConfig(filename=log_name,
                    format='%(asctime)s | %(name)s -- %(message)s',
                    level=log.INFO)
    os.chmod(log_name, parser.access_mode)

    # set device to GPU if available, else CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    log.info("Device is {}".format(device))

    # set seed for replication
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)

    log.info("Starting experiment {} on {} with model {}".format(
        parser.experiment, device, parser.model))
    print("{}".format(parser.experiment))

    # set tokenizer and config from Huggingface
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        parser.model, do_lower_case=parser.do_lower_case)
    config = transformers.AutoConfig.from_pretrained(parser.model)

    # create IO object and import data
    cache_head = os.path.join(parser.save_dir, 'cached_data')
    cache_dir = os.path.join(cache_head, parser.model)
    if not os.path.exists(cache_head):
        os.mkdir(cache_head)
    if not os.path.exists(cache_dir):
        os.mkdir(cache_dir)

    data_handler = myio.IO(parser.data_dir,
                           cache_dir,
                           tokenizer,
                           parser.max_seq_length,
                           parser.doc_stride,
                           parser.max_query_length,
                           batch_size=parser.batch_size,
                           shuffle=True,
                           cache=True)

    # =========================================================================
    # BASELINE
    # =========================================================================
    # parse continual learning curriculum
    parser.continual_curriculum = parser.continual_curriculum.split(',')

    # create BERT model
    BERTmodel = model.QAModel(
        parser.model,
        config,
        load_rln=parser.load_rln,
        rln_weights=parser.rln_weights,
    )

    # create learner object for BERT model
    trainer = learner.Learner(
        parser.access_mode,
        parser.fp16,
        parser.fp16_opt_level,
        BERTmodel,
        parser.model,
        device,
        data_handler,
        parser.save_dir,
        parser.n_best_size,
        parser.max_answer_length,
        parser.do_lower_case,
        parser.verbose_logging,
        parser.version_2_with_negative,
        parser.null_score_diff_threshold,
        max_steps=parser.fine_tune_steps,
        log_int=parser.logging_steps,
        best_int=parser.save_steps,
        verbose_int=parser.verbose_steps,
        max_grad_norm=parser.max_grad_norm,
        optimizer=None,
        weight_decay=parser.weight_decay,
        lr=parser.learning_rate,
        eps=parser.adam_epsilon,
        warmup_steps=parser.warmup_steps,
        freeze_embeddings=parser.freeze_embeddings,
    )

    # create continual learning object and perform continual learning
    c_learner = cont_learning.ContLearner(
        parser.model,
        'BERT',
        trainer,
        curriculum=parser.continual_curriculum,
        fine_tune_prev=not parser.no_prev_fine_tune)

    log.info("Starting Continual Learning")
    if not parser.no_cont_learning:
        c_learner.c_learn(rln_only=parser.carry_rln_only)

    if len(parser.continual_curriculum) > 1 and not parser.no_forget_eval:
        c_learner.evaluate_forgetting(rln_only=parser.carry_rln_only)

    log.info("Generating Plot")

    # generate BERT plot
    now = dt.now().strftime("%Y%m%d_%H%M")

    # create results folders if not generated
    plot_dir = os.path.join(parser.save_dir, "plots")
    json_dir = os.path.join(parser.save_dir, "json_results")
    if not os.path.exists(plot_dir):
        os.mkdir(plot_dir)
    if not os.path.exists(json_dir):
        os.mkdir(json_dir)

    # plot results and save
    plot = analyze.plot_learning(c_learner.scores,
                                 x_tick_int=2 * parser.logging_steps,
                                 iterations=parser.fine_tune_steps)
    plot_name = os.path.join(
        plot_dir,
        "baseline_{}_{}_{}.png".format(parser.experiment, parser.model, now))
    plot.savefig(plot_name)
    os.chmod(plot_name, parser.access_mode)
    log.info("Plot saved at: {}".format(plot_name))

    # write data to json
    baseline_results_name = os.path.join(
        json_dir,
        "baseline_{}_{}_{}.json".format(parser.experiment, parser.model, now))
    with open(baseline_results_name, 'w') as fw:
        json.dump(c_learner.scores, fw)
    os.chmod(baseline_results_name, parser.access_mode)
    log.info("Baseline results written to: {}".format(baseline_results_name))

    log.info("Total time is: {}min : {}s".format((time.time() - start) // 60,
                                                 (time.time() - start) % 60))
def main():
    """
    Main method for experiment.
    """
    start = time.time()
    repository = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    parser = args.parse_args()

    if parser.run_log == 'log':
        parser.run_log = os.path.join(parser.save_dir, 'log')
    if not os.path.exists(parser.run_log):
        os.mkdir(parser.run_log)

    # run some checks on arguments
    check_args(parser)

    # format logging
    log_name = os.path.join(
        parser.run_log,
        '{}_run_log_{}.log'.format(parser.experiment,
                                   dt.now().strftime("%Y%m%d_%H%M")))
    log.basicConfig(filename=log_name,
                    format='%(asctime)s | %(name)s -- %(message)s',
                    level=log.INFO)
    os.chmod(log_name, parser.access_mode)

    # set device to GPU if available, else CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    log.info("Device is {}".format(device))

    # set seed for replication
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)

    log.info("Starting experiment {} on {}".format(parser.experiment, device))

    data_handler = myio.IO(
        data_dir=parser.data_dir,       # directory storing data
        batch_size=parser.batch_size,   # batch size
        shuffle=not parser.no_shuffle,  # whether to shuffle training data
        split=parser.val_split,         # percentage of data for validation
    )

    # TODO:
    # create model
    my_model = model.Model(
        road_lambda=parser.road_lambda,           # relative weight of road map loss
        box_lambda=parser.box_lambda,             # relative weight of bounding box loss
        preload_backbone=parser.preload,          # whether to load pretrained weights
        backbone_weights=parser.preload_weights,  # pretrained backbone weights if needed
    )

    # create learner
    trainer = learner.Learner(
        access_mode=parser.access_mode,        # os access mode for created files
        experiment_name=parser.experiment,     # name of experiment
        model=my_model,                        # model
        device=device,                         # device to run experiment
        myio=data_handler,                     # myio.IO object for loading data
        save_dir=parser.save_dir,              # directory to save results
        max_steps=parser.training_steps,       # maximum number of update steps
        best_int=parser.save_steps,            # interval for checking weights
        verbose_int=parser.verbose_steps,      # interval for logging information
        max_grad_norm=parser.max_grad_norm,    # gradient clipping to avoid exploding gradients
        optimizer=None,                        # optimizer for training
        weight_decay=parser.weight_decay,      # weight decay, if used
        lr=parser.learning_rate,               # learning rate
        eps=parser.adam_epsilon,               # epsilon to use for Adam
        accumulate_int=parser.accumulate_int,  # steps to accumulate gradients before stepping
        batch_size=parser.batch_size,          # batch size
        warmup_pct=parser.pct_start,           # percent of updates used to warm up learning rate
        save=not parser.no_save,               # whether to save weights
        patience=parser.patience,              # checks without improvement before early stop
    )

    # train model
    results = trainer.train(labeled=not parser.no_label, debug=parser.debug)
    results["experiment"] = parser.experiment

    # write results to "results.jsonl"
    results_name = os.path.join(parser.save_dir, "results.jsonl")
    with open(results_name, 'a') as f:
        f.write(json.dumps(results) + "\n")
    os.chmod(results_name, parser.access_mode)
    log.info("Results written to: {}".format(results_name))

    log.info("Total time is: {} min : {} sec".format(
        (time.time() - start) // 60, (time.time() - start) % 60))