from tensorflow.keras.backend import clear_session  # keras.backend in standalone Keras


def train_grid_search(config, initial_weights, lr_list, optimizer_list):
    """Grid-search every (learning rate, optimizer) pair, retraining from scratch each time."""
    train_generator, valid_generator = prepare_generators(config)
    for lr in lr_list:
        for optimizer in optimizer_list:
            config['train']['learning_rate'] = lr
            config['train']['optimizer'] = optimizer
            train_model, infer_model, freezing = prepare_model(config, initial_weights)
            # Optional warm-up phase with the backbone layers frozen.
            if freezing:
                train_freezed(config, train_model, train_generator, valid_generator)
            print('Training with {} / {}'.format(lr, optimizer))
            start_train(config, train_model, infer_model,
                        train_generator, valid_generator)
            # Release the Keras graph so successive runs don't accumulate memory.
            clear_session()
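# A minimal usage sketch for the grid search above. `load_config` and the
# literal values here are illustrative assumptions, not part of the original
# snippet.
if __name__ == '__main__':
    config = load_config('config.json')  # hypothetical helper
    train_grid_search(config,
                      initial_weights='init_weights.h5',  # hypothetical path
                      lr_list=[1e-3, 1e-4, 1e-5],
                      optimizer_list=['adam', 'sgd'])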
import train

# Load the corpus and reduce it to a sorted character inventory.
wordlist = train.manage_wordlist()
chars_sorted = train.prepare_text_data(wordlist)

# create_dictionaries returns the lookup tables; the first is char -> index.
dictionaries = train.create_dictionaries(chars_sorted)
char_indices = dictionaries[0]

# Vectorize the characters into (input, target) training arrays.
x, y = train.vectorization(chars_sorted, char_indices)

# The vocabulary size fixes the model's input/output dimensions.
model = train.prepare_model(len(chars_sorted))
train.build_model(model, x, y)
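# Hypothetical sketch of what a vectorization step like train.vectorization
# could do for a character-level model (the project's real implementation may
# differ): slide a fixed window over the text and one-hot encode each window
# plus the character that follows it.
import numpy as np

def vectorization_sketch(text, char_indices, maxlen=40, step=3):
    sentences, next_chars = [], []
    for i in range(0, len(text) - maxlen, step):
        sentences.append(text[i:i + maxlen])
        next_chars.append(text[i + maxlen])
    x = np.zeros((len(sentences), maxlen, len(char_indices)), dtype=bool)
    y = np.zeros((len(sentences), len(char_indices)), dtype=bool)
    for i, sentence in enumerate(sentences):
        for t, ch in enumerate(sentence):
            x[i, t, char_indices[ch]] = True
        y[i, char_indices[next_chars[i]]] = True
    return x, y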
import os

import numpy as np
import torch
import yaml

if 'results' not in os.listdir():
    os.mkdir('results')

# ID of the current run: one past the highest existing run directory.
if len(os.listdir('results')) >= 1:
    run_id = np.sort(np.array(os.listdir('results')).astype(int))[-1] + 1
else:
    run_id = 0

# Path for saving this run's results.
path = f"results/{run_id}/"
os.mkdir(path)

# GPU selection.
if opt.cuda:
    torch.cuda.set_device(opt.gpuid)
else:
    print("WARNING: RUN WITHOUT GPU")

db = prepare_db()
model = prepare_model(opt)
optim = prepare_optim(model, opt)
train_Loss, test_Loss, test_Acc = train(model, optim, db, opt)

# Save the hyperparameters and the per-epoch curves.
with open(f"{path}params.yaml", 'w') as file:
    yaml.safe_dump(params, file)
np.savetxt(path + 'train_Loss.txt', train_Loss, fmt='%.2f')
np.savetxt(path + 'test_Loss.txt', test_Loss, fmt='%.2f')
np.savetxt(path + 'test_Acc.txt', test_Acc, fmt='%.2f')
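# A more defensive variant of the run-id allocation above (a sketch, not the
# snippet's actual code): skip any non-numeric entries in results/ instead of
# crashing on the .astype(int) cast.
import os

def next_run_id(root='results'):
    os.makedirs(root, exist_ok=True)
    ids = [int(d) for d in os.listdir(root) if d.isdigit()]
    return max(ids) + 1 if ids else 0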
import numpy as np
from torch.utils.data import DataLoader, Subset

# k-fold cross-validation: hold out the i-th fold for validation and train on
# the concatenation of the remaining folds.
fold_stats = []
for i, fold in enumerate(folds):
    tr = []
    for j, f in enumerate(folds):
        if i != j:
            tr.append(f)
    train_dataset = Subset(dataset, np.concatenate(tr))
    val_dataset = Subset(dataset, fold)
    train_dataloader = DataLoader(train_dataset, batch_size=args['batch_size'],
                                  shuffle=True, num_workers=12,
                                  pin_memory=True, drop_last=True)
    val_dataloader = DataLoader(val_dataset, batch_size=args['batch_size'],
                                num_workers=12, pin_memory=True)
    V = len(dataset.vocab.keys())     # vocabulary size
    P = len(dataset.pos_set.keys())   # number of POS tags
    model, criterion, optimizer = prepare_model(
        V, P, args['embed'], args['hidden'], args['layers'], args['nhead'],
        dropout=args['dropout'], smoothing=args['label_smoothing'],
        lr=args['lr'], device=device)
    best_loss, best_jacc = fit(model, train_dataloader, val_dataloader,
                               criterion, optimizer, device, args['epoch'],
                               model_prefix + '_' + str(i))
    fold_stats.append([best_loss, best_jacc])
    print('Fold {} - Best Loss: {}, Best Jacc: {}'.format(i, best_loss, best_jacc))

# Aggregate the per-fold best loss and Jaccard score.
fold_stats = np.array(fold_stats)
mean = np.mean(fold_stats, axis=0)
std = np.std(fold_stats, axis=0)
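# One way the `folds` index arrays consumed above could be produced (a sketch;
# the original split may differ, e.g. it could be stratified or seeded
# elsewhere): shuffle all indices once and cut them into k equal parts.
import numpy as np

def make_folds(n_samples, k=5, seed=0):
    rng = np.random.default_rng(seed)
    return np.array_split(rng.permutation(n_samples), k)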
def train(opt):
    if torch.cuda.is_available():
        logger.info("%s", torch.cuda.get_device_name(0))

    # surface NaN/Inf gradients early while debugging
    torch.autograd.set_detect_anomaly(True)

    # prepare teacher config
    teacher_config = load_config(opt, config_path=opt.teacher_config)
    teacher_config['opt'] = opt
    logger.info("[teacher config] :\n%s", teacher_config)

    # prepare student config
    student_config = load_config(opt, config_path=opt.config)
    student_config['opt'] = opt
    logger.info("[student config] :\n%s", student_config)

    # set path
    set_path(teacher_config)

    # prepare train, valid dataset
    train_loader, valid_loader = prepare_datasets(teacher_config)

    # prepare labeled dataset for meta pseudo labels
    mpl_loader = None
    if opt.mpl_data_path:
        mpl_loader, _ = prepare_datasets(teacher_config, train_path=opt.mpl_data_path)

    # -------------------------------------------------------------------------
    # distillation
    # -------------------------------------------------------------------------
    if opt.do_distill:
        # prepare and load teacher model
        teacher_model = prepare_model(teacher_config,
                                      bert_model_name_or_path=opt.teacher_bert_model_name_or_path)
        teacher_checkpoint = load_checkpoint(opt.teacher_model_path, device=opt.device)
        teacher_model.load_state_dict(teacher_checkpoint)
        teacher_model = teacher_model.to(opt.device)
        logger.info("[prepare teacher model and loading done]")

        # prepare student model
        student_model = prepare_model(student_config,
                                      bert_model_name_or_path=opt.bert_model_name_or_path)
        logger.info("[prepare student model done]")

        best_eval_metric = None
        global_step, tr_loss, best_eval_metric = distill(
            teacher_config, teacher_model,
            student_config, student_model,
            train_loader, valid_loader,
            best_eval_metric=best_eval_metric,
            mpl_loader=mpl_loader)
        logger.info(f"[distillation done] global steps: {global_step}, "
                    f"total loss: {tr_loss}, best metric: {best_eval_metric}")

    # -------------------------------------------------------------------------
    # structured pruning
    # -------------------------------------------------------------------------
    if opt.do_prune:
        # restore model from '--save_path', '--bert_output_dir'
        model = prepare_model(student_config, bert_model_name_or_path=opt.bert_output_dir)
        checkpoint = load_checkpoint(opt.save_path, device=opt.device)
        model.load_state_dict(checkpoint)
        model = model.to(opt.device)
        logger.info("[Restore best student model] : {}, {}".format(opt.bert_output_dir, opt.save_path))

        # evaluate before pruning as a baseline
        eval_loss, eval_acc = evaluate(model, student_config, valid_loader)
        logs = {'eval_loss': eval_loss, 'eval_acc': eval_acc}
        logger.info("[before pruning] :")
        logger.info(json.dumps(logs))

        prune_rewire(student_config, model, valid_loader, use_tqdm=True)

        # save pruned model to '--save_path_pruned', '--bert_output_dir_pruned'
        save_model(student_config, model, save_path=opt.save_path_pruned)
        model.bert_tokenizer.save_pretrained(opt.bert_output_dir_pruned)
        model.bert_model.save_pretrained(opt.bert_output_dir_pruned)
        logger.info("[Pruned model saved] : {}, {}".format(opt.save_path_pruned, opt.bert_output_dir_pruned))
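# For reference, a minimal sketch of the objective a `distill` routine like the
# one above typically optimizes (names, temperature, and blend weight are
# assumptions, not this repo's actual code): soft targets from the teacher via
# temperature-scaled KL divergence, blended with the hard-label loss.
import torch
import torch.nn.functional as F

def distill_loss(student_logits, teacher_logits, labels, T=2.0, alpha=0.5):
    # KL between softened distributions; scaled by T^2 so gradient magnitudes
    # stay comparable across temperatures.
    soft = F.kl_div(
        F.log_softmax(student_logits / T, dim=-1),
        F.softmax(teacher_logits / T, dim=-1),
        reduction='batchmean',
    ) * (T * T)
    hard = F.cross_entropy(student_logits, labels)
    return alpha * soft + (1.0 - alpha) * hard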