def wrapper_ngram(data=TREC, resplit=True, validate_ratio=0.2):
    train_x, train_y, validate_x, validate_y, test_x, test_y, \
        W, mask = prepare_datasets(data, resplit=resplit, validation_ratio=validate_ratio)
    # get input shape: (max sentence length, word-embedding dimension)
    input_shape = (train_x[0].shape[0], W.shape[1])
    print "input data shape", input_shape
    n_out = len(np.unique(test_y))
    # shuffle the training set once so mini-batches are not ordered by class
    shuffle_indices = np.random.permutation(train_x.shape[0])
    datasets = (train_x[shuffle_indices], train_y[shuffle_indices],
                validate_x, validate_y, test_x, test_y)
    test_accuracy = train_ngram_net(
        U=W,
        datasets=datasets,
        n_epochs=10,
        ngrams=(3, 2),
        ngram_out=(150, 50),
        non_static=False,
        input_shape=input_shape,
        concat_out=True,
        n_kernels=(8, 16),
        use_bias=False,
        lr_rate=0.02,
        dropout=True,
        dropout_rate=0.5,
        n_hidden=600,
        n_out=n_out,
        ngram_activation=leaky_relu,
        activation=leaky_relu,
        batch_size=50,
        l2_ratio=1e-5,
        update_rule='adagrad',
        skip_gram=False,
    )
    return test_accuracy
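
# Illustrative usage sketch (an addition, not part of the original module):
# wrapper_ngram returns the test accuracy, so a quick sweep over validation
# ratios is just a loop over calls. The sweep itself is an assumption for
# demonstration; TREC and wrapper_ngram come from this module.
def sweep_validate_ratio(ratios=(0.1, 0.2, 0.3)):
    results = {}
    for ratio in ratios:
        results[ratio] = wrapper_ngram(data=TREC, resplit=True, validate_ratio=ratio)
        print "validate_ratio = %.1f, test accuracy = %.4f" % (ratio, results[ratio])
    return results
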
def error_analysis(data=SST_SENT_POL):
    train_x, train_y, validate_x, validate_y, test_x, test_y, \
        W, mask = prepare_datasets(data, resplit=False, validation_ratio=0.0)
    # get input shape: (max sentence length, word-embedding dimension)
    input_shape = (train_x[0].shape[0], W.shape[1])
    print "input data shape", input_shape
    n_out = len(np.unique(test_y))
    shuffle_indices = np.random.permutation(train_x.shape[0])
    datasets = (train_x[shuffle_indices], train_y[shuffle_indices],
                validate_x, validate_y, test_x, test_y)
    # predict=True makes train_ngram_net return the best model's predictions
    # on the validation set instead of the test accuracy
    best_prediction = train_ngram_net(
        U=W,
        datasets=datasets,
        n_epochs=10,
        ngrams=(1, 2),
        ngram_out=(300, 250),
        non_static=False,
        input_shape=input_shape,
        concat_out=False,
        n_kernels=(4, 4),
        use_bias=False,
        lr_rate=0.02,
        dropout=True,
        dropout_rate=0.2,
        n_hidden=250,
        n_out=n_out,
        ngram_activation=leaky_relu,
        activation=leaky_relu,
        batch_size=50,
        l2_ratio=1e-5,
        update_rule='adagrad',
        skip_gram=False,
        predict=True
    )
    raw_datasets = load_raw_datasets(datasets=data)
    _, _, validate_raw, _, _, _ = raw_datasets
    from collections import Counter
    # collect a "gold & predicted" pair for every misclassified example
    errors = []
    for i in xrange(len(best_prediction)):
        if best_prediction[i] != validate_y[i]:
            errors.append("%d & %d" % (validate_y[i], best_prediction[i]))
            print validate_y[i], best_prediction[i], " ".join(validate_raw[i])
    errors = Counter(errors)
    print errors.most_common(10)
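
# Hedged helper (an addition, not original code): error_analysis prints a
# Counter of "gold & predicted" strings; this turns such a Counter into an
# off-diagonal confusion matrix, assuming labels are the integer class ids
# produced by prepare_datasets.
def error_pairs_to_matrix(error_counter, n_classes):
    matrix = np.zeros((n_classes, n_classes), dtype=int)
    for pair, count in error_counter.items():
        gold, predicted = [int(v) for v in pair.split(" & ")]
        matrix[gold, predicted] = count  # diagonal stays zero: only errors are counted
    return matrix
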
def wrapper_reversed_rec(data=SST_SENT_POL, resplit=True, validate_ratio=0.2, rec_type='lstm'):
    train_x, train_y, validate_x, validate_y, test_x, test_y, \
        W, mask = prepare_datasets(data, resplit=resplit,
                                   validation_ratio=validate_ratio, google=False)
    # get input shape: (max sentence length, word-embedding dimension)
    input_shape = (train_x[0].shape[0], W.shape[1])
    print "input data shape", input_shape
    n_out = len(np.unique(test_y))
    shuffle_indices = np.random.permutation(train_x.shape[0])
    datasets = (train_x[shuffle_indices], train_y[shuffle_indices],
                validate_x, validate_y, test_x, test_y)
    # reverse=True feeds the sequences to the recurrent layer in reversed order
    test_accuracy = train_ngram_rec_net(
        reverse=True,
        U=W,
        non_static=False,
        datasets=datasets,
        n_epochs=20,
        use_bias=True,
        ngrams=(2, 2),
        input_shape=input_shape,
        n_kernels=(4, 4),
        ngram_out=(300, 250),
        lr_rate=0.02,
        dropout_rate=0.3,
        concat_out=False,
        rec_hidden=300,
        mlp_hidden=300,
        n_out=n_out,
        ngram_activation=tanh,
        mlp_activation=leaky_relu,
        rec_activation=tanh,
        batch_size=50,
        update_rule='adagrad',
        rec_type=rec_type,
        clipping=1,
        l2_ratio=1e-5,
        mask=mask,
        mlp=True,
        skip_gram=False,
        bidirection=True
    )
    return test_accuracy
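
# Illustrative driver (an assumption, not original code): run the reversed
# recurrent wrapper on both dataset constants used in this module and collect
# the returned test accuracies for a side-by-side comparison.
def compare_reversed_rec(rec_type='lstm'):
    results = {}
    for dataset in (SST_SENT_POL, TREC):
        results[dataset] = wrapper_reversed_rec(data=dataset, resplit=True,
                                                validate_ratio=0.2, rec_type=rec_type)
        print "dataset:", dataset, "test accuracy:", results[dataset]
    return results
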
def train(opt):
    if torch.cuda.is_available():
        logger.info("%s", torch.cuda.get_device_name(0))

    # enable anomaly detection for easier debugging of autograd errors
    torch.autograd.set_detect_anomaly(True)

    # prepare teacher config
    teacher_config = load_config(opt, config_path=opt.teacher_config)
    teacher_config['opt'] = opt
    logger.info("[teacher config] :\n%s", teacher_config)

    # prepare student config
    student_config = load_config(opt, config_path=opt.config)
    student_config['opt'] = opt
    logger.info("[student config] :\n%s", student_config)

    # set path
    set_path(teacher_config)

    # prepare train, valid dataset
    train_loader, valid_loader = prepare_datasets(teacher_config)

    # prepare labeled dataset for meta pseudo labels
    mpl_loader = None
    if opt.mpl_data_path:
        mpl_loader, _ = prepare_datasets(teacher_config, train_path=opt.mpl_data_path)

    # -------------------------------------------------------------------------
    # distillation
    # -------------------------------------------------------------------------
    if opt.do_distill:
        # prepare and load teacher model
        teacher_model = prepare_model(teacher_config,
                                      bert_model_name_or_path=opt.teacher_bert_model_name_or_path)
        teacher_checkpoint = load_checkpoint(opt.teacher_model_path, device=opt.device)
        teacher_model.load_state_dict(teacher_checkpoint)
        teacher_model = teacher_model.to(opt.device)
        logger.info("[prepare teacher model and loading done]")

        # prepare student model
        student_model = prepare_model(student_config,
                                      bert_model_name_or_path=opt.bert_model_name_or_path)
        logger.info("[prepare student model done]")

        best_eval_metric = None
        global_step, tr_loss, best_eval_metric = distill(teacher_config, teacher_model,
                                                         student_config, student_model,
                                                         train_loader, valid_loader,
                                                         best_eval_metric=best_eval_metric,
                                                         mpl_loader=mpl_loader)
        logger.info(f"[distillation done] global steps: {global_step}, "
                    f"total loss: {tr_loss}, best metric: {best_eval_metric}")

    # -------------------------------------------------------------------------
    # structured pruning
    # -------------------------------------------------------------------------
    if opt.do_prune:
        # restore model from '--save_path', '--bert_output_dir'
        model = prepare_model(student_config, bert_model_name_or_path=opt.bert_output_dir)
        checkpoint = load_checkpoint(opt.save_path, device=opt.device)
        model.load_state_dict(checkpoint)
        model = model.to(opt.device)
        logger.info("[Restore best student model] : {}, {}".format(opt.bert_output_dir, opt.save_path))

        # measure the restored model before pruning
        eval_loss, eval_acc = evaluate(model, student_config, valid_loader)
        logs = {'eval_loss': eval_loss, 'eval_acc': eval_acc}
        logger.info("[before pruning] :")
        logger.info(json.dumps(logs))

        prune_rewire(student_config, model, valid_loader, use_tqdm=True)

        # save pruned model to '--save_path_pruned', '--bert_output_dir_pruned'
        save_model(student_config, model, save_path=opt.save_path_pruned)
        model.bert_tokenizer.save_pretrained(opt.bert_output_dir_pruned)
        model.bert_model.save_pretrained(opt.bert_output_dir_pruned)
        logger.info("[Pruned model saved] : {}, {}".format(opt.save_path_pruned,
                                                           opt.bert_output_dir_pruned))
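
# A hedged sketch of the argument namespace train() consumes, reconstructed
# only from the opt.* attributes read above; the real CLI may define more
# options and different defaults, so treat the defaults here as assumptions.
def build_opt_parser():
    import argparse
    parser = argparse.ArgumentParser()
    # configs, checkpoints, and output locations referenced in train()
    parser.add_argument('--config', type=str)
    parser.add_argument('--teacher_config', type=str)
    parser.add_argument('--teacher_model_path', type=str)
    parser.add_argument('--teacher_bert_model_name_or_path', type=str)
    parser.add_argument('--bert_model_name_or_path', type=str)
    parser.add_argument('--bert_output_dir', type=str)
    parser.add_argument('--bert_output_dir_pruned', type=str)
    parser.add_argument('--save_path', type=str)
    parser.add_argument('--save_path_pruned', type=str)
    parser.add_argument('--mpl_data_path', type=str, default=None)
    parser.add_argument('--device', type=str, default='cuda')
    # stage switches
    parser.add_argument('--do_distill', action='store_true')
    parser.add_argument('--do_prune', action='store_true')
    return parser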