config = { "n": 10, "lambda": 1, "n_iters": 3, "norm": False, "base_k": 'subtree' } ### Load Data if not args['split_ready']: X_train, Y_train, X_test, Y_test = get_dataset(args) elif args['eval_on_valid']: X_train, Y_train = get_splitted(args['trainfile']) X_test, Y_test = get_splitted(args['validfile']) else: X_train, Y_train = get_splitted(args['trainfile']) X_valid, Y_valid = get_splitted(args['validfile']) if X_train is not None and X_valid is not None: X_train = np.concatenate([X_train, X_valid]) if Y_train is not None and Y_valid is not None: Y_train = np.concatenate([Y_train, Y_valid]) X_test, Y_test = get_splitted(args['testfile']) ### Run Model if args['mode'] in ['train', 'train_eval']: train_eval(config, args, X_train, Y_train, X_test, Y_test) elif args['mode'] == 'evaluate': evaluate(args, X_test, Y_test) else: predict(args, X_test)
# continue
model = x.Model(config).to(config.device)
print("Loading fold {} data...".format(fold + 1))
train_idx = shuffle(train_idx)
train_inputs = [datas[i] for i in train_idx]
valid_inputs = [datas[i] for i in valid_idx]
train_iter = build_iterator(train_inputs, config.batch_size, config)
dev_iter = build_iterator(valid_inputs, config.test_batch, config)
test_iter = build_iterator(test_inputs, config.test_batch, config)

# Collect the gold labels of the held-out validation fold.
valid_outputs = np.array([], dtype=int)
for d, (text, labels) in enumerate(dev_iter):
    valid_outputs = np.append(valid_outputs, labels.data.cpu().numpy())

time_dif = get_time_dif(start_time)
print("Time usage:", time_dif)

# Train on this fold, then store the out-of-fold predictions.
train(config, model, train_iter, dev_iter, fold)
oof_p = predict(config, model, dev_iter, fold, activation='softmax')
oof[valid_idx] = oof_p
valid_preds.append(oof_p)
f1, t = search_f1(valid_outputs, valid_preds[-1])
print('validation score = ', f1)

# Predict on the test set with this fold's model.
each_fold_predict = predict(config, model, test_iter, fold, activation='softmax')
test_preds.append(each_fold_predict)
sub = each_fold_predict > t
# df_test['label'] = sub.astype(int)
# df_test[['id', 'id_sub', 'label']].to_csv('submission_beike_{}.tsv'.format(fold), index=False,
#                                           header=None, sep='\t')
torch.cuda.empty_cache()

# Aggregate: search the best F1 threshold over the full out-of-fold predictions.
outputs = np.asarray(df_train['label'])
best_score, best_t = search_f1(outputs, oof)
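# search_f1() above picks a decision threshold on predicted probabilities. A minimal
# sketch of such a helper, assuming binary labels and a 1-D array of positive-class
# probabilities; the 0.01 threshold grid is an assumption, not taken from the source.
import numpy as np
from sklearn.metrics import f1_score

def search_f1(y_true, y_prob):
    """Scan candidate thresholds and return (best_f1, best_threshold)."""
    best_f1, best_t = 0.0, 0.5
    for t in np.arange(0.01, 1.0, 0.01):
        f1 = f1_score(y_true, (y_prob > t).astype(int))
        if f1 > best_f1:
            best_f1, best_t = f1, float(t)
    return best_f1, best_t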
def main():
    # parse arguments
    config.parse()
    args = config.args
    for k, v in vars(args).items():
        logger.info(f"{k}:{v}")

    # set seeds
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed_all(args.random_seed)
    np.random.seed(args.random_seed)
    random.seed(args.random_seed)

    # arguments check
    device, n_gpu = args_check(args)
    os.makedirs(args.output_dir, exist_ok=True)
    forward_batch_size = int(args.train_batch_size / args.gradient_accumulation_steps)
    args.forward_batch_size = forward_batch_size

    # load bert config
    bert_config_S = BertConfig.from_json_file(args.bert_config_file_S)
    assert args.max_seq_length <= bert_config_S.max_position_embeddings

    # read data
    train_examples = None
    train_features = None
    eval_examples = None
    eval_features = None
    num_train_steps = None

    tokenizer = ChineseFullTokenizer(vocab_file=args.vocab_file, do_lower_case=args.do_lower_case)
    convert_fn = partial(convert_examples_to_features,
                         tokenizer=tokenizer,
                         max_seq_length=args.max_seq_length,
                         doc_stride=args.doc_stride,
                         max_query_length=args.max_query_length)
    if args.do_train:
        train_examples, train_features = read_and_convert(
            args.train_file, is_training=True, do_lower_case=args.do_lower_case,
            read_fn=read_squad_examples, convert_fn=convert_fn)
        if args.fake_file_1:
            fake_examples1, fake_features1 = read_and_convert(
                args.fake_file_1, is_training=True, do_lower_case=args.do_lower_case,
                read_fn=read_squad_examples, convert_fn=convert_fn)
            train_examples += fake_examples1
            train_features += fake_features1
        if args.fake_file_2:
            fake_examples2, fake_features2 = read_and_convert(
                args.fake_file_2, is_training=True, do_lower_case=args.do_lower_case,
                read_fn=read_squad_examples, convert_fn=convert_fn)
            train_examples += fake_examples2
            train_features += fake_features2
        num_train_steps = int(len(train_features) / args.train_batch_size) * args.num_train_epochs

    if args.do_predict:
        eval_examples, eval_features = read_and_convert(
            args.predict_file, is_training=False, do_lower_case=args.do_lower_case,
            read_fn=read_squad_examples, convert_fn=convert_fn)

    # Build Model and load checkpoint
    model_S = BertForQASimple(bert_config_S, args)
    # Load student
    if args.load_model_type == 'bert':
        assert args.init_checkpoint_S is not None
        state_dict_S = torch.load(args.init_checkpoint_S, map_location='cpu')
        state_weight = {k[5:]: v for k, v in state_dict_S.items() if k.startswith('bert.')}
        missing_keys, _ = model_S.bert.load_state_dict(state_weight, strict=False)
        assert len(missing_keys) == 0
    elif args.load_model_type == 'all':
        assert args.tuned_checkpoint_S is not None
        state_dict_S = torch.load(args.tuned_checkpoint_S, map_location='cpu')
        model_S.load_state_dict(state_dict_S)
    else:
        logger.info("Model is randomly initialized.")
    model_S.to(device)

    if args.local_rank != -1 or n_gpu > 1:
        if args.local_rank != -1:
            raise NotImplementedError
        elif n_gpu > 1:
            model_S = torch.nn.DataParallel(model_S)  # ,output_device=n_gpu-1)

    if args.do_train:
        # parameters
        params = list(model_S.named_parameters())
        all_trainable_params = divide_parameters(params, lr=args.learning_rate)
        logger.info("Length of all_trainable_params: %d", len(all_trainable_params))

        optimizer = BERTAdam(all_trainable_params,
                             lr=args.learning_rate,
                             warmup=args.warmup_proportion,
                             t_total=num_train_steps,
                             schedule=args.schedule,
                             s_opt1=args.s_opt1,
                             s_opt2=args.s_opt2,
                             s_opt3=args.s_opt3)

        logger.info("***** Running training *****")
        logger.info(" Num orig examples = %d", len(train_examples))
        logger.info(" Num split examples = %d", len(train_features))
        logger.info(" Forward batch size = %d", forward_batch_size)
logger.info(" Num backward steps = %d", num_train_steps) ########### DISTILLATION ########### train_config = TrainingConfig( gradient_accumulation_steps=args.gradient_accumulation_steps, ckpt_frequency=args.ckpt_frequency, log_dir=args.output_dir, output_dir=args.output_dir, device=args.device) distiller = BasicTrainer(train_config=train_config, model=model_S, adaptor=BertForQASimpleAdaptorTraining) all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long) all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long) all_doc_mask = torch.tensor([f.doc_mask for f in train_features], dtype=torch.float) all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long) all_start_positions = torch.tensor( [f.start_position for f in train_features], dtype=torch.long) all_end_positions = torch.tensor( [f.end_position for f in train_features], dtype=torch.long) train_dataset = TensorDataset(all_input_ids, all_segment_ids, all_input_mask, all_doc_mask, all_start_positions, all_end_positions) if args.local_rank == -1: train_sampler = RandomSampler(train_dataset) else: raise NotImplementedError train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.forward_batch_size, drop_last=True) callback_func = partial(predict, eval_examples=eval_examples, eval_features=eval_features, args=args) with distiller: distiller.train(optimizer, scheduler=None, dataloader=train_dataloader, num_epochs=args.num_train_epochs, callback=callback_func) if not args.do_train and args.do_predict: res = predict(model_S, eval_examples, eval_features, step=0, args=args) print(res)
                torch.save(model, f)
            best_val = val_loss
        if epoch % 10 == 0:
            test_loss, test_rae, test_corr, test_f1 = eval_method(
                Data, Data.test[0], Data.test[1], model, evaluateL2, evaluateL1, args)
            writer.add_scalar(args.save + '/testloss', test_loss, epoch)
            writer.add_scalar(args.save + '/testrae', test_rae, epoch)
            writer.add_scalar(args.save + '/testcorr', test_corr, epoch)
            writer.add_scalar(args.save + '/testf1', test_f1, epoch)
            print("| test rse {:5.4f} | test rae {:5.4f} | test corr {:5.4f} | f1 {:5.4f}\n"
                  .format(test_loss, test_rae, test_corr, test_f1))
except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')

# Load the best saved model.
with open(args.save, 'rb') as f:
    model = torch.load(f)

test_acc, test_rae, test_corr, test_f1 = evaluate(Data, Data.test[0], Data.test[1],
                                                  model, evaluateL2, evaluateL1, args)
writer.add_scalar(args.save + '/testf1', test_f1, args.epochs + 1)
print('Best model performance:')
print("| test rse {:5.4f} | test rae {:5.4f} | test corr {:5.4f} | test f1 {:5.4f}"
      .format(test_acc, test_rae, test_corr, test_f1))
predict(Data, Data.test[0], Data.test[1], model, args)
print(modelname)
config = x.Config(args.word, dataset)
np.random.seed(1)
torch.manual_seed(1)
torch.cuda.manual_seed_all(1)
torch.backends.cudnn.deterministic = True  # make results reproducible across runs

start_time = time.time()
print("Loading data...")
train_data, dev_data, test_data = build_dataset(config, do_train, do_test or do_predict, args.word)
if do_train:
    train_iter = build_iterator(train_data, config)
    dev_iter = build_iterator(dev_data, config)
if do_test:
    test_iter = build_iterator(test_data, config)
if do_predict:
    data_iter = build_iterator(test_data, config)
time_dif = get_time_dif(start_time)
print("Time usage:", time_dif)

# train
model = x.Model(config).to(config.device)
if do_train:
    train(config, model, train_iter, dev_iter)
if do_test:
    test(config, model, test_iter)
if do_predict:
    predict(config, model, data_iter)
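# get_time_dif() above is assumed to return the elapsed wall-clock time since
# start_time. A minimal sketch of such a helper (hypothetical implementation):
import time
from datetime import timedelta

def get_time_dif(start_time):
    """Return the elapsed time since start_time as a timedelta, rounded to whole seconds."""
    return timedelta(seconds=int(round(time.time() - start_time)))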
# Visualize loss and accuracy history
pl.plot_accuracy(history)
pl.plot_losses(history)

# Evaluate final model
scores = te.evaluate(model=mnist_model, val_loader=test_loader)
print('Test scores: ', scores)

# Predict on a few test inputs
test_dataset = MNIST(root='data/', train=False, transform=transforms.ToTensor())
for idx in [0, 111, 222, 333]:
    x, label = test_dataset[idx]
    x = x.unsqueeze(0)  # add a batch dimension
    pred = te.predict(x=x, model=mnist_model)
    print('True label: {}, Predicted: {}'.format(label, pred))
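# te.predict() above is assumed to wrap a single forward pass and an argmax over the
# class logits. A minimal sketch of such a helper (hypothetical; the real te module
# may differ):
import torch

def predict(x, model):
    """Return the predicted class index for a single input of shape (1, ...)."""
    model.eval()
    with torch.no_grad():
        logits = model(x)
    return int(torch.argmax(logits, dim=1).item())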
torch.cuda.manual_seed_all(1)
torch.backends.cudnn.deterministic = True

start_time = time.time()
print("Loading data...")
vocab, train_data, dev_data, test_data = build_dataset(config, args.word)
train_iter = build_iterator(train_data, config)
dev_iter = build_iterator(dev_data, config)
test_iter = build_iterator(test_data, config)
time_dif = get_time_diff(start_time)
print("Time usage:", time_dif)

config.num_vocab = len(vocab)
model = x.Model(config).to(config.device)
init_network(model)
print(model.parameters)
train(config, model, train_iter, dev_iter)

predictions = predict(config, model, test_iter)
if not os.path.exists('result'):
    os.mkdir('result')
with open(os.path.join('result',
                       model_name + ('_word_' if args.word else '_char_') + 'result.txt'),
          'w', encoding='UTF-8') as f:
    for i, label in enumerate(predictions):
        print(f'{i}.txt {label}', file=f)
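# init_network() above is assumed to re-initialize the trainable weights while leaving
# pretrained embedding weights untouched. A minimal sketch using Xavier initialization;
# the 'embedding' exclusion rule and the choice of Xavier are assumptions:
import torch.nn as nn

def init_network(model, exclude='embedding'):
    """Xavier-initialize weight matrices and zero biases, skipping embedding layers."""
    for name, w in model.named_parameters():
        if exclude in name:
            continue
        if 'weight' in name and w.dim() > 1:
            nn.init.xavier_normal_(w)
        elif 'bias' in name:
            nn.init.zeros_(w)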
batch_size = 256
hidden_size = 128
num_layers = 1
dropout = 0
testnum = 500
# interval is the sample gap between the last input step and the first output step.
interval = 0
epoch = 100
device = 'cuda'

# Generate a sine-wave dataset for training and testing.
dataset = np.sin([i / 50 * 2 * np.pi for i in range(2000)])
x_train, y_train, x_test, y_test, normalizer = generate_data(
    dataset, 'minmax', input_length, output_length, testnum, interval)

# Build, train and predict.
model = GRU(1, hidden_size, num_layers, 1, dropout)
optimizer = opt.Adam(model.parameters())
loss = nn.MSELoss()
batch_train_loss, batch_val_loss = train(model, x_train, y_train, epoch, batch_size,
                                         optimizer, loss, device)
y_predict, y_real, _ = predict(model, x_test, y_test, loss, device, normalizer, batch_size)

# Draw result
plt.plot(y_predict, label='prediction')
plt.plot(y_real, label='real')
plt.legend()
plt.show()
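# generate_data() above is not shown. A minimal sketch of what it might do, assuming it
# min-max normalizes the series, slices it into sliding input/output windows separated
# by `interval` steps, and holds out the last `testnum` windows for testing; all of
# these details are assumptions, not confirmed by the source.
import numpy as np

def generate_data(series, norm, input_length, output_length, testnum, interval):
    """Return (x_train, y_train, x_test, y_test, normalizer) built from a 1-D series."""
    lo, hi = series.min(), series.max()
    if norm == 'minmax':
        series = (series - lo) / (hi - lo)
    normalizer = (lo, hi)  # enough to undo the scaling later

    xs, ys = [], []
    window = input_length + interval + output_length
    for i in range(len(series) - window + 1):
        xs.append(series[i:i + input_length])
        ys.append(series[i + input_length + interval:i + window])
    xs = np.asarray(xs)[..., None]  # add a feature dimension for the GRU
    ys = np.asarray(ys)

    return xs[:-testnum], ys[:-testnum], xs[-testnum:], ys[-testnum:], normalizer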