Example #1
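### Config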
config = {
    "n": 10,
    "lambda": 1,
    "n_iters": 3,
    "norm": False,
    "base_k": "subtree"
}

### Load Data
if not args['split_ready']:
    X_train, Y_train, X_test, Y_test = get_dataset(args)
elif args['eval_on_valid']:
    X_train, Y_train = get_splitted(args['trainfile'])
    X_test, Y_test = get_splitted(args['validfile'])
else:
    X_train, Y_train = get_splitted(args['trainfile'])
    X_valid, Y_valid = get_splitted(args['validfile'])
    if X_train is not None and X_valid is not None:
        X_train = np.concatenate([X_train, X_valid])
    if Y_train is not None and Y_valid is not None:
        Y_train = np.concatenate([Y_train, Y_valid])
    X_test, Y_test = get_splitted(args['testfile'])

### Run Model
if args['mode'] in ['train', 'train_eval']:
    train_eval(config, args, X_train, Y_train, X_test, Y_test)
elif args['mode'] == 'evaluate':
    evaluate(args, X_test, Y_test)
else:
    predict(args, X_test)
Example #2
        #     continue
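        # One fold of k-fold cross-validation: build the iterators, train the model,
        # collect out-of-fold (OOF) predictions on the validation split, and store
        # this fold's test-set predictions.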
        model = x.Model(config).to(config.device)
        print("Loading " + str(fold + 1) + " fold data...")
        train_idx = shuffle(train_idx)
        train_inputs = [datas[i] for i in train_idx]
        valid_inputs = [datas[i] for i in valid_idx]
        train_iter = build_iterator(train_inputs, config.batch_size, config)
        dev_iter = build_iterator(valid_inputs, config.test_batch, config)
        test_iter = build_iterator(test_inputs, config.test_batch, config)
        valid_outputs = np.array([], dtype=int)
        for d, (text, labels) in enumerate(dev_iter):
            valid_outputs = np.append(valid_outputs, labels.data.cpu().numpy())
        time_dif = get_time_dif(start_time)
        print("Time usage:", time_dif)
        train(config, model, train_iter, dev_iter, fold)
        oof_p = predict(config, model, dev_iter, fold, activation='softmax')
        oof[valid_idx] = oof_p
        valid_preds.append(oof_p)

        f1, t = search_f1(valid_outputs, valid_preds[-1])
        print('validation score = ', f1)
        each_fold_predict = predict(config, model, test_iter, fold, activation='softmax')
        test_preds.append(each_fold_predict)
        sub = each_fold_predict > t
        # df_test['label'] = sub.astype(int)
        # df_test[['id', 'id_sub', 'label']].to_csv('submission_beike_{}.tsv'.format(fold), index=False,
        #                                           header=None, sep='\t')
        torch.cuda.empty_cache()

    outputs = np.asarray(df_train['label'])
    best_score, best_t = search_f1(outputs, oof)
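
A minimal sketch of a search_f1-style helper assumed by the example above: it sweeps
candidate thresholds over the out-of-fold probabilities and returns the best F1 score
together with the threshold that achieves it (hypothetical; the repository's actual
implementation may differ).

import numpy as np
from sklearn.metrics import f1_score

def search_f1(y_true, y_prob):
    # Sweep thresholds and keep the one that maximises F1 on y_true.
    best_f1, best_t = 0.0, 0.5
    for t in np.arange(0.1, 0.9, 0.01):
        f1 = f1_score(y_true, (y_prob > t).astype(int))
        if f1 > best_f1:
            best_f1, best_t = f1, t
    return best_f1, best_t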
Example #3
def main():
    #parse arguments
    config.parse()
    args = config.args
    for k, v in vars(args).items():
        logger.info(f"{k}:{v}")
    #set seeds
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed_all(args.random_seed)
    np.random.seed(args.random_seed)
    random.seed(args.random_seed)

    #arguments check
    device, n_gpu = args_check(args)
    os.makedirs(args.output_dir, exist_ok=True)
    forward_batch_size = int(args.train_batch_size /
                             args.gradient_accumulation_steps)
    args.forward_batch_size = forward_batch_size

    #load bert config
    bert_config_S = BertConfig.from_json_file(args.bert_config_file_S)
    assert args.max_seq_length <= bert_config_S.max_position_embeddings

    #read data
    train_examples = None
    train_features = None
    eval_examples = None
    eval_features = None
    num_train_steps = None

    tokenizer = ChineseFullTokenizer(vocab_file=args.vocab_file,
                                     do_lower_case=args.do_lower_case)
    convert_fn = partial(convert_examples_to_features,
                         tokenizer=tokenizer,
                         max_seq_length=args.max_seq_length,
                         doc_stride=args.doc_stride,
                         max_query_length=args.max_query_length)
    if args.do_train:
        train_examples, train_features = read_and_convert(
            args.train_file,
            is_training=True,
            do_lower_case=args.do_lower_case,
            read_fn=read_squad_examples,
            convert_fn=convert_fn)
        if args.fake_file_1:
            fake_examples1, fake_features1 = read_and_convert(
                args.fake_file_1,
                is_training=True,
                do_lower_case=args.do_lower_case,
                read_fn=read_squad_examples,
                convert_fn=convert_fn)
            train_examples += fake_examples1
            train_features += fake_features1
        if args.fake_file_2:
            fake_examples2, fake_features2 = read_and_convert(
                args.fake_file_2,
                is_training=True,
                do_lower_case=args.do_lower_case,
                read_fn=read_squad_examples,
                convert_fn=convert_fn)
            train_examples += fake_examples2
            train_features += fake_features2

        num_train_steps = int(len(train_features) /
                              args.train_batch_size) * args.num_train_epochs

    if args.do_predict:
        eval_examples, eval_features = read_and_convert(
            args.predict_file,
            is_training=False,
            do_lower_case=args.do_lower_case,
            read_fn=read_squad_examples,
            convert_fn=convert_fn)

    #Build Model and load checkpoint
    model_S = BertForQASimple(bert_config_S, args)
    #Load student
    if args.load_model_type == 'bert':
        assert args.init_checkpoint_S is not None
        state_dict_S = torch.load(args.init_checkpoint_S, map_location='cpu')
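        # Keep only the encoder weights, stripping the leading 'bert.' prefix
        # so the keys match the state dict of model_S.bert.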
        state_weight = {
            k[5:]: v
            for k, v in state_dict_S.items() if k.startswith('bert.')
        }
        missing_keys, _ = model_S.bert.load_state_dict(state_weight,
                                                       strict=False)
        assert len(missing_keys) == 0
    elif args.load_model_type == 'all':
        assert args.tuned_checkpoint_S is not None
        state_dict_S = torch.load(args.tuned_checkpoint_S, map_location='cpu')
        model_S.load_state_dict(state_dict_S)
    else:
        logger.info("Model is randomly initialized.")
    model_S.to(device)

    if args.local_rank != -1 or n_gpu > 1:
        if args.local_rank != -1:
            raise NotImplementedError
        elif n_gpu > 1:
            model_S = torch.nn.DataParallel(model_S)  #,output_device=n_gpu-1)

    if args.do_train:
        #parameters
        params = list(model_S.named_parameters())
        all_trainable_params = divide_parameters(params, lr=args.learning_rate)
        logger.info("Length of all_trainable_params: %d",
                    len(all_trainable_params))

        optimizer = BERTAdam(all_trainable_params,
                             lr=args.learning_rate,
                             warmup=args.warmup_proportion,
                             t_total=num_train_steps,
                             schedule=args.schedule,
                             s_opt1=args.s_opt1,
                             s_opt2=args.s_opt2,
                             s_opt3=args.s_opt3)

        logger.info("***** Running training *****")
        logger.info("  Num orig examples = %d", len(train_examples))
        logger.info("  Num split examples = %d", len(train_features))
        logger.info("  Forward batch size = %d", forward_batch_size)
        logger.info("  Num backward steps = %d", num_train_steps)

        ########### DISTILLATION ###########
        train_config = TrainingConfig(
            gradient_accumulation_steps=args.gradient_accumulation_steps,
            ckpt_frequency=args.ckpt_frequency,
            log_dir=args.output_dir,
            output_dir=args.output_dir,
            device=args.device)

        distiller = BasicTrainer(train_config=train_config,
                                 model=model_S,
                                 adaptor=BertForQASimpleAdaptorTraining)

        all_input_ids = torch.tensor([f.input_ids for f in train_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features],
                                      dtype=torch.long)
        all_doc_mask = torch.tensor([f.doc_mask for f in train_features],
                                    dtype=torch.float)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features],
                                       dtype=torch.long)
        all_start_positions = torch.tensor(
            [f.start_position for f in train_features], dtype=torch.long)
        all_end_positions = torch.tensor(
            [f.end_position for f in train_features], dtype=torch.long)

        train_dataset = TensorDataset(all_input_ids, all_segment_ids,
                                      all_input_mask, all_doc_mask,
                                      all_start_positions, all_end_positions)
        if args.local_rank == -1:
            train_sampler = RandomSampler(train_dataset)
        else:
            raise NotImplementedError
        train_dataloader = DataLoader(train_dataset,
                                      sampler=train_sampler,
                                      batch_size=args.forward_batch_size,
                                      drop_last=True)
        callback_func = partial(predict,
                                eval_examples=eval_examples,
                                eval_features=eval_features,
                                args=args)
        with distiller:
            distiller.train(optimizer,
                            scheduler=None,
                            dataloader=train_dataloader,
                            num_epochs=args.num_train_epochs,
                            callback=callback_func)

    if not args.do_train and args.do_predict:
        res = predict(model_S, eval_examples, eval_features, step=0, args=args)
        print(res)
Example #4
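                        # (Excerpt from inside the training loop.) Save a checkpoint
                        # when validation loss improves, and every 10 epochs log the
                        # test metrics to TensorBoard and print them.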
                        torch.save(model, f)
                    best_val = val_loss
            if epoch % 10 == 0:
                test_loss, test_rae, test_corr, test_f1 = eval_method(
                    Data, Data.test[0], Data.test[1], model, evaluateL2,
                    evaluateL1, args)
                writer.add_scalar(args.save + '/testloss', test_loss, epoch)
                writer.add_scalar(args.save + '/testrae', test_rae, epoch)
                writer.add_scalar(args.save + '/testcorr', test_corr, epoch)
                writer.add_scalar(args.save + '/testf1', test_f1, epoch)
                print(
                    "| test rse {:5.4f} | test rae {:5.4f} | test corr {:5.4f} | f1 {:5.4f}\n"
                    .format(test_loss, test_rae, test_corr, test_f1))
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    # Load the best saved model.
    with open(args.save, 'rb') as f:
        model = torch.load(f)
    test_acc, test_rae, test_corr, test_f1 = evaluate(Data, Data.test[0],
                                                      Data.test[1], model,
                                                      evaluateL2, evaluateL1,
                                                      args)
    writer.add_scalar(args.save + '/testf1', test_f1, args.epochs + 1)
    print('Best model performance:')
    print(
        "| test rse {:5.4f} | test rae {:5.4f} | test corr {:5.4f}| test f1 {:5.4f}"
        .format(test_acc, test_rae, test_corr, test_f1))
    predict(Data, Data.test[0], Data.test[1], model, args)
print(modelname)
Example #5
    config = x.Config(args.word, dataset)
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True  # ensure results are reproducible across runs

    start_time = time.time()
    print("Loading data...")

    train_data, dev_data, test_data = build_dataset(config, do_train,
                                                    do_test or do_predict,
                                                    args.word)

    if do_train:
        train_iter = build_iterator(train_data, config)
        dev_iter = build_iterator(dev_data, config)
    if do_test:
        test_iter = build_iterator(test_data, config)
    if do_predict:
        data_iter = build_iterator(test_data, config)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    # Build the model and run the requested stages
    model = x.Model(config).to(config.device)
    if do_train:
        train(config, model, train_iter, dev_iter)
    if do_test:
        test(config, model, test_iter)
    if do_predict:
        predict(config, model, data_iter)
Example #6
    # Visualize loss and accuracy history
    pl.plot_accuracy(history)
    pl.plot_losses(history)

    # Evaluate final model
    scores = te.evaluate(model=mnist_model, val_loader=test_loader)
    print('Test scores: ', scores)

    # Predict on a few sample inputs from the test set
    test_dataset = MNIST(root='data/',
                         train=False,
                         transform=transforms.ToTensor())
    for idx in (0, 111, 222, 333):
        x, label = test_dataset[idx]
        x = x.unsqueeze(0)
        pred = te.predict(x=x, model=mnist_model)
        print('True label: {}, Predicted: {}'.format(label, pred))
Example #7
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True

    start_time = time.time()
    print("Loading data...")
    vocab, train_data, dev_data, test_data = build_dataset(config, args.word)
    train_iter = build_iterator(train_data, config)
    dev_iter = build_iterator(dev_data, config)
    test_iter = build_iterator(test_data, config)
    time_dif = get_time_diff(start_time)
    print("Time usage:", time_dif)

    config.num_vocab = len(vocab)
    model = x.Model(config).to(config.device)
    init_network(model)
    print(model)  # print the model architecture
    train(config, model, train_iter, dev_iter)

    predictions = predict(config, model, test_iter)

    if not os.path.exists('result'):
        os.mkdir('result')

    with open(os.path.join(
            'result',
            model_name + ('_word_' if args.word else '_char_') + 'result.txt'),
              'w',
              encoding='UTF-8') as f:
        for i, label in enumerate(predictions):
            print(f'{i}.txt {label}', file=f)
Example #8
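# Note: input_length and output_length (the input/output window sizes) are assumed
# to be defined earlier in the original script, along with the imports it relies on
# (numpy as np, torch.nn as nn, torch.optim as opt, matplotlib.pyplot as plt, and the
# repo's GRU, generate_data, train and predict helpers).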
batch_size = 256
hidden_size = 128
num_layers = 1
dropout = 0
testnum = 500
# interval is sample interval between last input and first output.
interval = 0

epoch = 100
device = 'cuda'

# Generate sin dataset for training and testing.
dataset = np.sin([i / 50 * 2 * np.pi for i in range(2000)])
x_train, y_train, x_test, y_test, normalizer = generate_data(
    dataset, 'minmax', input_length, output_length, testnum, interval)

# Build, train and predict.
model = GRU(1, hidden_size, num_layers, 1, dropout)
optimizer = opt.Adam(model.parameters())
loss = nn.MSELoss()
batch_train_loss, batch_val_loss = train(model, x_train, y_train, epoch,
                                         batch_size, optimizer, loss, device)
y_predict, y_real, _ = predict(model, x_test, y_test, loss, device, normalizer,
                               batch_size)

# Draw result
plt.plot(y_predict, label='prediction')
plt.plot(y_real, label='real')
plt.legend()
plt.show()
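
A minimal sketch of what a generate_data-style helper could do here: min-max normalise
the series, slice it into (input, output) windows separated by interval, and hold out
the last testnum windows for testing (hypothetical; the actual helper's signature and
the normalizer object it returns may differ).

import numpy as np

def generate_data_sketch(series, input_length, output_length, testnum, interval=0):
    # Min-max normalise to [0, 1]; keep (lo, hi) so predictions can be de-normalised.
    lo, hi = series.min(), series.max()
    norm = (series - lo) / (hi - lo)
    xs, ys = [], []
    for i in range(len(norm) - input_length - interval - output_length + 1):
        xs.append(norm[i:i + input_length])
        start = i + input_length + interval
        ys.append(norm[start:start + output_length])
    xs, ys = np.array(xs), np.array(ys)
    # The last testnum windows form the test split.
    return xs[:-testnum], ys[:-testnum], xs[-testnum:], ys[-testnum:], (lo, hi)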