Example no. 1
parser = argparse.ArgumentParser(description='Chinese Ner Pytorch')
parser.add_argument('--doing', type=str, required=True, help='choose an action: train, test, predict')
parser.add_argument('--model', type=str, required=True, help='choose a model: Bert, Albert, Xlnet, Gpt-2')
args = parser.parse_args()


if __name__ == '__main__':

    model_name = args.model
    x = import_module('Models.' + model_name)
    config = x.Config()
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True  # ensure identical results on every run

    start_time = time.time()
    print("Loading data...")
    train_dataset = built_train_dataset(config)
    dev_dataset = built_dev_dataset(config)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    if args.doing == 'train':
        model = x.Model(config).to(config.device)
        train(config, model, train_dataset, dev_dataset)
    if args.doing == 'predict':
        model = x.Model(config).to(config.device)
        predict(config, model)
Example no. 2
import torch.nn as nn
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1,2,3,5"
parser = argparse.ArgumentParser(description='Chinese Text Classification')
parser.add_argument('--model', type=str, required=True, help='choose a model: Bert, ERNIE')
args = parser.parse_args()


if __name__ == '__main__':
    dataset = 'THUCNews'  # dataset

    model_name = args.model  # bert
    x = import_module('models.' + model_name)
    config = x.Config(dataset)
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True  # ensure identical results on every run



    model = x.Model(config)
    #init_network(model)
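    # wrap the model for multi-GPU training; the device ids index into the GPUs exposed via CUDA_VISIBLE_DEVICES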
    model = nn.DataParallel(model, device_ids=[0, 1, 2, 3])
    model.to(config.device)
    model.load_state_dict(torch.load('THUCNews/saved_dict/ERNIE0.4gauss.ckpt'))


    train(config, model)

model_name = "%s%s.pt" % (CONFIG["SAVE_DIR"], CONFIG["model"])

# initialize the early_stopping object
early_stopping = EarlyStopping(patience=20, verbose=True, path=model_name)

lr = CONFIG["lr"]
for epoch in range(1, CONFIG["epochs"] + 1):
    print(f'Epoch: {epoch:02}')
    epoch_start_time = time.time()

    print('\tTrain: ', end='')
    train_loss, train_acc = train(
        train_dl,
        model,
        criterion,
        list(range(CONFIG["gpus"])),
        DEVICE,
        optimizer,
        scheduler=scheduler,
        seq2seq=not classify,
        pad_idx=vocab[CONFIG["pad_token"]] if not classify else -1)

    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    print('\n\tValid: ', end='')
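    # run validation without building the autograd graph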
    with torch.no_grad():
        valid_loss, valid_acc = valid(
            valid_dl,
            model,
            criterion,
            DEVICE,
            temperature=CONFIG["temp"],
        train_data2, dev_data2 = BalanceFold(train_news, 20)
        train_data3, dev_data3 = BalanceFold(train_emotion, 10)
        print("dev size: ", len(dev_data1), len(dev_data2), len(dev_data3))
        train_iter1 = build_iterator2(train_data1, config)
        dev_iter1 = build_iterator2(dev_data1, config)
        train_iter2 = build_iterator2(train_data2, config)
        dev_iter2 = build_iterator2(dev_data2, config)
        train_iter3 = build_iterator2(train_data3, config)
        dev_iter3 = build_iterator2(dev_data3, config)
        print(len(dev_iter1), len(dev_iter2), len(dev_iter3))
        print("len(train_features_1)=", len(train_iter1))
        print("len(train_features_2)=", len(train_iter2))
        print("len(train_features_3)=", len(train_iter3))

        a = []
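        # one entry per batch of each task's iterator; after shuffling, this presumably tells train() in which order to draw batches from the three tasks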
        for i in range(len(train_iter1)):
            a.append(1)
        for i in range(len(train_iter2)):
            a.append(2)
        for i in range(len(train_iter3)):
            a.append(3)
        random.seed(1234)
        #random.seed(1)
        random.shuffle(a)
        print("len(a)=", len(a))
        time_dif = get_time_dif(start_time)
        print("Time usage:", time_dif)
        config.save_path = '../user_data/Roberta-wwm-36/bert.ckpt'
        # model.load_state_dict(torch.load(config.save_path))
        train(config, model, a, train_iter1, dev_iter1, train_iter2, dev_iter2,
              train_iter3, dev_iter3)
Example no. 5
    model = Seq2Seq(enc, dec).to(device)

    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss(ignore_index=trg_pad_idx)
    count_parameters(model)

    N_EPOCHS = 90
    CLIP = 0.1

    best_metric = 0

    for epoch in range(N_EPOCHS):
        start_time = time.time()
        model.train()
        train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
        test_loss = evaluate(model, test_iterator, criterion)

        metrics_test = calculate_avg_rouge_f(test_data, SRC, TRG, model,
                                             device)
        print(f'\tMetrics_test: {metrics_test}')

        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

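        # keep a checkpoint of the best-scoring model seen so far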
        if metrics_test > best_metric:
            print('New best score!')
            best_metric = metrics_test
            torch.save(model.state_dict(), 'models/best-model.pt')

        print(f'Epoch: {epoch + 1:02} | Time: {epoch_mins}m {epoch_secs}s')
Example no. 6
                device=device)

model.to(device)

optimizer = optim.Adam(model.parameters())

n_epochs = 5

clip = 1
best_test_loss = float('inf')

for epoch in range(n_epochs):
    print("EPOCH ", epoch, " START #########################################")
    start_time = time.time()

    train_loss = train(model, train_dl, optimizer, clip)
    test_loss, f1 = evaluate(model, test_dl, vectorizer)

    end_time = time.time()

    epoch_time = end_time - start_time

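    # save both the full model and a resumable training checkpoint whenever the test loss improves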
    if test_loss < best_test_loss:
        best_test_loss = test_loss
        torch.save(model, f'./checkpoints/ner_lstm_epoch_{epoch}.pt')
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': train_loss,
Example no. 7
    val_dataset = dataset[num_train:num_train + num_val]
    test_dataset = dataset[num_train + num_val:]

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             shuffle=False)

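    # grid search over every (num_layers, hidden) combination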
    for num_layers, hidden in product(layers, hiddens):
        print(f'--\n{dataset_name} - {Net.__name__} - {num_layers} - {hidden}')

        model = Net(dataset, num_layers, hidden).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

        stats_list = []
        for epoch in range(1, args.epochs + 1):
            loss, stats = train(model, optimizer, train_loader)
            val_acc, val_time = eval_acc(model, val_loader)
            test_acc, test_time = eval_acc(model, test_loader)

            if epoch >= args.warmup_profile:
                stats_list.append(stats)

        stats_summary = get_stats_summary(stats_list)
        print(stats_summary)
Example no. 8
def main():
    dataset = args.dataset
    shadow_path, target_path, attack_path = "./models/shadow_" + str(dataset) + ".pth", \
                                            "./models/target_" + str(dataset) + ".pth", \
                                            "./models/attack_" + str(dataset) + ".pth"

    # CIFAR has RGB images (3 channels) and MNIST is grayscale (1 channel)
    if dataset == "cifar":
        input_size = 3
    elif dataset == "mnist":
        input_size = 1

    n_epochs = args.epoch
    attack_epochs = args.attack_epoch
    batch_size = args.batch_size

    # Dataloaders for the shadow and target train / out splits of the dataset
    shadow_train_loader = dataloader(dataset=dataset,
                                     batch_size_train=batch_size,
                                     batch_size_test=1000,
                                     split_dataset="shadow_train")
    shadow_out_loader = dataloader(dataset=dataset,
                                   batch_size_train=batch_size,
                                   batch_size_test=1000,
                                   split_dataset="shadow_out")
    target_train_loader = dataloader(dataset=dataset,
                                     batch_size_train=batch_size,
                                     batch_size_test=1000,
                                     split_dataset="target_train")
    target_out_loader = dataloader(dataset=dataset,
                                   batch_size_train=batch_size,
                                   batch_size_test=1000,
                                   split_dataset="target_out")

    testloader = dataloader(dataset=dataset,
                            batch_size_train=batch_size,
                            batch_size_test=1000,
                            split_dataset="test")

    # In Attack I the adversary knows the structure of the target net, and can therefore
    # train a shadow model with the same architecture to mimic the behaviour of the target
    target_net = ConvNet(input_size=input_size)
    shadow_net = ConvNet(input_size=input_size)

    # Loss functions for the shadow and target models
    target_loss = shadow_loss = nn.CrossEntropyLoss()
    target_optim = optim.Adam(target_net.parameters(), lr=0.001)
    shadow_optim = optim.Adam(shadow_net.parameters(), lr=0.001)

    # attack net is a binary classifier to determine membership
    attack_net = MlleaksMLP()
    # Binary cross entropy as loss
    attack_loss = nn.BCELoss()
    attack_optim = optim.Adam(attack_net.parameters(), lr=0.001)

    # Three training loops follow: first the shadow model, then the target model, and finally the attack model.

    if os.path.exists(shadow_path):
        print("Load shadow model")
        shadow_net.load_state_dict(torch.load(shadow_path))
    # Training of shadow model on shadow training set
    if not args.only_eval:
        print("start training shadow model: ")
        for epoch in range(n_epochs):
            loss_train_shadow = train(shadow_net,
                                      shadow_train_loader,
                                      shadow_loss,
                                      shadow_optim,
                                      verbose=False)
            # Evaluate model after every five epochs
            if (epoch + 1) % 5 == 0:
                accuracy_train_shadow = eval_model(shadow_net,
                                                   shadow_train_loader,
                                                   report=False)
                accuracy_test_shadow = eval_model(shadow_net,
                                                  testloader,
                                                  report=True)
                print(
                    "Shadow model: epoch[%d/%d] Train loss: %.5f training set accuracy: %.5f  test set accuracy: %.5f"
                    % (epoch + 1, n_epochs, loss_train_shadow,
                       accuracy_train_shadow, accuracy_test_shadow))
            if args.save_new_models:
                if not os.path.exists("./models"):
                    os.mkdir("./models")  # Create the folder models if it doesn't exist
                # Save model after each epoch if argument is true
                torch.save(shadow_net.state_dict(),
                           "./models/shadow_" + str(dataset) + ".pth")

    if os.path.exists(target_path):
        print("Load target model")
        target_net.load_state_dict(torch.load(target_path))
    # Train of target model on the target training set
    if not args.only_eval:
        print("start training target model: ")
        for epoch in range(n_epochs):
            loss_train_target = train(target_net,
                                      target_train_loader,
                                      target_loss,
                                      target_optim,
                                      verbose=False)
            # Evaluate model after every five epochs
            if (epoch + 1) % 5 == 0:
                accuracy_train_target = eval_model(target_net,
                                                   target_train_loader,
                                                   report=False)
                accuracy_test_target = eval_model(target_net,
                                                  testloader,
                                                  report=True)
                print(
                    "Target model: epoch[%d/%d] Train loss: %.5f training set accuracy: %.5f  test set accuracy: %.5f"
                    % (epoch + 1, n_epochs, loss_train_target,
                       accuracy_train_target, accuracy_test_target))
            if args.save_new_models:
                # Save model after each epoch
                if not os.path.exists("./models"):
                    os.mkdir("./models")  # Create the folder models if it doesn't exist
                torch.save(target_net.state_dict(), target_path)

    if os.path.exists(attack_path):
        print("Load attack model")
        attack_net.load_state_dict(torch.load(attack_path))
    # Training of attack model based on shadow net posteriors on shadow train and out datasets.
    if not args.only_eval:
        print("start training attacker model")
        for epoch in range(attack_epochs):
            loss_attack = train_attacker(attack_net,
                                         shadow_net,
                                         shadow_train_loader,
                                         shadow_out_loader,
                                         attack_optim,
                                         attack_loss,
                                         num_posterior=3,
                                         verbose=False)
            # The attack model is evaluated using the target net posteriors to classify membership of data on target train/out.
            # Evaluate the attack model after every epoch
            if (epoch + 1) % 1 == 0:
                max_accuracy = eval_attacker(attack_net,
                                             target_net,
                                             target_train_loader,
                                             target_out_loader,
                                             num_posterior=3)
                print(
                    "Attack model: epoch[%d/%d]  Train loss: %.5f  Accuracy on target set: %.5f"
                    % (epoch + 1, attack_epochs, loss_attack, max_accuracy))
                if args.save_new_models:
                    if not os.path.exists("./models"):
                        os.mkdir("./models")  # Create the folder models if it doesn't exist
                    # Save model after each epoch
                    torch.save(attack_net.state_dict(), attack_path)

    # Only evaluate the pretrained, loaded models when the only_eval argument is True
    if args.only_eval:
        print("Classification Report Shadow Net:")
        eval_model(shadow_net, testloader, report=True)
        print("Classification Report Target Net:")
        eval_model(target_net, testloader, report=True)
        print("Report of Attack Net")
        eval_attacker(attack_net,
                      target_net,
                      target_train_loader,
                      target_out_loader,
                      num_posterior=3)
Example no. 9
x = import_module('models.' + model_name)
# configuration parameters
config = x.Config(dataset)

# The seeds below are fixed so that every run gives the same results
np.random.seed(1)
# Seed the CPU random number generator
torch.manual_seed(1)
# Seed all GPUs
torch.cuda.manual_seed_all(1)
# With this flag set to True, cuDNN always picks the same deterministic convolution algorithm (the default one)
torch.backends.cudnn.deterministic = True  # ensure identical results on every run

start_time = time.time()
print("Loading data...")
OCNLI_train, OCNLI_dev, OCEMOTION_train, OCEMOTION_dev, TNEWS_train, TNEWS_dev = build_dataset(
    config, mode='train')
OCNLI_train_iter = build_iterator(OCNLI_train, config)
OCEMOTION_train_iter = build_iterator(OCEMOTION_train, config)
TNEWS_train_iter = build_iterator(TNEWS_train, config)
OCNLI_dev_iter = build_iterator(OCNLI_dev, config)
OCEMOTION_dev_iter = build_iterator(OCEMOTION_dev, config)
TNEWS_dev_iter = build_iterator(TNEWS_dev, config)

time_dif = get_time_dif(start_time)

# train
model = x.Model(config).to(config.device)
train(config, model, OCNLI_train_iter, OCNLI_dev_iter, OCEMOTION_train_iter,
      OCEMOTION_dev_iter, TNEWS_train_iter, TNEWS_dev_iter)
Example no. 10
    model = DSN(in_dim=64,
                hid_dim=args.hidden_dim,
                num_layers=args.num_layers,
                cell=args.rnn_cell)
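    # recursively apply the custom weight initialization to every submodule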
    model.apply(weights_init)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    print(" ========== \nArgs:{} \n========== ".format(args))
    args.train_keys = split['train_keys']
    args.test_keys = split['test_keys']

    if args.train_model == 'sup':
        print("========Supervised Learning========")
    else:
        args.use_reward = True
        print("========Unsupervised Learning========")

    args.demo_h5 = osp.join(
        args.save_path,
        'h5_res' + args.reward_type + str(args.lr) + args.train_model)

    model = train(args, model, dataset)

    # Testing
    Fscore, Precision, Recall = evaluate(args, model, dataset)

    # save model
    save_model_epoch(args, model, args.max_epoch)
Example no. 11
batch_size = 256
hidden_size = 128
num_layers = 1
dropout = 0
testnum = 500
# interval is sample interval between last input and first output.
interval = 0

epoch = 100
device = 'cuda'

# Generate sin dataset for training and testing.
dataset = np.sin([i / 50 * 2 * np.pi for i in range(2000)])
x_train, y_train, x_test, y_test, normalizer = generate_data(
    dataset, 'minmax', input_length, output_length, testnum, interval)

# Build, train and predict.
model = GRU(1, hidden_size, num_layers, 1, dropout)
optimizer = opt.Adam(model.parameters())
loss = nn.MSELoss()
batch_train_loss, batch_val_loss = train(model, x_train, y_train, epoch,
                                         batch_size, optimizer, loss, device)
y_predict, y_real, _ = predict(model, x_test, y_test, loss, device, normalizer,
                               batch_size)

# Draw result
plt.plot(y_predict, label='prediction')
plt.plot(y_real, label='real')
plt.legend()
plt.show()
Example no. 12
    label_vocab_dim = len(w2i_fre_train)

    m = model.enc_dec_attn(args.enc_hid, args.dec_hid, args.emb_dim,
                           args.drop_prob, device, inp_vocab_dim,
                           label_vocab_dim)
    # print(m)
    # print(f'The model has {model.count_parameters(m):,} trainable parameters')

    data_loader_train = data_loader.get_data_loader(train, w2i_eng_train,
                                                    w2i_fre_train,
                                                    args.batch_size, eng_lm,
                                                    fre_lm)
    data_loader_val = data_loader.get_data_loader(val, w2i_eng_val,
                                                  w2i_fre_val, args.batch_size,
                                                  eng_lm, fre_lm)
    data_loader_test = data_loader.get_data_loader(test, w2i_eng_test,
                                                   w2i_fre_test,
                                                   args.batch_size, eng_lm,
                                                   fre_lm)

    loss = train_eval.train(m, args.epochs, args.batch_size, data_loader_train,
                            data_loader_val, w2i_eng_train, i2w_eng_val,
                            w2i_fre_train, i2w_fre_val, device)

    criterion = nn.CrossEntropyLoss().to(device)
    loss = train_eval.val_test(m, data_loader_test, criterion, w2i_eng_train,
                               i2w_eng_test, w2i_fre_train, i2w_fre_test,
                               device, 'test')

    print('Test Loss: {}'.format(loss))
Example no. 13
from train_eval import train

from utils import get_sent, get_input, get_data_loader, printm

data_path = './data'
# data_path = './data'

bert_path = './bert'
# bert_path = '/content/bert-utils/bert/multilingual_L-12_H-768_A-12'

if __name__ == '__main__':
    config = Config(data_path, bert_path)
    if config.device == 'cuda':
        memoryUtil = printm(config)
        if memoryUtil > 0.2:
            # abort if more than 20% of GPU memory is already in use
            print('MemoryUtil FULL')
            sys.exit()

    train_sent, dev_sent, test_sent = get_sent(data_path)

    train_dataloader = get_data_loader(config, train_sent)
    dev_dataloader = get_data_loader(config, dev_sent)
    test_dataloader = get_data_loader(config, test_sent)

    model = config.model

    train(config, model, train_dataloader, dev_dataloader, test_dataloader)
Example no. 14
        train_loader = DataLoader(dataset=train_datasets,
                                  batch_size=128,
                                  shuffle=True,
                                  num_workers=0)
        dev_loader = DataLoader(dataset=dev_datasets,
                                batch_size=512,
                                shuffle=False,
                                num_workers=0)

        model = Model(config, embedding_pretraineds, device)  # define the model
        logger.info('{} Fold'.format(i + 1))
        logger.info(('*' * 50))
        logger.debug(model)

        train(config, model, train_loader, dev_loader)

        test_loader = DataLoader(dataset=test_datasets,
                                 batch_size=512,
                                 shuffle=False,
                                 num_workers=0)

        preds_age = test(config, model, test_loader)

        index_col = config.getint('model_parameters_settings', 'test_index')

        # generate the submission data
        preds_age = preds_age + 1

        preds_gender = list([4 for _ in range(len(preds_age))])
Example no. 15
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

args = parser.parse_args()

if __name__ == '__main__':
    dataset = 'text_emotion'  # dataset

    model_name = args.model  # bert
    x = import_module('models.' + model_name)
    config = x.Config(dataset)
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True  # ensure identical results on every run
    if torch.cuda.device_count() > 0:
        torch.cuda.manual_seed_all(1)

    start_time = time.time()
    print("Loading data...")

    train_iter, dev_iter = build_dataset(config, args)
    #train_iter = build_iterator(train_data, config)
    #dev_iter = build_iterator(dev_data, config)
    #test_iter = build_iterator(test_data, config)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    model = x.Model(config).to(config.device)
    train(config, model, train_iter, dev_iter, None)
Example no. 16
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument("--model_type",
                        default='bert',
                        type=str,
                        help="Model type selected in the list: " +
                        ", ".join(MODEL_CLASSES.keys()))
    parser.add_argument(
        "--model_name_or_path",
        default='bert-base-uncased',
        type=str,
        help="Path to pre-trained model or shortcut name selected in the list: "
        + ", ".join(ALL_MODELS))
    parser.add_argument(
        "--output_dir",
        default='../output_mc',
        type=str,
        help=
        "The output directory where the model checkpoints and predictions will be written."
    )
    parser.add_argument("--raw_data_dir", default='../data_mc', type=str)
    parser.add_argument(
        "--config_name",
        default="",
        type=str,
        help="Pretrained config name or path if not the same as model_name")
    parser.add_argument(
        "--tokenizer_name",
        default="",
        type=str,
        help="Pretrained tokenizer name or path if not the same as model_name")
    parser.add_argument(
        "--max_seq_length",
        default=384,
        type=int,
        help=
        "The maximum total input sequence length after WordPiece tokenization. Sequences "
        "longer than this will be truncated, and sequences shorter than this will be padded."
    )
    parser.add_argument("--task_name", default='DREAM')
    parser.add_argument("--do_train",
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--do_test",
                        action='store_true',
                        help='Whether to run test on the test set')
    parser.add_argument(
        "--evaluate_during_training",
        action='store_true',
        help="Run evaluation during training at each logging step.")
    parser.add_argument(
        "--do_lower_case",
        action='store_true',
        help="Set this flag if you are using an uncased model.")

    parser.add_argument("--per_gpu_train_batch_size",
                        default=8,
                        type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--per_gpu_eval_batch_size",
                        default=8,
                        type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument("--learning_rate",
                        default=3e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument(
        '--gradient_accumulation_steps',
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass."
    )
    parser.add_argument("--weight_decay",
                        default=0.0,
                        type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon",
                        default=1e-8,
                        type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm",
                        default=1.0,
                        type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs",
                        default=2.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument(
        "--max_steps",
        default=-1,
        type=int,
        help=
        "If > 0: set total number of training steps to perform. Override num_train_epochs."
    )
    parser.add_argument("--warmup_proportion",
                        default=0.1,
                        type=float,
                        help="Proportion of training steps to perform linear learning rate warmup over.")
    parser.add_argument("--time_stamp", default='', type=str)
    parser.add_argument(
        "--verbose_logging",
        action='store_true',
        help=
        "If true, all of the warnings related to data processing will be printed. "
        "A number of warnings are expected for a normal SQuAD evaluation.")
    parser.add_argument(
        "--eval_all_checkpoints",
        action='store_true',
        help=
        "Evaluate all checkpoints starting with the same prefix as model_name and ending with step number"
    )
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument('--overwrite_output_dir',
                        action='store_true',
                        help="Overwrite the content of the output directory")
    parser.add_argument(
        '--overwrite_cache',
        action='store_true',
        help="Overwrite the cached training and evaluation sets")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")

    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument(
        '--fp16',
        action='store_true',
        help=
        "Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit"
    )
    parser.add_argument(
        '--fp16_opt_level',
        type=str,
        default='O1',
        help=
        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html")
    args = parser.parse_args()

    if os.path.exists(args.output_dir) and os.listdir(
            args.output_dir
    ) and args.do_train and not args.overwrite_output_dir:
        logger.info(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))
    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend='nccl')
        args.n_gpu = 1
    args.device = device
    # Set seed
    set_seed(args)
    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path)
    config.output_hidden_states = True
    config.num_options = int(
        MULTIPLE_CHOICE_TASKS_NUM_LABELS[args.task_name.lower()])
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case)
    model = model_class.from_pretrained(
        args.model_name_or_path,
        from_tf=bool('.ckpt' in args.model_name_or_path),
        config=config)
    model.MLP.copy_from_bert(model.bert)
    model.to(args.device)
    logger.info("Training/evaluation parameters %s", args)

    if args.fp16:
        try:
            import apex
            apex.amp.register_half_function(torch, 'einsum')
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )

    if args.do_train:
        logging.getLogger("transformers.tokenization_utils").setLevel(
            logging.ERROR)  # Reduce logging
        train_dataset = load_and_cache_examples(args,
                                                task=args.task_name,
                                                tokenizer=tokenizer,
                                                evaluate=False)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step,
                    tr_loss)

    time_stamp = args.time_stamp
    # Evaluation
    # We do not use dev set
    if args.do_eval and args.local_rank in [-1, 0]:
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            checkpoints = list(
                os.path.dirname(c) for c in sorted(
                    glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME,
                              recursive=True)))
            logging.getLogger("transformers.modeling_utils").setLevel(
                logging.WARN)  # Reduce logging
            logging.getLogger("transformers.tokenization_utils").setLevel(
                logging.ERROR)  # Reduce logging
            logging.getLogger("transformers.configuration_utils").setLevel(
                logging.WARN)  # Reduce logging
        checkpoints = [
            checkpoint for checkpoint in checkpoints
            if time_stamp in checkpoint
        ]
        logger.info("Evaluate the following checkpoints for validation: %s",
                    checkpoints)
        best_ckpt = 0
        best_acc = 0
        for checkpoint in checkpoints:
            global_step = checkpoint.split(
                '-')[-1] if len(checkpoints) > 1 else ""
            prefix = checkpoint.split(
                '/')[-1] if checkpoint.find('checkpoint') != -1 else ""
            logger.info("Load the model: %s", checkpoint)

            model = model_class.from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate(args,
                              args.task_name,
                              model,
                              tokenizer,
                              prefix=prefix)
            if result[0]['eval_acc'] > best_acc:
                best_ckpt = checkpoint
                best_acc = result[0]['eval_acc']
    if args.do_test and args.local_rank in [-1, 0]:
        try:
            checkpoints = [best_ckpt]
        except:
            checkpoints = list(
                os.path.dirname(c) for c in sorted(
                    glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME,
                              recursive=True)))
        checkpoints = [
            checkpoint for checkpoint in checkpoints
            if time_stamp in checkpoint
        ]
        logging.getLogger("transformers.modeling_utils").setLevel(
            logging.WARN)  # Reduce logging
        logging.getLogger("transformers.configuration_utils").setLevel(
            logging.WARN)  # Reduce logging
        logging.getLogger("transformers.tokenization_utils").setLevel(
            logging.ERROR)  # Reduce logging

        logger.info("Evaluate the following checkpoints for final testing: %s",
                    checkpoints)
        for checkpoint in checkpoints:
            global_step = checkpoint.split(
                '-')[-1] if len(checkpoints) > 1 else ""
            prefix = checkpoint.split(
                '/')[-1] if checkpoint.find('checkpoint') != -1 else ""
            logger.info("Load the model: %s", checkpoint)
            model = model_class.from_pretrained(checkpoint)
            model.to(args.device)
            task_string = [
                '', '-Add1OtherTruth2Opt', '-Add2OtherTruth2Opt',
                '-Add1PasSent2Opt', '-Add1NER2Pass'
            ]
            task_string = [args.task_name + item for item in task_string]
            result = evaluate(args,
                              task_string,
                              model,
                              tokenizer,
                              prefix=prefix,
                              test=True)
Example no. 17
        'class_label'].apply(lambda x: 0)
    # merge the datasets
    data_merge = pd.merge(select_from_data_train,
                          select_from_data_neg,
                          how="outer")
    # shuffle the data
    data_merge = data_merge.sample(frac=1).reset_index(drop=True)

    # split the dataset (80% train / 20% dev)
    train_set = data_merge.loc[:int(data_merge.shape[0] * 0.8)]
    dev_set = data_merge.loc[int(data_merge.shape[0] * 0.8):]
    # save the data to the paths the model config expects
    train_set.to_csv('THUCNews/data/train.csv', index=False, header=False)
    dev_set.to_csv('THUCNews/data/dev.csv', index=False, header=False)
    # train_data, dev_data, test_data = build_dataset(config)
    train_data, dev_data = build_dataset(config)
    train_iter = build_iterator(train_data, config)
    dev_iter = build_iterator(dev_data, config)
    # test_iter = build_iterator(test_data, config)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    # train
    model = x.Model(config).to(config.device)
    # train(config, model, train_iter, dev_iter, test_iter)
    train(config,
          model,
          train_iter,
          dev_iter,
          save_path="THUCNews/saved_dict/" + model_name + '.ckpt')
Example no. 18
def main(config):

    if not os.path.exists(config.model_dir):
        os.makedirs(config.model_dir)

    if not os.path.exists(config.log_dir):
        os.makedirs(config.log_dir)

    print("\t \t \t the model name is {}".format(config.model_name))
    device, n_gpu = get_device()

    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(config.seed)
        torch.backends.cudnn.deterministic = True  # cuDNN uses deterministic algorithms so results are identical on every run
    """ sst2 data preparation """
    text_field = data.Field(tokenize='spacy',
                            lower=True,
                            include_lengths=True,
                            fix_length=config.sequence_length)
    label_field = data.LabelField(dtype=torch.long)
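    # the fields above give spaCy tokenization, lowercased text, per-example lengths, and a fixed sequence length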

    train_iterator, dev_iterator, test_iterator = load_sst2(
        config.data_path, text_field, label_field, config.batch_size, device,
        config.glove_word_file)
    """ word embedding preparation """
    pretrained_embeddings = text_field.vocab.vectors

    model_file = config.model_dir + 'model1.pt'
    """ model preparation """
    if config.model_name == "TextCNN":
        filter_sizes = [int(val) for val in config.filter_sizes.split()]
        model = TextCNN.TextCNN(config.glove_word_dim, config.filter_num,
                                filter_sizes, config.output_dim,
                                config.dropout, pretrained_embeddings)
    elif config.model_name == "TextRNN":
        model = TextRNN.TextRNN(config.glove_word_dim, config.output_dim,
                                config.hidden_size, config.num_layers,
                                config.bidirectional, config.dropout,
                                pretrained_embeddings)
    elif config.model_name == "LSTMATT":
        model = LSTMATT.LSTMATT(config.glove_word_dim, config.output_dim,
                                config.hidden_size, config.num_layers,
                                config.bidirectional, config.dropout,
                                pretrained_embeddings)
    elif config.model_name == 'TextRCNN':
        model = TextRCNN.TextRCNN(config.glove_word_dim, config.output_dim,
                                  config.hidden_size, config.num_layers,
                                  config.bidirectional, config.dropout,
                                  pretrained_embeddings)

    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()

    model = model.to(device)
    criterion = criterion.to(device)

    if config.do_train:
        train(config.epoch_num, model, train_iterator, dev_iterator, optimizer,
              criterion, ['0', '1'], model_file, config.log_dir,
              config.print_step, 'word')

    model.load_state_dict(torch.load(model_file))

    test_loss, test_acc, test_report = evaluate(model, test_iterator,
                                                criterion, ['0', '1'], 'word')
    print("-------------- Test -------------")
    print(
        "\t Loss: {} | Acc: {} | Micro avg F1: {} | Macro avg F1: {} | Weighted avg F1: {}"
        .format(test_loss, test_acc, test_report['micro avg']['f1-score'],
                test_report['macro avg']['f1-score'],
                test_report['weighted avg']['f1-score']))
Example no. 19
    embedding = 'random'
    if args.embedding == 'random':
        embedding = 'random'
    model_name = args.model  # 'TextRCNN'  # TextCNN, TextRNN, FastText, TextRCNN, TextRNN_Att, DPCNN, Transformer

    from utils import build_dataset, build_iterator, get_time_dif

    x = import_module('models.' + model_name)
    config = x.Config(dataset, embedding)
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True  # ensure identical results on every run

    start_time = time.time()
    print("Loading data...")
    vocab, train_data, dev_data, test_data, id_to_word = build_dataset(config, args.word)
    train_iter = build_iterator(train_data, config)
    dev_iter = build_iterator(dev_data, config)
    test_iter = build_iterator(test_data, config)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    # train
    config.n_vocab = len(vocab)
    model = x.Model(config).to(config.device)
    if model_name != 'Transformer':
        init_network(model)
    print(model.parameters)
    train(config, model, train_iter, dev_iter, test_iter, id_to_word)
Example no. 20
def main(args):

    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train):
        print("Output directory ({}) already exists and is not empty. ".format(args.output_dir))
        print("Do you want to overwrite it? Type y or n")

        if input() == 'n':
            return

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # gpu ready
    gpu_ids = [int(device_id) for device_id in args.gpu_ids.split()]
    args.device, args.n_gpu = get_device(gpu_ids[0])

    # PTM ready
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(args.config_file,
                                          num_labels=2,
                                          cache_dir=None)
    tokenizer = tokenizer_class.from_pretrained(
        args.vocab_file, do_lower_case=args.do_lower_case, cache_dir=None)

    # train and evaluate to produce the checkpoint
    if args.do_train:
        train_dataset = load_data(args, tokenizer, 'train')
        train_dataloader = random_dataloader(train_dataset,
                                             args.train_batch_size)

        dev_dataset = load_data(args, tokenizer, 'dev')
        dev_dataloader = sequential_dataloader(dev_dataset,
                                               args.dev_batch_size)

        # 模型准备
        model = model_class.from_pretrained(args.model_file,
                                            from_tf=False,
                                            config=config,
                                            cache_dir=None)

        model.to(args.device)
        if args.n_gpu > 1:
            model = torch.nn.DataParallel(model, device_ids=gpu_ids)

        # optimizer ready
        no_decay = ["bias", "LayerNorm.weight"]
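        # parameters whose names match no_decay are exempted from weight decay below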
        optimizer_grouped_parameters = [
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay)
                ],
                "weight_decay":
                args.weight_decay,
            },
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if any(nd in n for nd in no_decay)
                ],
                "weight_decay":
                0.0
            },
        ]
        optimizer = AdamW(optimizer_grouped_parameters,
                          lr=args.learning_rate,
                          eps=args.adam_epsilon)

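        # total number of optimizer update steps, used by the linear warmup scheduler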
        t_total = len(
            train_dataloader
        ) // args.gradient_accumulation_steps * args.num_train_epochs
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=args.warmup_steps,
            num_training_steps=t_total)
        train(args, train_dataloader, dev_dataloader, model, optimizer,
              scheduler, tokenizer)

    # Predict checkpoint result

    tokenizer = tokenizer_class.from_pretrained(
        args.output_dir, do_lower_case=args.do_lower_case)
    test_dataset = load_data(args, tokenizer, 'test')
    test_dataloader = sequential_dataloader(test_dataset, args.test_batch_size)

    model = model_class.from_pretrained(args.output_dir)
    model.to(args.device)

    eval_loss, eval_metric = evaluate(args,
                                      model,
                                      test_dataloader,
                                      do_predict=True)
    for key, val in eval_metric.items():
        print('the test dataset {} is {}'.format(key, val))
Example no. 21
                              dropout=dropout)
    model = Seq2Seq.Seq2Seq(encoder, decoder, device).to(device)
    model_name = "S2S.pt"

print("Initialize weights")
model.apply(initialize_weights)

optimizer = optim.Adam(model.parameters(), lr=lr)
target_pad_idx = en_field.vocab.stoi[en_field.pad_token]
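# exclude padding positions from the cross-entropy loss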
criterion = nn.CrossEntropyLoss(ignore_index=target_pad_idx)

best_val_loss = float('inf')
writer = SummaryWriter(log_dir)
for epoch in range(num_epochs):
    s = time.time()
    train_loss = train(model, train_loader, optimizer, criterion, clip=1)
    val_loss = evaluate(model, val_loader, criterion)

    t = time.time()

    epoch_min, epoch_sec = epoch_time(s, t)

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), os.path.join(ckpt_dir, model_name))

    print("Epoch : %02d | Elapsed Time : %02d min %02d sec" %
          (epoch + 1, epoch_min, epoch_sec))
    print("\t Train Loss : %.3f | Train PPL : %7.3f" %
          (train_loss, math.exp(train_loss)))
    print("\t Val   Loss : %.3f | Val   PPL : %7.3f" %
Example no. 22
from utils_new import build_dataset, build_iterator, get_time_dif

parser = argparse.ArgumentParser(description='Chinese Text Classification')
parser.add_argument('--model', type=str, required=True, help='choose a model: Bert, ERNIE')
args = parser.parse_args()


if __name__ == '__main__':
    dataset = 'HITSZQA'  # dataset

    model_name = args.model  # bert
    x = import_module('models.' + model_name)
    config = x.Config(dataset)
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True  # ensure identical results on every run

    start_time = time.time()
    print("Loading data...")
    train_data, dev_data, test_data = build_dataset(config)
    train_iter = build_iterator(train_data, config)
    dev_iter = build_iterator(dev_data, config)
    test_iter = build_iterator(test_data, config)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    # train
    model = x.Model(config).to(config.device)
    train(config, model, train_iter, dev_iter, test_iter)
Example no. 23
        self.train_len = 0
        self.test_len = 0
        self.valid_len = 0
        self.mode = "train"

        ## Transformer parameters
        self.dropout = 0.5
        self.max_len = 5000
        self.nhead = 2

#data_path="E:/study_series/2020_3/re_write_classify/data/"
#data_path="/mnt/data3/wuchunsheng/code/nlper/NLP_task/text_classification/my_classification_cnews/2020_3_30/text_classify/data/"

config = Config()
train_iter, valid_iter, test_iter, TEXT = generate_data(config)
# check_data(train_iter, TEXT)


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# model = RNNModel(config, TEXT).to(device)

model = TransformerModel(config, TEXT).to(device)

train(config, model, train_iter, valid_iter, test_iter)

# res = test(config, model, TEXT, test_iter)  ## tests a whole batch
# print(res)
res = test_one_sentence(config, model, TEXT, test_iter)
print(res)