Example #1
def run_evaluation_bert(args, checkpoint, test_loader, vocab_size):
    device = args.device
    model = BERT().to(device)
    # model = nn.DataParallel(model)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    answer_file = open(args.result_path+'/answer.txt', "w")
    # For ensemble
    logit_file = open(args.result_path + '/logit.txt', "w")
    for i, batch in enumerate(test_loader):
        text, context = batch.text, batch.context
        text = text.type(torch.LongTensor).to(device)
        output = model.run_eval(text)
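        # run_eval is assumed to return class logits of shape (batch, 2): index 0 = NOT_SARCASM, index 1 = SARCASM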
        pred = torch.argmax(output, 1).tolist()
        assert len(pred) == 1
        if pred[0] == 1:
            label = 'SARCASM'
        elif pred[0] == 0:
            label = 'NOT_SARCASM'
        else:
            raise ValueError("Unexpected prediction value: {}".format(pred[0]))
        answer_file.write("twitter_{},{}".format(i+1, label))
        answer_file.write('\n')
        logit_file.write("{},{}".format(output[0][0], output[0][1]))
        logit_file.write("\n")
    answer_file.close()
    logit_file.close()
Example #2
    def __init__(self, lsz, args):
        super().__init__()

        self.bert = BERT(args)

        self.sent_predict = nn.Linear(args.d_model, lsz)
        self.sent_predict.weight.data.normal_(mean=0.0, std=INIT_RANGE)  # zero-mean init; INIT_RANGE is used as the std
        self.sent_predict.bias.data.zero_()
Example #3
    def __init__(self, lsz, args):
        super().__init__()

        self.bert = BERT(args)

        self.sent_predict = nn.Linear(args.d_model, lsz)
        self.word_predict = nn.Linear(args.d_model, args.vsz)

        self.reset_parameters()
Example #4
def main(args):
    train_loader, test_loader = load_data(args)

    if not os.path.isdir('checkpoints'):
        os.mkdir('checkpoints')

    args.vocab_len = len(args.vocab['stoi'].keys())

    model = BERT(args.vocab_len, args.max_len, args.heads, args.embedding_dim,
                 args.N)
    if args.cuda:
        model = model.cuda()

    if args.task:
        print('Start Down Stream Task')
        args.epochs = 3
        args.lr = 3e-5

        state_dict = torch.load(args.checkpoints)
        model.load_state_dict(state_dict['model_state_dict'])

        criterion = {'mlm': None, 'nsp': nn.CrossEntropyLoss()}
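        # 'mlm' is None here: only the NSP-style classification loss is used during fine-tuning.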

        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)

        for epoch in range(1, args.epochs + 1):
            train_mlm_loss, train_nsp_loss, train_loss, train_mlm_acc, train_nsp_acc = _train(
                epoch, train_loader, model, optimizer, criterion, args)
            test_mlm_loss, test_nsp_loss, test_loss, test_mlm_acc, test_nsp_acc = _eval(
                epoch, test_loader, model, criterion, args)
            save_checkpoint(model, optimizer, args, epoch)
    else:
        print('Start Pre-training')
        criterion = {
            'mlm': nn.CrossEntropyLoss(ignore_index=0),
            'nsp': nn.CrossEntropyLoss()
        }
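        # ignore_index=0 keeps positions labeled 0 (padding / unmasked tokens) out of the MLM loss.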
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)

        for epoch in range(1, args.epochs + 1):  # run the full args.epochs epochs, matching the fine-tuning loop above
            train_mlm_loss, train_nsp_loss, train_loss, train_mlm_acc, train_nsp_acc = _train(
                epoch, train_loader, model, optimizer, criterion, args)
            test_mlm_loss, test_nsp_loss, test_loss, test_mlm_acc, test_nsp_acc = _eval(
                epoch, test_loader, model, criterion, args)
            save_checkpoint(model, optimizer, args, epoch)
Example #5
def main():

    random.seed(rdn_seed)
    np.random.seed(rdn_seed)
    torch.manual_seed(rdn_seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    print("device: {}, n_gpu: {}".format(device, n_gpu))
    if device.type == "cuda":  # torch.device never compares equal to a plain string
        torch.cuda.manual_seed_all(rdn_seed)

    tokenizer = BehaviorsBERTTokenizer(vocab_file)
    print("Vocab size:", tokenizer.vocab_size)

    train_dataset = BERTDataset(corpus_path,
                                tokenizer,
                                max_seq_len,
                                corpus_lines=corpus_lines)
    batch_size = per_gpu_batch_size * n_gpu
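    # Effective batch size scales with the number of visible GPUs; n_gpu is 0 on a
    # CPU-only machine, so a guard such as max(n_gpu, 1) may be needed there.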
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size)

    bert = BERT(vocab_size=tokenizer.vocab_size,
                hidden=hidden,
                n_layers=layers,
                attn_heads=attn_heads,
                max_seq_len=max_seq_len)
    trainer = BERTTrainer(bert,
                          tokenizer.vocab_size,
                          epochs,
                          tensorboard_log_dir=tensorboard_log_dir,
                          output_path=output_path,
                          train_dataloader=train_dataloader,
                          with_cuda=torch.cuda.is_available(),
                          log_freq=100,
                          save_steps=100000)

    trainer.train()
Example #6
def run_training_bert(args, dataset, train_loader, val_loader, vocab_size):
    checkpoint_path = os.path.join(args.checkpoint_path, args.checkpoint)
    device = torch.device("cuda:" +
                          args.device if torch.cuda.is_available() else "cpu")

    model = BERT().to(device)

    # Initialize BCELoss function
    # criterion = nn.BCEWithLogitsLoss()
    # Setup Adam optimizers for both G and D
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-5)
    model.train()  # turn on training mode
    # Training Loop
    print("Starting Training Loop...")
    # For each epoch
    for epoch in range(args.epochs):
        # For each batch in the dataloader
        losses = []
        running_corrects = 0
        for i, batch in enumerate(train_loader):
            # format batch
            text, context, label = batch.text, batch.context, batch.label
            # print(text.tolist()[0])
            # print(label.tolist()[0])
            label = label.type(torch.LongTensor).to(device)
            text = text.type(torch.LongTensor).to(device)

            output = model(text, label)
            loss, _ = output

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            losses.append(loss.item())
        epoch_loss = sum(losses) / len(losses)
        print('Epoch: {}, Training Loss: {:.4f}'.format(epoch, epoch_loss))
        # save model
        if epoch % 1 == 0 or epoch == args.epochs - 1:
            torch.save(
                {
                    'epoch': epoch + 1,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'vocab_size': vocab_size,
                    'args': vars(args)
                }, checkpoint_path)
            if args.eval:
                model.eval()
                with torch.no_grad():
                    preds = []
                    labels = []
                    eval_losses = []
                    for i, batch in enumerate(val_loader if val_loader
                                              is not None else train_loader):
                        text, context, label = batch.text, batch.context, batch.label
                        label = label.type(torch.LongTensor).to(device)
                        text = text.type(torch.LongTensor).to(device)
                        output = model(text, label)
                        loss, output = output
                        pred = torch.argmax(output, 1).tolist()
                        preds.extend(pred)
                        labels.extend(label.tolist())
                        eval_losses.append(loss.item())
                    print("{} Precision: {}, Recall: {}, F1: {}, Loss: {}".
                          format(
                              "Train" if val_loader is None else "Valid",
                              sklearn.metrics.precision_score(
                                  np.array(labels).astype('int32'),
                                  np.array(preds)),
                              sklearn.metrics.recall_score(
                                  np.array(labels).astype('int32'),
                                  np.array(preds)),
                              sklearn.metrics.f1_score(
                                  np.array(labels).astype('int32'),
                                  np.array(preds)), np.average(eval_losses)))
                model.train()  # switch back to training mode before the next epoch
Example #7
                            seq_len=args.seq_len,
                            corpus_lines=args.corpus_lines,
                            on_memory=args.on_memory)

print("Creating Dataloader")
train_data_loader = DataLoader(train_dataset,
                               batch_size=args.batch_size,
                               num_workers=args.num_workers)

print("Reading Word Vectors")
weights_matrix = ReadWordVec(args.emb_path, args.emb_filename, args.emb_dim)

print("Building Model")
bert = BERT(len(vocab),
            weights_matrix,
            hidden=args.emb_dim,
            n_layers=args.layers,
            attn_heads=args.attn_heads)

print("Creating Trainer")
trainer = BERTTrainer(bert,
                      len(vocab),
                      args.seq_len,
                      train_dataloader=train_data_loader,
                      lr=args.lr,
                      betas=(args.adam_beta1, args.adam_beta2),
                      weight_decay=args.adam_weight_decay,
                      with_cuda=args.with_cuda,
                      cuda_devices=args.cuda_devices,
                      log_freq=args.log_freq)
Example #8
def train():
    parser = argparse.ArgumentParser()

    parser.add_argument("-c", "--train_dataset", required=True, type=str, help="train dataset for train bert")
    parser.add_argument("-t", "--test_dataset", type=str, default=None, help="test set for evaluate train set")
    parser.add_argument("-v", "--vocab_path", required=True, type=str, help="built vocab model path with bert-vocab")
    parser.add_argument("-o", "--output_path", required=True, type=str, help="ex)output/bert.model")

    parser.add_argument("-hs", "--hidden", type=int, default=256, help="hidden size of transformer model")
    parser.add_argument("-l", "--layers", type=int, default=8, help="number of layers")
    parser.add_argument("-a", "--attn_heads", type=int, default=8, help="number of attention heads")
    parser.add_argument("-s", "--seq_len", type=int, default=20, help="maximum sequence len")

    parser.add_argument("-b", "--batch_size", type=int, default=64, help="number of batch_size")
    parser.add_argument("-e", "--epochs", type=int, default=10, help="number of epochs")
    parser.add_argument("-w", "--num_workers", type=int, default=5, help="dataloader worker size")

    parser.add_argument("--with_cuda", type=bool, default=True, help="training with CUDA: true, or false")
    parser.add_argument("--log_freq", type=int, default=10, help="printing loss every n iter: setting n")
    parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus")
    parser.add_argument("--cuda_devices", type=int, nargs='+', default=None, help="CUDA device ids")
    parser.add_argument("--on_memory", type=bool, default=True, help="Loading on memory: true or false")

    parser.add_argument("--lr", type=float, default=1e-3, help="learning rate of adam")
    parser.add_argument("--adam_weight_decay", type=float, default=0.01, help="weight_decay of adam")
    parser.add_argument("--adam_beta1", type=float, default=0.9, help="adam first beta value")
    parser.add_argument("--adam_beta2", type=float, default=0.999, help="adam first beta value")

    args = parser.parse_args()

    print("Loading Vocab", args.vocab_path)
    vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size: ", len(vocab))

    print("Loading Train Dataset", args.train_dataset)
    train_dataset = BERTDataset(args.train_dataset, vocab, seq_len=args.seq_len,
                                corpus_lines=args.corpus_lines, on_memory=args.on_memory)

    print("Loading Test Dataset", args.test_dataset)
    test_dataset = BERTDataset(args.test_dataset, vocab, seq_len=args.seq_len, on_memory=args.on_memory) \
        if args.test_dataset is not None else None

    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
        if test_dataset is not None else None

    print("Building BERT model")
    bert = BERT(len(vocab), hidden=args.hidden, n_layers=args.layers, attn_heads=args.attn_heads)

    print("Creating BERT Trainer")
    trainer = BERTTrainer(bert, len(vocab), train_dataloader=train_data_loader, test_dataloader=test_data_loader,
                          lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq)

    print("Training Start")
    for epoch in range(args.epochs):
        print("eee")
        trainer.train(epoch)
        trainer.save(epoch, args.output_path)

        if test_data_loader is not None:
            trainer.test(epoch)
Example #9
    # TODO: Load dataset

    train_dataset = MyDataset(args.train_file)
    word2vec = train_dataset.symbol2id
    test_dataset = MyDataset(args.test_file,
                             vocab=(train_dataset.symbol2id,
                                    train_dataset.id2sybmol))

    test_loader = DataLoader(test_dataset,
                             batch_size=hyperparams['batch_size'])
    num_tokens = len(train_dataset.id2sybmol)
    print('num tokens', num_tokens)
    print('size', test_dataset.seq.size())

    model = BERT(num_tokens).to(device)

    if args.load:
        # print("Model's state_dict:")
        # for param_tensor in torch.load('./model.pt', map_location=torch.device(device)):
        #     print(param_tensor, "\t", model.state_dict()[param_tensor].size())
        model.load_state_dict(
            torch.load('./model.pt', map_location=torch.device(device)))
    if args.train:
        train(model, train_dataset, experiment, hyperparams)
    if args.save:
        torch.save(model.state_dict(), './model.pt')
    if args.test:
        test(model, test_loader, experiment, hyperparams)
    if args.analysis:
        embedding_analysis(model, experiment, train_dataset, test_dataset)
Example #10
def train():
    parser = argparse.ArgumentParser()

    parser.add_argument("-c",
                        "--dataset",
                        required=True,
                        type=str,
                        help="dataset")
    # parser.add_argument("-c", "--train_dataset", required=True,
    #                     type=str, help="train dataset for train bert")
    # parser.add_argument("-t", "--test_dataset", type=str,
    #                     default=None, help="test set for evaluate train set")
    # parser.add_argument("-v", "--vocab_path", required=True,
    #                     type=str, help="built vocab model path with bert-vocab")
    parser.add_argument("-o",
                        "--output_path",
                        required=True,
                        type=str,
                        help="ex)output/bert.model")

    parser.add_argument("-hs",
                        "--hidden",
                        type=int,
                        default=256,
                        help="hidden size of transformer model")
    parser.add_argument("-l",
                        "--layers",
                        type=int,
                        default=8,
                        help="number of layers")
    parser.add_argument("-a",
                        "--attn_heads",
                        type=int,
                        default=8,
                        help="number of attention heads")
    parser.add_argument("-s",
                        "--seq_len",
                        type=int,
                        default=64,
                        help="maximum sequence len")

    parser.add_argument("-b",
                        "--batch_size",
                        type=int,
                        default=64,
                        help="number of batch_size")
    parser.add_argument("-e",
                        "--epochs",
                        type=int,
                        default=10,
                        help="number of epochs")
    parser.add_argument("-w",
                        "--num_workers",
                        type=int,
                        default=5,
                        help="dataloader worker size")
    parser.add_argument("--duplicate",
                        type=int,
                        default=5,
                        help="dataloader worker size")

    parser.add_argument("--with_cuda",
                        type=bool,
                        default=True,
                        help="training with CUDA: true, or false")
    parser.add_argument("--log_freq",
                        type=int,
                        default=10,
                        help="printing loss every n iter: setting n")
    parser.add_argument("--corpus_lines",
                        type=int,
                        default=None,
                        help="total number of lines in corpus")
    parser.add_argument("--cuda_devices",
                        type=int,
                        nargs='+',
                        default=None,
                        help="CUDA device ids")
    parser.add_argument("--on_memory",
                        type=bool,
                        default=True,
                        help="Loading on memory: true or false")

    parser.add_argument("--lr",
                        type=float,
                        default=1e-3,
                        help="learning rate of adam")
    parser.add_argument("--adam_weight_decay",
                        type=float,
                        default=0.01,
                        help="weight_decay of adam")
    parser.add_argument("--adam_beta1",
                        type=float,
                        default=0.9,
                        help="adam first beta value")
    parser.add_argument("--adam_beta2",
                        type=float,
                        default=0.999,
                        help="adam first beta value")
    parser.add_argument("--dropout",
                        type=float,
                        default=0.2,
                        help="dropout value")

    args = parser.parse_args()

    print("Load Data", args.dataset)
    data_reader = DataReader(args.dataset, seq_len=args.seq_len)
    neg_data_reader = DataReader(args.dataset,
                                 graphs=data_reader.graphs,
                                 shuffle=True,
                                 duplicate=args.duplicate,
                                 seq_len=args.seq_len)
    # print("Loading Vocab", args.vocab_path)
    print("Loading Vocab")
    vocab = Vocab(data_reader.graphs)
    # vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size: ", len(vocab))

    print("Shuffle Data")
    'TODO'

    print("Loading Train Dataset", args.dataset)
    train_dataset = CustomBERTDataset(
        data_reader.graphs[:int(len(data_reader) * 0.8)],
        vocab,
        seq_len=args.seq_len,
        on_memory=args.on_memory,
        n_neg=args.duplicate)
    # pdb.set_trace()
    neg_train_dataset = CustomBERTDataset(
        neg_data_reader.graphs[:args.duplicate * len(train_dataset)],
        vocab,
        seq_len=args.seq_len,
        on_memory=args.on_memory,
        n_neg=args.duplicate)
    # pdb.set_trace()
    assert len(neg_train_dataset) == args.duplicate * len(train_dataset)
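    # The assert above checks that every positive graph has args.duplicate negative counterparts.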
    # print("Loading Test Dataset", args.test_dataset)
    print("Loading Dev Dataset", args.dataset)
    test_dataset = CustomBERTDataset(
        data_reader.graphs[int(len(data_reader) * 0.8):],
        vocab,
        seq_len=args.seq_len,
        on_memory=args.on_memory,
        n_neg=args.duplicate)  # \
    neg_test_dataset = CustomBERTDataset(
        neg_data_reader.graphs[-args.duplicate * len(test_dataset):],
        vocab,
        seq_len=args.seq_len,
        on_memory=args.on_memory,
        n_neg=args.duplicate)  # \
    assert len(neg_test_dataset) == args.duplicate * len(test_dataset)
    # if args.test_dataset is not None else None
    # pdb.set_trace()
    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   num_workers=args.num_workers,
                                   collate_fn=my_collate)
    neg_train_data_loader = DataLoader(neg_train_dataset,
                                       batch_size=args.batch_size *
                                       args.duplicate,
                                       num_workers=args.num_workers,
                                       collate_fn=my_collate)

    test_data_loader = DataLoader(test_dataset,
                                  batch_size=args.batch_size,
                                  num_workers=args.num_workers,
                                  collate_fn=my_collate)  # \
    neg_test_data_loader = DataLoader(neg_test_dataset,
                                      batch_size=args.batch_size *
                                      args.duplicate,
                                      num_workers=args.num_workers,
                                      collate_fn=my_collate)  # \
    # if test_dataset is not None else None
    # assert False
    print("Building BERT model")
    bert = BERT(len(vocab),
                hidden=args.hidden,
                n_layers=args.layers,
                attn_heads=args.attn_heads,
                dropout=args.dropout)

    print("Creating BERT Trainer")
    # trainer = BERTTrainer(bert, len(vocab), train_dataloader=train_data_loader, test_dataloader=test_data_loader,
    #                       lr=args.lr, betas=(
    #                           args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay,
    #                       with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq, pad_index=vocab.pad_index)
    trainer = BERTTrainer(bert,
                          len(vocab),
                          train_dataloader=train_data_loader,
                          test_dataloader=test_data_loader,
                          lr=args.lr,
                          betas=(args.adam_beta1, args.adam_beta2),
                          weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda,
                          cuda_devices=args.cuda_devices,
                          log_freq=args.log_freq,
                          pad_index=vocab.pad_index)
    # raise NotImplementedError
    print("Training Start")
    best_loss = None
    for epoch in range(args.epochs):
        # test_loss = trainer.test(epoch)

        train_loss = trainer.train(epoch)
        torch.cuda.empty_cache()

        # if test_data_loader is not None:
        test_loss = trainer.test(epoch)
        if best_loss is None or test_loss < best_loss:
            best_loss = test_loss
            trainer.save(epoch, args.output_path)

        torch.cuda.empty_cache()
Example #11
        preds = output.argmax(dim=1)
        for j in range(len(preds)):
            total += 1
            if preds[j] == target[j]:
                total_correct += 1

    return total_correct/total


if __name__ == '__main__':
    mnli = BERTMNLI(TRAIN_DATA_DIR, bert_type=BERT_TYPE)
    match = BERTMNLI(MATCH_DATA_DIR, bert_type=BERT_TYPE)
    mismatch = BERTMNLI(MISMATCH_DATA_DIR, bert_type=BERT_TYPE)

    checkpoint = torch.load('storage/bert-base-dnli.pt')
    model = BERT(bert_type=BERT_TYPE)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    ###

    optimizer = Adam(model.parameters(), lr=LEARNING_RATE)
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    criterion = nn.CrossEntropyLoss()

    best_acc = 0

    for epoch in range(1, NUM_EPOCHS+1):
        train_loss = train(mnli, model, criterion, optimizer, device)
        match_acc = eval(match, model, device)
        mismatch_acc = eval(mismatch, model, device)
Example #12
    word_list = list(set(" ".join(sentences).split()))
    word_dict = {'[PAD]': 0, '[CLS]': 1, '[SEP]': 2, '[MASK]': 3}

    # Build the vocabulary
    for i, w in enumerate(word_list):
        word_dict[w] = i + 4
    number_dict = {i: w for i, w in enumerate(word_dict)}
    vocab_size = len(word_dict)

    # Convert each sentence into its corresponding sequence of token ids
    token_list = list()
    for sentence in sentences:
        arr = [word_dict[s] for s in sentence.split()]
        token_list.append(arr)

    model = BERT()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    batch = make_batch()
    input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(
        torch.LongTensor, zip(*batch))
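    # make_batch() is assumed to yield (input_ids, segment_ids, masked_tokens, masked_pos, isNext) tuples.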

    for epoch in range(100):
        optimizer.zero_grad()
        logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)
        loss_lm = criterion(logits_lm.transpose(1, 2),
                            masked_tokens)  # for masked LM
        loss_lm = (loss_lm.float()).mean()
        loss_clsf = criterion(logits_clsf,
                              isNext)  # for sentence classification
Example #13
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                          do_lower_case=True)
bert_model = BertModel.from_pretrained("bert-base-uncased")
# tokenizer = AlbertTokenizer.from_pretrained('albert-base-v1', do_lower_case=True)
# bert_model = AlbertModel.from_pretrained("albert-base-v1")

model = BERT(2, bert_model)
model = model.to(device)

train_dataloader, validation_dataloader, test_dataloader = get_baseline_dataloader(
    args.data_file, args.batch_size, tokenizer)

optimizer = AdamW(model.parameters(), lr=args.lr)
total_steps = len(train_dataloader) * args.epochs
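# Total number of optimizer updates; used below to size the linear warm-up (first 10% of steps).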
if new_version:
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(0.1 * total_steps),
        #warmup_steps = 0, # Default value in run_glue.py
        num_training_steps=total_steps)
    #t_total = total_steps)
else:
Example #14
def train():
    parser = argparse.ArgumentParser()

    parser.add_argument("-c",
                        "--dataset",
                        required=True,
                        type=str,
                        help="dataset")

    parser.add_argument("-o",
                        "--output_path",
                        required=True,
                        type=str,
                        help="ex)output/bert.model")
    parser.add_argument("-t",
                        "--test_path",
                        required=True,
                        type=str,
                        help="ex)output/bert.model")

    parser.add_argument("-hs",
                        "--hidden",
                        type=int,
                        default=256,
                        help="hidden size of transformer model")
    parser.add_argument("-me",
                        "--markdown_emb_size",
                        type=int,
                        default=256,
                        help="hidden size of transformer model")
    parser.add_argument("-l",
                        "--layers",
                        type=int,
                        default=8,
                        help="number of layers")
    parser.add_argument("-a",
                        "--attn_heads",
                        type=int,
                        default=8,
                        help="number of attention heads")
    parser.add_argument("-s",
                        "--seq_len",
                        type=int,
                        default=64,
                        help="maximum sequence len")

    parser.add_argument("-b",
                        "--batch_size",
                        type=int,
                        default=64,
                        help="number of batch_size")
    parser.add_argument("-e",
                        "--epochs",
                        type=int,
                        default=10,
                        help="number of epochs")
    parser.add_argument("-w",
                        "--num_workers",
                        type=int,
                        default=5,
                        help="dataloader worker size")
    parser.add_argument("--duplicate",
                        type=int,
                        default=5,
                        help="dataloader worker size")
    parser.add_argument("--model_path", type=str, help="ex)output/bert.model")
    parser.add_argument("--hinge_loss_start_point", type=int, default=20)
    parser.add_argument("--entropy_start_point", type=int, default=30)
    parser.add_argument("--with_cuda",
                        type=bool,
                        default=True,
                        help="training with CUDA: true, or false")
    parser.add_argument("--log_freq",
                        type=int,
                        default=10,
                        help="printing loss every n iter: setting n")
    parser.add_argument("--corpus_lines",
                        type=int,
                        default=None,
                        help="total number of lines in corpus")
    parser.add_argument("--cuda_devices",
                        type=str,
                        default='0',
                        help="CUDA device ids")
    parser.add_argument("--max_graph_num",
                        type=int,
                        default=3000000,
                        help="printing loss every n iter: setting n")

    parser.add_argument("--on_memory",
                        type=bool,
                        default=True,
                        help="Loading on memory: true or false")
    parser.add_argument("--n_topics", type=int, default=50)
    parser.add_argument("--lr",
                        type=float,
                        default=1e-3,
                        help="learning rate of adam")
    parser.add_argument("--adam_weight_decay",
                        type=float,
                        default=0.01,
                        help="weight_decay of adam")
    parser.add_argument("--adam_beta1",
                        type=float,
                        default=0.9,
                        help="adam first beta value")
    parser.add_argument("--adam_beta2",
                        type=float,
                        default=0.999,
                        help="adam first beta value")
    parser.add_argument("--dropout",
                        type=float,
                        default=0.2,
                        help="dropout value")
    parser.add_argument("--weak_supervise", action="store_true")
    parser.add_argument(
        "--neighbor",
        action="store_true",
        help="force topic distribution over neighbor nodes to be close")
    parser.add_argument("--min_occur",
                        type=int,
                        default=3,
                        help="minimum of occurrence")

    parser.add_argument("--use_sub_token", action="store_true")
    parser.add_argument("--context",
                        action="store_true",
                        help="use information from neighbor cells")
    parser.add_argument("--markdown", action="store_true", help="use markdown")
    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda_devices

    print("Load Data", args.dataset)
    data_reader = DataReader(args.dataset,
                             use_sub_token=args.use_sub_token,
                             max_graph_num=args.max_graph_num)

    labeled_data_reader = DataReader(args.test_path,
                                     use_sub_token=args.use_sub_token)

    print("Loading Vocab")
    if args.markdown:
        vocab = UnitedVocab(data_reader.graphs,
                            min_occur=args.min_occur,
                            use_sub_token=args.use_sub_token)

    else:
        vocab = SNAPVocab(data_reader.graphs,
                          min_occur=args.min_occur,
                          use_sub_token=args.use_sub_token)

    print("Vocab Size: ", len(vocab))

    print("Loading Train Dataset", args.dataset)
    train_dataset = SNAPDataset(
        data_reader.graphs[:int(len(data_reader) * 0.8)],
        vocab,
        seq_len=args.seq_len,
        on_memory=args.on_memory,
        n_neg=args.duplicate,
        use_sub_token=args.use_sub_token,
        n_topics=args.n_topics,
        markdown=args.markdown)

    print(len(train_dataset))

    # print("Loading Test Dataset", args.test_dataset)
    print("Loading Dev Dataset", args.dataset)
    test_dataset = SNAPDataset(data_reader.graphs[int(len(data_reader) *
                                                      0.8):],
                               vocab,
                               seq_len=args.seq_len,
                               on_memory=args.on_memory,
                               n_neg=args.duplicate,
                               use_sub_token=args.use_sub_token,
                               n_topics=args.n_topics,
                               markdown=args.markdown)  # \
    print(len(test_dataset))

    labeled_dataset = SNAPDataset(labeled_data_reader.graphs,
                                  vocab,
                                  seq_len=args.seq_len,
                                  on_memory=args.on_memory,
                                  use_sub_token=args.use_sub_token,
                                  markdown=args.markdown)

    # if args.test_dataset is not None else None
    # pdb.set_trace()
    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   num_workers=args.num_workers,
                                   collate_fn=temp_collate)

    test_data_loader = DataLoader(test_dataset,
                                  batch_size=args.batch_size,
                                  num_workers=args.num_workers,
                                  collate_fn=temp_collate)  # \

    labeled_data_loader = DataLoader(labeled_dataset,
                                     batch_size=args.batch_size,
                                     num_workers=args.num_workers,
                                     collate_fn=temp_collate)

    # if test_dataset is not None else None
    # assert False
    print("Building BERT model")
    bert = BERT(len(vocab),
                hidden=args.hidden,
                n_layers=args.layers,
                attn_heads=args.attn_heads,
                dropout=args.dropout)

    print("Creating BERT Trainer")

    trainer = TempTrainer(bert,
                          len(vocab),
                          train_dataloader=train_data_loader,
                          test_dataloader=test_data_loader,
                          lr=args.lr,
                          betas=(args.adam_beta1, args.adam_beta2),
                          weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda,
                          cuda_devices=args.cuda_devices,
                          log_freq=args.log_freq,
                          pad_index=vocab.pad_index,
                          model_path=args.model_path,
                          weak_supervise=args.weak_supervise,
                          context=args.context,
                          markdown=args.markdown,
                          hinge_loss_start_point=args.hinge_loss_start_point,
                          entropy_start_point=args.entropy_start_point)
    # raise NotImplementedError
    print("Training Start")
    output_folder = args.output_path.split('.')[0]
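    # args.output_path with its extension stripped (e.g. "output/bert.model" -> "output/bert") is used as the results directory.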
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    with open(os.path.join(output_folder, './setting.json'), 'w') as fout:
        json.dump(args.__dict__, fout, ensure_ascii=False, indent=2)
    # pdb.set_trace()
    best_loss = None

    for epoch in range(args.epochs):
        train_loss = trainer.train(epoch)

        # if test_data_loader is not None:
        test_loss = trainer.test(epoch)
        trainer.save(epoch, os.path.join(output_folder, args.output_path))

        stages, stage_vecs = trainer.api(labeled_data_loader)

        correct = 0
        zero_cells = 0
        for g in labeled_dataset.graphs:
            if int(g["stage"]) == 0:
                zero_cells += 1
        # print(zero_cells)
        for i, g in enumerate(labeled_dataset.graphs):
            if stages[i] == int(g["stage"]) and int(g["stage"]) != 0:
                correct += 1
            else:
                pass
        accuracy = correct / (len(stages) - zero_cells)
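        # Accuracy is computed only over labeled cells whose gold stage is non-zero.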

        # print(accuracy)

        with open(os.path.join(output_folder, './results.txt'), 'a') as fout:
            json.dump({
                "epoch": epoch,
                "accuracy": accuracy,
                "loss": test_loss
            }, fout)
            fout.write('\n')

        with open(os.path.join(output_folder, 'graphs_{}.txt'.format(epoch)),
                  'w') as fout:
            for i, g in enumerate(labeled_dataset.graphs):
                g["pred"] = stages[i]
                g["stage_vec"] = stage_vecs[i]
                fout.write(json.dumps(g))
                fout.write('\n')

        torch.cuda.empty_cache()
Example #15
# neg_train_data_loader = DataLoader(
#     neg_train_dataset, batch_size=args.batch_size * args.duplicate, num_workers=args.num_workers, collate_fn=my_collate)

# test_data_loader = DataLoader(
#     test_dataset, batch_size=args.batch_size, num_workers=args.num_workers, collate_fn=my_collate)  # \
# neg_test_data_loader = DataLoader(
#     neg_test_dataset, batch_size=args.batch_size * args.duplicate, num_workers=args.num_workers, collate_fn=my_collate)  # \
# labeled_data_loader = DataLoader(
#     labeled_dataset, batch_size=args.batch_size, num_workers=args.num_workers, collate_fn=my_collate)
# if test_dataset is not None else None
# assert False
# dataset is not None else None
# assert False
print("Building BERT model")
bert = BERT(len(vocab),
            hidden=args.hidden,
            n_layers=args.layers,
            attn_heads=args.attn_heads)

print("Creating BERT Trainer")
# trainer = BERTTrainer(bert, len(vocab), train_dataloader=train_data_loader, test_dataloader=test_data_loader,
#                       lr=args.lr, betas=(
#                           args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay,
#                       with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq, pad_index=vocab.pad_index)
temp_data_loader = DataLoader(temp_dataset,
                              batch_size=args.batch_size,
                              num_workers=args.num_workers,
                              collate_fn=my_collate)
trainer = ReconstructionBERTTrainer(bert,
                                    len(vocab),
                                    len(markdown_vocab),
                                    args.markdown_emb_size,
Example #16
    format='csv',
    skip_header=True,
    fields=rev_field
)

novel = TabularDataset(
    path=data_novelty_csv_path,
    format='csv',
    skip_header=True,
    fields=nov_field
)

review_iter = Iterator(review, batch_size=1, device=device, sort=False, sort_within_batch=False, repeat=False, shuffle=False)
novel_iter = Iterator(novel, batch_size=1, device=device, sort=False, sort_within_batch=False, repeat=False, shuffle=False)

model = BERT(feature_len).to(device)
model.eval()  # disable dropout so the extracted features are deterministic

print("Computing deep features...")

review_features = []
for x in tqdm(review_iter):
    text = x.comment_text.type(torch.LongTensor)
    text = text.to(device)
    feature = model(text)
    review_features.append(feature.detach().cpu().numpy())
review_features = np.vstack(review_features)
print(review_features.shape)

novel_features = []
for x in tqdm(novel_iter):
    text = x.novel.type(torch.LongTensor)
Example #17
def get_models():
    from model import BERT
    return {'BERT': BERT()}
Example #18
def main(args):
    assert torch.cuda.is_available(), "need to use GPUs"

    use_cuda = torch.cuda.is_available()
    cuda_devices = list(map(int, args.cuda_devices.split(",")))
    is_multigpu = len(cuda_devices) > 1
    device = "cuda"

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    if is_multigpu:  # is_multigpu is already a bool, so comparing it against 1 would always be False
        torch.cuda.manual_seed_all(args.seed)

    data = torch.load(args.data)
    dataset = BERTDataSet(data['word'], data['max_len'], data["dict"],
                          args.batch_size * args.steps)
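    # The dataset length is batch_size * steps, so one pass over the DataLoader performs args.steps updates.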
    training_data = DataLoader(dataset,
                               batch_size=args.batch_size,
                               num_workers=args.num_cpus)

    model = BERT(dataset.word_size, data["max_len"], args.n_stack_layers,
                 args.d_model, args.d_ff, args.n_head, args.dropout)

    print(
        f"BERT has {sum(x.numel() for x in model.parameters())} parameters in total"
    )

    # nn.DataParallel wraps modules, not optimizers, so the Adam instance is passed to the
    # schedule wrapper directly; the model itself is wrapped in DataParallel further below.
    optimizer = ScheduledOptim(
        torch.optim.Adam(model.get_trainable_parameters(),
                         lr=args.lr,
                         betas=(0.9, 0.999),
                         eps=1e-09,
                         weight_decay=0.01), args.d_model, args.n_warmup_steps)

    w_criterion = WordCrossEntropy()
    w_criterion = w_criterion.to(device)

    s_criterion = torch.nn.CrossEntropyLoss()

    model = model.to(device)
    model = torch.nn.DataParallel(model, device_ids=cuda_devices)
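    # DataParallel replicates the model on the listed GPUs and splits each input batch across them.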
    model.train()
    for step, datas in enumerate(training_data):
        inp, pos, sent_label, word_label, segment_label = list(
            map(lambda x: x.to(device), datas))
        sent_label = sent_label.view(-1)
        optimizer.zero_grad()
        word, sent = model(inp, pos, segment_label)
        w_loss, w_corrects, tgt_sum = w_criterion(word, word_label)
        s_loss = s_criterion(sent, sent_label)
        if is_multigpu:
            w_loss, s_loss = w_loss.mean(), s_loss.mean()
        loss = w_loss + s_loss
        loss.backward()
        optimizer.step()
        s_corrects = (torch.max(sent, 1)[1].data == sent_label.data).sum()

        print(
            f"[Step {step+1}/{args.steps}] [word_loss: {w_loss:.5f}, sent_loss: {s_loss:.5f}, loss: {loss:.5f}, w_pre: {w_corrects/tgt_sum*100:.2f}% {w_corrects}/{tgt_sum}, s_pre: {float(s_corrects)/args.batch_size*100:.2f}% {s_corrects}/{args.batch_size}]"
        )

        if tf is not None:
            add_summary_value("Word loss", w_loss, step)
            add_summary_value("Sent loss", s_loss, step)
            add_summary_value("Loss", loss, step)
            add_summary_value("Word predict", w_corrects / tgt_sum, step)
            add_summary_value("Sent predict",
                              float(s_corrects) / args.batch_size, step)
            tf_summary_writer.flush()