num_workers=args.num_workers)

print("Reading Word Vectors")
weights_matrix = ReadWordVec(args.emb_path, args.emb_filename, args.emb_dim)

print("Building Model")
bert = BERT(len(vocab),
            weights_matrix,
            hidden=args.emb_dim,
            n_layers=args.layers,
            attn_heads=args.attn_heads)

print("Creating Trainer")
trainer = BERTTrainer(bert,
                      len(vocab),
                      args.seq_len,
                      train_dataloader=train_data_loader,
                      lr=args.lr,
                      betas=(args.adam_beta1, args.adam_beta2),
                      weight_decay=args.adam_weight_decay,
                      with_cuda=args.with_cuda,
                      cuda_devices=args.cuda_devices,
                      log_freq=args.log_freq)

print("Training Start")
for epoch in range(args.epochs):
    trainer.train(epoch)
    trainer.save(epoch, args.output_path)

trainer.test(1)
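
ReadWordVec is external to this snippet. Below is a minimal hypothetical sketch, assuming a GloVe-style text file (one token followed by emb_dim floats per line) whose row order matches the vocab indices; the real implementation may differ:

import os
import numpy as np

def ReadWordVec(emb_path, emb_filename, emb_dim):
    # Hypothetical sketch: stack one emb_dim-float vector per vocab entry,
    # read from a whitespace-separated embedding file.
    vectors = []
    with open(os.path.join(emb_path, emb_filename), encoding="utf-8") as f:
        for line in f:
            parts = line.rstrip().split(" ")
            vectors.append(np.asarray(parts[1:emb_dim + 1], dtype=np.float32))
    return np.stack(vectors)
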
Example #2
def train():
    parser = argparse.ArgumentParser()

    parser.add_argument("-c", "--train_dataset", required=True, type=str, help="train dataset for train bert")
    parser.add_argument("-t", "--test_dataset", type=str, default=None, help="test set for evaluate train set")
    parser.add_argument("-v", "--vocab_path", required=True, type=str, help="built vocab model path with bert-vocab")
    parser.add_argument("-o", "--output_path", required=True, type=str, help="ex)output/bert.model")

    parser.add_argument("-hs", "--hidden", type=int, default=256, help="hidden size of transformer model")
    parser.add_argument("-l", "--layers", type=int, default=8, help="number of layers")
    parser.add_argument("-a", "--attn_heads", type=int, default=8, help="number of attention heads")
    parser.add_argument("-s", "--seq_len", type=int, default=20, help="maximum sequence len")

    parser.add_argument("-b", "--batch_size", type=int, default=64, help="number of batch_size")
    parser.add_argument("-e", "--epochs", type=int, default=10, help="number of epochs")
    parser.add_argument("-w", "--num_workers", type=int, default=5, help="dataloader worker size")

    parser.add_argument("--with_cuda", type=bool, default=True, help="training with CUDA: true, or false")
    parser.add_argument("--log_freq", type=int, default=10, help="printing loss every n iter: setting n")
    parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus")
    parser.add_argument("--cuda_devices", type=int, nargs='+', default=None, help="CUDA device ids")
    parser.add_argument("--on_memory", type=bool, default=True, help="Loading on memory: true or false")

    parser.add_argument("--lr", type=float, default=1e-3, help="learning rate of adam")
    parser.add_argument("--adam_weight_decay", type=float, default=0.01, help="weight_decay of adam")
    parser.add_argument("--adam_beta1", type=float, default=0.9, help="adam first beta value")
    parser.add_argument("--adam_beta2", type=float, default=0.999, help="adam first beta value")

    args = parser.parse_args()

    print("Loading Vocab", args.vocab_path)
    vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size: ", len(vocab))

    print("Loading Train Dataset", args.train_dataset)
    train_dataset = BERTDataset(args.train_dataset, vocab, seq_len=args.seq_len,
                                corpus_lines=args.corpus_lines, on_memory=args.on_memory)

    print("Loading Test Dataset", args.test_dataset)
    test_dataset = BERTDataset(args.test_dataset, vocab, seq_len=args.seq_len, on_memory=args.on_memory) \
        if args.test_dataset is not None else None

    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
        if test_dataset is not None else None

    print("Building BERT model")
    bert = BERT(len(vocab), hidden=args.hidden, n_layers=args.layers, attn_heads=args.attn_heads)

    print("Creating BERT Trainer")
    trainer = BERTTrainer(bert, len(vocab), train_dataloader=train_data_loader, test_dataloader=test_data_loader,
                          lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq)

    print("Training Start")
    for epoch in range(args.epochs):
        print("eee")
        trainer.train(epoch)
        trainer.save(epoch, args.output_path)

        if test_data_loader is not None:
            trainer.test(epoch)
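
trainer.save writes one checkpoint per epoch. Assuming it serializes the whole module with torch.save (the trainer class is not shown here, and the ".ep3" filename suffix below is hypothetical), reloading a saved model could look like:

import torch

# Load a hypothetical epoch-3 checkpoint written under args.output_path.
bert = torch.load("output/bert.model.ep3", map_location="cpu")
bert.eval()
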
Example #3
def train():
    parser = argparse.ArgumentParser()
    parser.add_argument("-td",
                        "--train_dataset",
                        type=str,
                        required=True,
                        help="train set")
    parser.add_argument("-vd",
                        "--valid_dataset",
                        type=str,
                        default=None,
                        help="validation set")
    parser.add_argument("-v",
                        "--vocab_path",
                        required=True,
                        type=str,
                        help="vocab path")
    parser.add_argument("-o",
                        "--output_path",
                        required=True,
                        type=str,
                        help="model save path")

    parser.add_argument("-fs",
                        "--feed_forward_hidden",
                        type=int,
                        default=4096,
                        help="hidden size of feed-forward network")
    parser.add_argument("-hs",
                        "--hidden",
                        type=int,
                        default=1024,
                        help="hidden size of transformer model")
    parser.add_argument("-l",
                        "--layers",
                        type=int,
                        default=6,
                        help="number of transformer layers")
    parser.add_argument("-a",
                        "--attn_heads",
                        type=int,
                        default=8,
                        help="number of attention heads")
    parser.add_argument("-p",
                        "--path_num",
                        type=int,
                        default=100,
                        help="a AST's maximum path num")
    parser.add_argument("-n",
                        "--node_num",
                        type=int,
                        default=20,
                        help="a path's maximum node num")
    parser.add_argument("-c",
                        "--code_len",
                        type=int,
                        default=200,
                        help="maximum code len")

    parser.add_argument("-al",
                        "--alpha",
                        type=int,
                        default=0.75,
                        help="loss weight")
    parser.add_argument("-b",
                        "--batch_size",
                        type=int,
                        default=4096,
                        help="number of batch_size")
    parser.add_argument("-e",
                        "--epochs",
                        type=int,
                        default=1,
                        help="number of epochs")
    parser.add_argument("-w",
                        "--num_workers",
                        type=int,
                        default=0,
                        help="dataloader worker num")

    parser.add_argument("--with_cuda",
                        type=bool,
                        default=False,
                        help="training with CUDA: true, or false")
    parser.add_argument("--log_freq",
                        type=int,
                        default=10,
                        help="printing loss every n iter: setting n")
    parser.add_argument("--corpus_lines",
                        type=int,
                        default=None,
                        help="total number of lines in corpus")
    parser.add_argument("--cuda_devices",
                        type=int,
                        nargs='+',
                        default=None,
                        help="CUDA device ids")

    parser.add_argument("--lr",
                        type=float,
                        default=1e-5,
                        help="learning rate of adam")
    parser.add_argument("--adam_weight_decay",
                        type=float,
                        default=0.01,
                        help="weight_decay of adam")
    parser.add_argument("--adam_beta1",
                        type=float,
                        default=0.9,
                        help="adam first beta value")
    parser.add_argument("--adam_beta2",
                        type=float,
                        default=0.999,
                        help="adam first beta value")

    args = parser.parse_args()

    print("Loading Vocab", args.vocab_path)
    vocab = TokenVocab.load_vocab(args.vocab_path)
    # source and target corpus share the vocab
    print("Vocab Size: ", len(vocab))

    print("Loading Train Dataset")
    train_dataset = TreeBERTDataset(vocab,
                                    args.train_dataset,
                                    path_num=args.path_num,
                                    node_num=args.node_num,
                                    code_len=args.code_len,
                                    is_fine_tune=False,
                                    corpus_lines=args.corpus_lines)

    print("Loading valid Dataset")
    valid_dataset = TreeBERTDataset(vocab, args.valid_dataset, path_num=args.path_num, node_num=args.node_num,
                                    code_len=args.code_len, is_fine_tune=False, corpus_lines=args.corpus_lines) \
        if args.valid_dataset is not None else None

    # Creating Dataloader
    train_data_loader = DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   num_workers=args.num_workers)
    valid_data_loader = DataLoader(valid_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
        if valid_dataset is not None else None

    print("Building model")
    dropout = 0.1
    enc = Encoder(len(vocab),
                  args.node_num,
                  args.hidden,
                  args.layers,
                  args.attn_heads,
                  args.feed_forward_hidden,
                  dropout,
                  max_length=args.path_num)
    dec = Decoder(len(vocab),
                  args.hidden,
                  args.layers,
                  args.attn_heads,
                  args.feed_forward_hidden,
                  dropout,
                  max_length=args.code_len + 2)

    PAD_IDX = vocab.pad_index
    transformer = Seq2Seq(enc, dec, args.hidden, PAD_IDX)

    print("Creating Trainer")
    trainer = BERTTrainer(transformer,
                          args.alpha,
                          len(vocab),
                          train_dataloader=train_data_loader,
                          test_dataloader=valid_data_loader,
                          lr=args.lr,
                          betas=(args.adam_beta1, args.adam_beta2),
                          weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda,
                          cuda_devices=args.cuda_devices,
                          log_freq=args.log_freq)

    print("Training Start")
    min_loss = float("inf")
    loss = 0
    best_model = None
    for epoch in range(args.epochs):
        trainer.train(epoch)

        if valid_data_loader is not None:
            loss = trainer.test(epoch)

        if min_loss > loss:
            min_loss = loss
            best_model = copy.deepcopy(trainer.transformer)

    trainer.save(epoch, best_model, args.output_path)
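
Encoder, Decoder, and Seq2Seq are defined elsewhere; PAD_IDX is typically used to build an attention mask so padded positions are ignored. A minimal sketch of that role, assuming a standard transformer-style source mask rather than this repository's exact implementation:

import torch

def make_src_mask(src, pad_idx):
    # src: [batch, src_len] token ids. True marks real tokens, False padding;
    # the two extra dims broadcast over attention heads and query positions.
    return (src != pad_idx).unsqueeze(1).unsqueeze(2)
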
Example #4
def train():
    parser = argparse.ArgumentParser()

    parser.add_argument("-c",
                        "--dataset",
                        required=True,
                        type=str,
                        help="dataset")
    # parser.add_argument("-c", "--train_dataset", required=True,
    #                     type=str, help="train dataset for train bert")
    # parser.add_argument("-t", "--test_dataset", type=str,
    #                     default=None, help="test set for evaluate train set")
    # parser.add_argument("-v", "--vocab_path", required=True,
    #                     type=str, help="built vocab model path with bert-vocab")
    parser.add_argument("-o",
                        "--output_path",
                        required=True,
                        type=str,
                        help="ex)output/bert.model")

    parser.add_argument("-hs",
                        "--hidden",
                        type=int,
                        default=256,
                        help="hidden size of transformer model")
    parser.add_argument("-l",
                        "--layers",
                        type=int,
                        default=8,
                        help="number of layers")
    parser.add_argument("-a",
                        "--attn_heads",
                        type=int,
                        default=8,
                        help="number of attention heads")
    parser.add_argument("-s",
                        "--seq_len",
                        type=int,
                        default=64,
                        help="maximum sequence len")

    parser.add_argument("-b",
                        "--batch_size",
                        type=int,
                        default=64,
                        help="number of batch_size")
    parser.add_argument("-e",
                        "--epochs",
                        type=int,
                        default=10,
                        help="number of epochs")
    parser.add_argument("-w",
                        "--num_workers",
                        type=int,
                        default=5,
                        help="dataloader worker size")
    parser.add_argument("--duplicate",
                        type=int,
                        default=5,
                        help="dataloader worker size")

    parser.add_argument("--with_cuda",
                        type=bool,
                        default=True,
                        help="training with CUDA: true, or false")
    parser.add_argument("--log_freq",
                        type=int,
                        default=10,
                        help="printing loss every n iter: setting n")
    parser.add_argument("--corpus_lines",
                        type=int,
                        default=None,
                        help="total number of lines in corpus")
    parser.add_argument("--cuda_devices",
                        type=int,
                        nargs='+',
                        default=None,
                        help="CUDA device ids")
    parser.add_argument("--on_memory",
                        type=bool,
                        default=True,
                        help="Loading on memory: true or false")

    parser.add_argument("--lr",
                        type=float,
                        default=1e-3,
                        help="learning rate of adam")
    parser.add_argument("--adam_weight_decay",
                        type=float,
                        default=0.01,
                        help="weight_decay of adam")
    parser.add_argument("--adam_beta1",
                        type=float,
                        default=0.9,
                        help="adam first beta value")
    parser.add_argument("--adam_beta2",
                        type=float,
                        default=0.999,
                        help="adam first beta value")
    parser.add_argument("--dropout",
                        type=float,
                        default=0.2,
                        help="dropout value")

    args = parser.parse_args()

    print("Load Data", args.dataset)
    data_reader = DataReader(args.dataset, seq_len=args.seq_len)
    neg_data_reader = DataReader(args.dataset,
                                 graphs=data_reader.graphs,
                                 shuffle=True,
                                 duplicate=args.duplicate,
                                 seq_len=args.seq_len)
    # print("Loading Vocab", args.vocab_path)
    print("Loading Vocab")
    vocab = Vocab(data_reader.graphs)
    # vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size: ", len(vocab))

    print("Shuffle Data")
    'TODO'

    print("Loading Train Dataset", args.dataset)
    train_dataset = CustomBERTDataset(
        data_reader.graphs[:int(len(data_reader) * 0.8)],
        vocab,
        seq_len=args.seq_len,
        on_memory=args.on_memory,
        n_neg=args.duplicate)
    neg_train_dataset = CustomBERTDataset(
        neg_data_reader.graphs[:args.duplicate * len(train_dataset)],
        vocab,
        seq_len=args.seq_len,
        on_memory=args.on_memory,
        n_neg=args.duplicate)
    assert len(neg_train_dataset) == args.duplicate * len(train_dataset)
    # print("Loading Test Dataset", args.test_dataset)
    print("Loading Dev Dataset", args.dataset)
    test_dataset = CustomBERTDataset(
        data_reader.graphs[int(len(data_reader) * 0.8):],
        vocab,
        seq_len=args.seq_len,
        on_memory=args.on_memory,
        n_neg=args.duplicate)  # \
    neg_test_dataset = CustomBERTDataset(
        neg_data_reader.graphs[-args.duplicate * len(test_dataset):],
        vocab,
        seq_len=args.seq_len,
        on_memory=args.on_memory,
        n_neg=args.duplicate)  # \
    assert len(neg_test_dataset) == args.duplicate * len(test_dataset)
    # if args.test_dataset is not None else None
    # pdb.set_trace()
    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   num_workers=args.num_workers,
                                   collate_fn=my_collate)
    neg_train_data_loader = DataLoader(neg_train_dataset,
                                       batch_size=args.batch_size *
                                       args.duplicate,
                                       num_workers=args.num_workers,
                                       collate_fn=my_collate)

    test_data_loader = DataLoader(test_dataset,
                                  batch_size=args.batch_size,
                                  num_workers=args.num_workers,
                                  collate_fn=my_collate)
    neg_test_data_loader = DataLoader(neg_test_dataset,
                                      batch_size=args.batch_size *
                                      args.duplicate,
                                      num_workers=args.num_workers,
                                      collate_fn=my_collate)
    print("Building BERT model")
    bert = BERT(len(vocab),
                hidden=args.hidden,
                n_layers=args.layers,
                attn_heads=args.attn_heads,
                dropout=args.dropout)

    print("Creating BERT Trainer")
    trainer = BERTTrainer(bert,
                          len(vocab),
                          train_dataloader=train_data_loader,
                          test_dataloader=test_data_loader,
                          lr=args.lr,
                          betas=(args.adam_beta1, args.adam_beta2),
                          weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda,
                          cuda_devices=args.cuda_devices,
                          log_freq=args.log_freq,
                          pad_index=vocab.pad_index)
    print("Training Start")
    best_loss = None
    for epoch in range(args.epochs):
        train_loss = trainer.train(epoch)
        torch.cuda.empty_cache()

        test_loss = trainer.test(epoch)
        if best_loss is None or test_loss < best_loss:
            best_loss = test_loss
            trainer.save(epoch, args.output_path)

        torch.cuda.empty_cache()
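
my_collate is referenced but not defined in this example. A minimal sketch of a padding collate function, assuming each dataset item is a dict of variable-length token-id tensors (the "bert_input"/"bert_label" keys are placeholders):

import torch
from torch.nn.utils.rnn import pad_sequence

def my_collate(batch):
    # Pad every sequence in the batch to the longest one; batch_first gives
    # [batch, max_len] tensors, with 0 assumed to be the pad index.
    inputs = pad_sequence([torch.as_tensor(b["bert_input"]) for b in batch],
                          batch_first=True, padding_value=0)
    labels = pad_sequence([torch.as_tensor(b["bert_label"]) for b in batch],
                          batch_first=True, padding_value=0)
    return {"bert_input": inputs, "bert_label": labels}
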
Example #5
                            vocab,
                            seq_len=args.seq_len,
                            corpus_lines=args.corpus_lines)

print("Loading Test Dataset", args.test_dataset)
test_dataset = BERTDataset(
    args.test_dataset, vocab,
    seq_len=args.seq_len) if args.test_dataset is not None else None

train_data_loader = DataLoader(train_dataset,
                               batch_size=args.batch_size,
                               num_workers=args.num_workers)
test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
    if test_dataset is not None else None

bert = BERT(len(vocab),
            hidden=args.hidden,
            n_layers=args.layers,
            attn_heads=args.attn_heads)
trainer = BERTTrainer(bert,
                      len(vocab),
                      train_dataloader=train_data_loader,
                      test_dataloader=test_data_loader)

for epoch in range(args.epochs):
    trainer.train(epoch)
    trainer.save(args.output_dir, epoch)

    if test_data_loader is not None:
        trainer.test(epoch)