                                   num_workers=args.num_workers)

    print("Reading Word Vectors")
    weights_matrix = ReadWordVec(args.emb_path, args.emb_filename, args.emb_dim)

    print("Building Model")
    bert = BERT(len(vocab), weights_matrix, hidden=args.emb_dim,
                n_layers=args.layers, attn_heads=args.attn_heads)

    print("Creating Trainer")
    trainer = BERTTrainer(bert, len(vocab), args.seq_len, train_dataloader=train_data_loader,
                          lr=args.lr, betas=(args.adam_beta1, args.adam_beta2),
                          weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda, cuda_devices=args.cuda_devices,
                          log_freq=args.log_freq)

    print("Training Start")
    for epoch in range(args.epochs):
        trainer.train(epoch)
        trainer.save(epoch, args.output_path)

    trainer.test(1)
def train():
    parser = argparse.ArgumentParser()

    parser.add_argument("-c", "--train_dataset", required=True, type=str, help="train dataset for training BERT")
    parser.add_argument("-t", "--test_dataset", type=str, default=None, help="test set for evaluating the trained model")
    parser.add_argument("-v", "--vocab_path", required=True, type=str, help="vocab model path built with bert-vocab")
    parser.add_argument("-o", "--output_path", required=True, type=str, help="ex) output/bert.model")

    parser.add_argument("-hs", "--hidden", type=int, default=256, help="hidden size of transformer model")
    parser.add_argument("-l", "--layers", type=int, default=8, help="number of layers")
    parser.add_argument("-a", "--attn_heads", type=int, default=8, help="number of attention heads")
    parser.add_argument("-s", "--seq_len", type=int, default=20, help="maximum sequence len")

    parser.add_argument("-b", "--batch_size", type=int, default=64, help="number of batch_size")
    parser.add_argument("-e", "--epochs", type=int, default=10, help="number of epochs")
    parser.add_argument("-w", "--num_workers", type=int, default=5, help="dataloader worker size")

    parser.add_argument("--with_cuda", type=bool, default=True, help="training with CUDA: true, or false")
    parser.add_argument("--log_freq", type=int, default=10, help="printing loss every n iter: setting n")
    parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus")
    parser.add_argument("--cuda_devices", type=int, nargs='+', default=None, help="CUDA device ids")
    parser.add_argument("--on_memory", type=bool, default=True, help="Loading on memory: true or false")

    parser.add_argument("--lr", type=float, default=1e-3, help="learning rate of adam")
    parser.add_argument("--adam_weight_decay", type=float, default=0.01, help="weight_decay of adam")
    parser.add_argument("--adam_beta1", type=float, default=0.9, help="adam first beta value")
    parser.add_argument("--adam_beta2", type=float, default=0.999, help="adam second beta value")

    args = parser.parse_args()

    print("Loading Vocab", args.vocab_path)
    vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size: ", len(vocab))

    print("Loading Train Dataset", args.train_dataset)
    train_dataset = BERTDataset(args.train_dataset, vocab, seq_len=args.seq_len,
                                corpus_lines=args.corpus_lines, on_memory=args.on_memory)

    print("Loading Test Dataset", args.test_dataset)
    test_dataset = BERTDataset(args.test_dataset, vocab, seq_len=args.seq_len, on_memory=args.on_memory) \
        if args.test_dataset is not None else None

    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
        if test_dataset is not None else None

    print("Building BERT model")
    bert = BERT(len(vocab), hidden=args.hidden, n_layers=args.layers, attn_heads=args.attn_heads)

    print("Creating BERT Trainer")
    trainer = BERTTrainer(bert, len(vocab), train_dataloader=train_data_loader, test_dataloader=test_data_loader,
                          lr=args.lr, betas=(args.adam_beta1, args.adam_beta2),
                          weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq)

    print("Training Start")
    for epoch in range(args.epochs):
        trainer.train(epoch)
        trainer.save(epoch, args.output_path)

        if test_data_loader is not None:
            trainer.test(epoch)
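# A minimal entry-point sketch (not part of the original listing). Assuming the
# function above lives in a script such as pretrain.py, it could be launched as:
#   python pretrain.py -c data/corpus.train -v data/vocab.small -o output/bert.model
# The script name and file paths here are illustrative placeholders only.
if __name__ == "__main__":
    train()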
def train():
    parser = argparse.ArgumentParser()

    parser.add_argument("-td", "--train_dataset", type=str, required=True, help="train set")
    parser.add_argument("-vd", "--valid_dataset", type=str, default=None, help="validation set")
    parser.add_argument("-v", "--vocab_path", required=True, type=str, help="vocab path")
    parser.add_argument("-o", "--output_path", required=True, type=str, help="model save path")

    parser.add_argument("-fs", "--feed_forward_hidden", type=int, default=4096, help="hidden size of feed-forward network")
    parser.add_argument("-hs", "--hidden", type=int, default=1024, help="hidden size of transformer model")
    parser.add_argument("-l", "--layers", type=int, default=6, help="number of transformer layers")
    parser.add_argument("-a", "--attn_heads", type=int, default=8, help="number of attention heads")
    parser.add_argument("-p", "--path_num", type=int, default=100, help="maximum path num of an AST")
    parser.add_argument("-n", "--node_num", type=int, default=20, help="maximum node num of a path")
    parser.add_argument("-c", "--code_len", type=int, default=200, help="maximum code len")
    parser.add_argument("-al", "--alpha", type=float, default=0.75, help="loss weight")

    parser.add_argument("-b", "--batch_size", type=int, default=4096, help="number of batch_size")
    parser.add_argument("-e", "--epochs", type=int, default=1, help="number of epochs")
    parser.add_argument("-w", "--num_workers", type=int, default=0, help="dataloader worker num")

    parser.add_argument("--with_cuda", type=bool, default=False, help="training with CUDA: true, or false")
    parser.add_argument("--log_freq", type=int, default=10, help="printing loss every n iter: setting n")
    parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus")
    parser.add_argument("--cuda_devices", type=int, nargs='+', default=None, help="CUDA device ids")

    parser.add_argument("--lr", type=float, default=1e-5, help="learning rate of adam")
    parser.add_argument("--adam_weight_decay", type=float, default=0.01, help="weight_decay of adam")
    parser.add_argument("--adam_beta1", type=float, default=0.9, help="adam first beta value")
    parser.add_argument("--adam_beta2", type=float, default=0.999, help="adam second beta value")

    args = parser.parse_args()

    print("Loading Vocab", args.vocab_path)
    vocab = TokenVocab.load_vocab(args.vocab_path)  # source and target corpus share the vocab
    print("Vocab Size: ", len(vocab))

    print("Loading Train Dataset")
    train_dataset = TreeBERTDataset(vocab, args.train_dataset, path_num=args.path_num, node_num=args.node_num,
                                    code_len=args.code_len, is_fine_tune=False, corpus_lines=args.corpus_lines)

    print("Loading Valid Dataset")
    valid_dataset = TreeBERTDataset(vocab, args.valid_dataset, path_num=args.path_num, node_num=args.node_num,
                                    code_len=args.code_len, is_fine_tune=False, corpus_lines=args.corpus_lines) \
        if args.valid_dataset is not None else None

    # Creating Dataloader
    train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
    valid_data_loader = DataLoader(valid_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
        if valid_dataset is not None else None

    print("Building model")
    dropout = 0.1
    enc = Encoder(len(vocab), args.node_num, args.hidden, args.layers, args.attn_heads,
                  args.feed_forward_hidden, dropout, max_length=args.path_num)
    dec = Decoder(len(vocab), args.hidden, args.layers, args.attn_heads,
                  args.feed_forward_hidden, dropout, max_length=args.code_len + 2)

    PAD_IDX = vocab.pad_index
    transformer = Seq2Seq(enc, dec, args.hidden,
                          PAD_IDX)

    print("Creating Trainer")
    trainer = BERTTrainer(transformer, args.alpha, len(vocab),
                          train_dataloader=train_data_loader, test_dataloader=valid_data_loader,
                          lr=args.lr, betas=(args.adam_beta1, args.adam_beta2),
                          weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq)

    print("Training Start")
    min_loss = 10
    loss = 0
    best_model = None
    for epoch in range(args.epochs):
        trainer.train(epoch)

        if valid_data_loader is not None:
            loss = trainer.test(epoch)

        if min_loss > loss:
            min_loss = loss  # track the best (lowest) validation loss seen so far
            best_model = copy.deepcopy(trainer.transformer)

        trainer.save(epoch, best_model, args.output_path)
def train():
    parser = argparse.ArgumentParser()

    parser.add_argument("-c", "--dataset", required=True, type=str, help="dataset")
    # parser.add_argument("-c", "--train_dataset", required=True, type=str, help="train dataset for training BERT")
    # parser.add_argument("-t", "--test_dataset", type=str, default=None, help="test set for evaluating the train set")
    # parser.add_argument("-v", "--vocab_path", required=True, type=str, help="vocab model path built with bert-vocab")
    parser.add_argument("-o", "--output_path", required=True, type=str, help="ex) output/bert.model")

    parser.add_argument("-hs", "--hidden", type=int, default=256, help="hidden size of transformer model")
    parser.add_argument("-l", "--layers", type=int, default=8, help="number of layers")
    parser.add_argument("-a", "--attn_heads", type=int, default=8, help="number of attention heads")
    parser.add_argument("-s", "--seq_len", type=int, default=64, help="maximum sequence len")

    parser.add_argument("-b", "--batch_size", type=int, default=64, help="number of batch_size")
    parser.add_argument("-e", "--epochs", type=int, default=10, help="number of epochs")
    parser.add_argument("-w", "--num_workers", type=int, default=5, help="dataloader worker size")
    parser.add_argument("--duplicate", type=int, default=5, help="number of negative duplicates per example")

    parser.add_argument("--with_cuda", type=bool, default=True, help="training with CUDA: true, or false")
    parser.add_argument("--log_freq", type=int, default=10, help="printing loss every n iter: setting n")
    parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus")
    parser.add_argument("--cuda_devices", type=int, nargs='+', default=None, help="CUDA device ids")
    parser.add_argument("--on_memory", type=bool, default=True, help="Loading on memory: true or false")

    parser.add_argument("--lr", type=float, default=1e-3, help="learning rate of adam")
    parser.add_argument("--adam_weight_decay", type=float, default=0.01, help="weight_decay of adam")
    parser.add_argument("--adam_beta1", type=float, default=0.9, help="adam first beta value")
    parser.add_argument("--adam_beta2", type=float, default=0.999, help="adam second beta value")
    parser.add_argument("--dropout", type=float, default=0.2, help="dropout value")

    args = parser.parse_args()

    print("Load Data", args.dataset)
    data_reader = DataReader(args.dataset, seq_len=args.seq_len)
    neg_data_reader = DataReader(args.dataset, graphs=data_reader.graphs, shuffle=True,
                                 duplicate=args.duplicate, seq_len=args.seq_len)

    # print("Loading Vocab", args.vocab_path)
    print("Loading Vocab")
    vocab = Vocab(data_reader.graphs)
    # vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size: ", len(vocab))

    print("Shuffle Data")
    # TODO: shuffle the graphs before the train/dev split (see the sketch after this snippet)

    print("Loading Train Dataset", args.dataset)
    train_dataset = CustomBERTDataset(data_reader.graphs[:int(len(data_reader) * 0.8)],
                                      vocab, seq_len=args.seq_len,
                                      on_memory=args.on_memory, n_neg=args.duplicate)
    # pdb.set_trace()
    neg_train_dataset = CustomBERTDataset(neg_data_reader.graphs[:args.duplicate * len(train_dataset)],
                                          vocab, seq_len=args.seq_len,
                                          on_memory=args.on_memory, n_neg=args.duplicate)
    # pdb.set_trace()
    assert len(neg_train_dataset) == args.duplicate * len(train_dataset)

    # print("Loading Test Dataset", args.test_dataset)
    print("Loading Dev Dataset", args.dataset)
    test_dataset = CustomBERTDataset(data_reader.graphs[int(len(data_reader) * 0.8):],
                                     vocab, seq_len=args.seq_len,
                                     on_memory=args.on_memory, n_neg=args.duplicate)
    neg_test_dataset = CustomBERTDataset(neg_data_reader.graphs[-args.duplicate *
                                                                len(test_dataset):],
                                         vocab, seq_len=args.seq_len,
                                         on_memory=args.on_memory, n_neg=args.duplicate)
    assert len(neg_test_dataset) == args.duplicate * len(test_dataset)
    # pdb.set_trace()

    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                                   num_workers=args.num_workers, collate_fn=my_collate)
    neg_train_data_loader = DataLoader(neg_train_dataset, batch_size=args.batch_size * args.duplicate,
                                       num_workers=args.num_workers, collate_fn=my_collate)
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                                  num_workers=args.num_workers, collate_fn=my_collate)
    neg_test_data_loader = DataLoader(neg_test_dataset, batch_size=args.batch_size * args.duplicate,
                                      num_workers=args.num_workers, collate_fn=my_collate)

    print("Building BERT model")
    bert = BERT(len(vocab), hidden=args.hidden, n_layers=args.layers,
                attn_heads=args.attn_heads, dropout=args.dropout)

    print("Creating BERT Trainer")
    trainer = BERTTrainer(bert, len(vocab), train_dataloader=train_data_loader, test_dataloader=test_data_loader,
                          lr=args.lr, betas=(args.adam_beta1, args.adam_beta2),
                          weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda, cuda_devices=args.cuda_devices,
                          log_freq=args.log_freq, pad_index=vocab.pad_index)

    print("Training Start")
    best_loss = None
    for epoch in range(args.epochs):
        train_loss = trainer.train(epoch)
        torch.cuda.empty_cache()

        test_loss = trainer.test(epoch)
        if best_loss is None or test_loss < best_loss:
            best_loss = test_loss
            trainer.save(epoch, args.output_path)
        torch.cuda.empty_cache()
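# A possible way to fill in the "Shuffle Data" TODO above; a sketch, not part of the
# original code. It shuffles the parsed graphs in place with a fixed seed before the
# 80/20 train/dev split. Note: if DataReader pairs each negative graph with a positive
# one by position, the shuffle would have to happen before neg_data_reader is
# constructed so that the positive/negative alignment is preserved.
import random

random.seed(0)                        # fixed seed keeps the split reproducible
random.shuffle(data_reader.graphs)    # in-place shuffle of the shared graph list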
                                vocab, seq_len=args.seq_len, corpus_lines=args.corpus_lines)

    print("Loading Test Dataset", args.test_dataset)
    test_dataset = BERTDataset(args.test_dataset, vocab, seq_len=args.seq_len) \
        if args.test_dataset is not None else None

    train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
        if test_dataset is not None else None

    bert = BERT(len(vocab), hidden=args.hidden, n_layers=args.layers, attn_heads=args.attn_heads)

    trainer = BERTTrainer(bert, len(vocab), train_dataloader=train_data_loader, test_dataloader=test_data_loader)

    for epoch in range(args.epochs):
        trainer.train(epoch)
        trainer.save(args.output_dir, epoch)

        if test_data_loader is not None:
            trainer.test(epoch)