def run():
    print('Loading data...')
    dfx = pd.read_csv(config.TRAINING_FILE).fillna("none")
    # only train on the first 2000 entries
    dfx = dfx[:2000]
    dfx.sentiment = dfx.sentiment.apply(lambda x: 1 if x == "positive" else 0)

    df_train, df_valid = model_selection.train_test_split(
        dfx, test_size=0.1, random_state=42, stratify=dfx.sentiment.values)
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    print('Creating dataset...')
    train_dataset = dataset.BERTDataset(review=df_train.review.values,
                                        target=df_train.sentiment.values)
    valid_dataset = dataset.BERTDataset(review=df_valid.review.values,
                                        target=df_valid.sentiment.values)

    print('Creating dataloader...')
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    print('Building BERT model...')
    model = BERTBaseUncased()

    print("Creating BERT Trainer...")
    trainer = BERTTrainer(model=model, train_dataloader=train_data_loader,
                          test_dataloader=valid_data_loader, lr=config.LR,
                          with_cuda=config.USE_CUDA)
    # model = nn.DataParallel(model)

    print('Training Start...')
    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        train_acc, train_loss = trainer.train_fn(epoch, len(df_train))
        print(f'Train loss: {train_loss} Train accuracy: {train_acc:.4%}')

        outputs, targets = trainer.eval_fn()
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"Accuracy Score = {accuracy:.2%}")

        # keep only the best checkpoint
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), config.MODEL_PATH)
            best_accuracy = accuracy
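# A minimal sketch of the `config` module that run() above assumes. The constant
# names are exactly those referenced in run(); every value below is an
# illustrative placeholder, not the original project's setting.
# config.py
TRAINING_FILE = "reviews.csv"   # placeholder path to the labelled review CSV
MODEL_PATH = "model.bin"        # placeholder path for the best checkpoint
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 4
EPOCHS = 10
LR = 3e-5
USE_CUDA = True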
def main():
    # seed all RNGs for reproducibility
    random.seed(rdn_seed)
    np.random.seed(rdn_seed)
    torch.manual_seed(rdn_seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    print("device: {}, n_gpu: {}".format(device, n_gpu))
    if device.type == "cuda":
        torch.cuda.manual_seed_all(rdn_seed)

    tokenizer = BehaviorsBERTTokenizer(vocab_file)
    print("Vocab size:", tokenizer.vocab_size)

    train_dataset = BERTDataset(corpus_path, tokenizer, max_seq_len,
                                corpus_lines=corpus_lines)
    # scale the batch size with the number of GPUs (fall back to 1 on CPU)
    batch_size = per_gpu_batch_size * max(n_gpu, 1)
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size)

    bert = BERT(vocab_size=tokenizer.vocab_size, hidden=hidden, n_layers=layers,
                attn_heads=attn_heads, max_seq_len=max_seq_len)

    trainer = BERTTrainer(bert, tokenizer.vocab_size, epochs,
                          tensorboard_log_dir=tensorboard_log_dir,
                          output_path=output_path,
                          train_dataloader=train_dataloader,
                          with_cuda=torch.cuda.is_available(),
                          log_freq=100, save_steps=100000)
    trainer.train()
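# A minimal sketch of the module-level settings that main() above reads. The
# names come from main() itself; every value is an illustrative placeholder.
rdn_seed = 42
vocab_file = "vocab.txt"              # placeholder path
corpus_path = "corpus.train.txt"      # placeholder path
corpus_lines = None                   # let the dataset count the lines itself
max_seq_len = 128
per_gpu_batch_size = 32
hidden, layers, attn_heads = 256, 8, 8
epochs = 10
tensorboard_log_dir = "runs/"         # placeholder path
output_path = "output/bert.model"     # placeholder path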
from trainer import BERTTrainer
from general_utils.utils import json_reader

if __name__ == "__main__":
    config = json_reader('loader_config.json')
    trainer = BERTTrainer(config)
    trainer.run()
                                   batch_size=args.batch_size, num_workers=args.num_workers)

    print("Reading Word Vectors")
    weights_matrix = ReadWordVec(args.emb_path, args.emb_filename, args.emb_dim)

    print("Building Model")
    bert = BERT(len(vocab), weights_matrix, hidden=args.emb_dim,
                n_layers=args.layers, attn_heads=args.attn_heads)

    print("Creating Trainer")
    trainer = BERTTrainer(bert, len(vocab), args.seq_len,
                          train_dataloader=train_data_loader, lr=args.lr,
                          betas=(args.adam_beta1, args.adam_beta2),
                          weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda, cuda_devices=args.cuda_devices,
                          log_freq=args.log_freq)

    print("Training Start")
    for epoch in range(args.epochs):
        trainer.train(epoch)
        trainer.save(epoch, args.output_path)
    trainer.test(1)
def train():
    parser = argparse.ArgumentParser()

    parser.add_argument("-c", "--train_dataset", required=True, type=str,
                        help="train dataset for training bert")
    parser.add_argument("-t", "--test_dataset", type=str, default=None,
                        help="test set to evaluate the trained model")
    parser.add_argument("-v", "--vocab_path", required=True, type=str,
                        help="built vocab model path with bert-vocab")
    parser.add_argument("-o", "--output_path", required=True, type=str,
                        help="ex) output/bert.model")

    parser.add_argument("-hs", "--hidden", type=int, default=256,
                        help="hidden size of transformer model")
    parser.add_argument("-l", "--layers", type=int, default=8,
                        help="number of layers")
    parser.add_argument("-a", "--attn_heads", type=int, default=8,
                        help="number of attention heads")
    parser.add_argument("-s", "--seq_len", type=int, default=20,
                        help="maximum sequence len")

    parser.add_argument("-b", "--batch_size", type=int, default=64,
                        help="number of batch_size")
    parser.add_argument("-e", "--epochs", type=int, default=10,
                        help="number of epochs")
    parser.add_argument("-w", "--num_workers", type=int, default=5,
                        help="dataloader worker size")

    parser.add_argument("--with_cuda", type=bool, default=True,
                        help="training with CUDA: true, or false")
    parser.add_argument("--log_freq", type=int, default=10,
                        help="printing loss every n iter: setting n")
    parser.add_argument("--corpus_lines", type=int, default=None,
                        help="total number of lines in corpus")
    parser.add_argument("--cuda_devices", type=int, nargs='+', default=None,
                        help="CUDA device ids")
    parser.add_argument("--on_memory", type=bool, default=True,
                        help="Loading on memory: true or false")

    parser.add_argument("--lr", type=float, default=1e-3,
                        help="learning rate of adam")
    parser.add_argument("--adam_weight_decay", type=float, default=0.01,
                        help="weight_decay of adam")
    parser.add_argument("--adam_beta1", type=float, default=0.9,
                        help="adam first beta value")
    parser.add_argument("--adam_beta2", type=float, default=0.999,
                        help="adam second beta value")

    args = parser.parse_args()

    print("Loading Vocab", args.vocab_path)
    vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size: ", len(vocab))

    print("Loading Train Dataset", args.train_dataset)
    train_dataset = BERTDataset(args.train_dataset, vocab, seq_len=args.seq_len,
                                corpus_lines=args.corpus_lines, on_memory=args.on_memory)

    print("Loading Test Dataset", args.test_dataset)
    test_dataset = BERTDataset(args.test_dataset, vocab, seq_len=args.seq_len,
                               on_memory=args.on_memory) \
        if args.test_dataset is not None else None

    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                                   num_workers=args.num_workers)
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                                  num_workers=args.num_workers) \
        if test_dataset is not None else None

    print("Building BERT model")
    bert = BERT(len(vocab), hidden=args.hidden, n_layers=args.layers,
                attn_heads=args.attn_heads)

    print("Creating BERT Trainer")
    trainer = BERTTrainer(bert, len(vocab), train_dataloader=train_data_loader,
                          test_dataloader=test_data_loader, lr=args.lr,
                          betas=(args.adam_beta1, args.adam_beta2),
                          weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda, cuda_devices=args.cuda_devices,
                          log_freq=args.log_freq)

    print("Training Start")
    for epoch in range(args.epochs):
        trainer.train(epoch)
        trainer.save(epoch, args.output_path)

        if test_data_loader is not None:
            trainer.test(epoch)
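# Illustrative invocation of the argument parser defined in train() above. The
# script name and file paths are placeholders; the flags map directly onto the
# add_argument() calls shown.
#
#   python pretrain.py -c data/corpus.train.txt -v data/vocab.pkl \
#       -o output/bert.model -hs 256 -l 8 -a 8 -s 20 -b 64 -e 10 --lr 1e-3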
def train():
    parser = argparse.ArgumentParser()

    parser.add_argument("-c", "--dataset", required=True, type=str,
                        help="dataset")
    parser.add_argument("-o", "--output_path", required=True, type=str,
                        help="ex) output/bert.model")

    parser.add_argument("-hs", "--hidden", type=int, default=256,
                        help="hidden size of transformer model")
    parser.add_argument("-l", "--layers", type=int, default=8,
                        help="number of layers")
    parser.add_argument("-a", "--attn_heads", type=int, default=8,
                        help="number of attention heads")
    parser.add_argument("-s", "--seq_len", type=int, default=64,
                        help="maximum sequence len")

    parser.add_argument("-b", "--batch_size", type=int, default=64,
                        help="number of batch_size")
    parser.add_argument("-e", "--epochs", type=int, default=10,
                        help="number of epochs")
    parser.add_argument("-w", "--num_workers", type=int, default=5,
                        help="dataloader worker size")
    parser.add_argument("--duplicate", type=int, default=5,
                        help="number of negative samples per positive sample")

    parser.add_argument("--with_cuda", type=bool, default=True,
                        help="training with CUDA: true, or false")
    parser.add_argument("--log_freq", type=int, default=10,
                        help="printing loss every n iter: setting n")
    parser.add_argument("--corpus_lines", type=int, default=None,
                        help="total number of lines in corpus")
    parser.add_argument("--cuda_devices", type=int, nargs='+', default=None,
                        help="CUDA device ids")
    parser.add_argument("--on_memory", type=bool, default=True,
                        help="Loading on memory: true or false")

    parser.add_argument("--lr", type=float, default=1e-3,
                        help="learning rate of adam")
    parser.add_argument("--adam_weight_decay", type=float, default=0.01,
                        help="weight_decay of adam")
    parser.add_argument("--adam_beta1", type=float, default=0.9,
                        help="adam first beta value")
    parser.add_argument("--adam_beta2", type=float, default=0.999,
                        help="adam second beta value")
    parser.add_argument("--dropout", type=float, default=0.2,
                        help="dropout value")

    args = parser.parse_args()

    print("Load Data", args.dataset)
    data_reader = DataReader(args.dataset, seq_len=args.seq_len)
    neg_data_reader = DataReader(args.dataset, graphs=data_reader.graphs,
                                 shuffle=True, duplicate=args.duplicate,
                                 seq_len=args.seq_len)

    print("Loading Vocab")
    vocab = Vocab(data_reader.graphs)
    print("Vocab Size: ", len(vocab))

    print("Shuffle Data")
    # TODO: shuffling before the split is not implemented yet

    print("Loading Train Dataset", args.dataset)
    # first 80% of the graphs form the training split
    train_dataset = CustomBERTDataset(
        data_reader.graphs[:int(len(data_reader) * 0.8)], vocab,
        seq_len=args.seq_len, on_memory=args.on_memory, n_neg=args.duplicate)
    neg_train_dataset = CustomBERTDataset(
        neg_data_reader.graphs[:args.duplicate * len(train_dataset)], vocab,
        seq_len=args.seq_len, on_memory=args.on_memory, n_neg=args.duplicate)
    assert len(neg_train_dataset) == args.duplicate * len(train_dataset)

    print("Loading Dev Dataset", args.dataset)
    # remaining 20% of the graphs form the dev split
    test_dataset = CustomBERTDataset(
        data_reader.graphs[int(len(data_reader) * 0.8):], vocab,
        seq_len=args.seq_len, on_memory=args.on_memory, n_neg=args.duplicate)
    neg_test_dataset = CustomBERTDataset(
        neg_data_reader.graphs[-args.duplicate * len(test_dataset):], vocab,
        seq_len=args.seq_len, on_memory=args.on_memory, n_neg=args.duplicate)
    assert len(neg_test_dataset) == args.duplicate * len(test_dataset)

    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                                   num_workers=args.num_workers, collate_fn=my_collate)
    neg_train_data_loader = DataLoader(neg_train_dataset,
                                       batch_size=args.batch_size * args.duplicate,
                                       num_workers=args.num_workers, collate_fn=my_collate)
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                                  num_workers=args.num_workers, collate_fn=my_collate)
    neg_test_data_loader = DataLoader(neg_test_dataset,
                                      batch_size=args.batch_size * args.duplicate,
                                      num_workers=args.num_workers, collate_fn=my_collate)

    print("Building BERT model")
    bert = BERT(len(vocab), hidden=args.hidden, n_layers=args.layers,
                attn_heads=args.attn_heads, dropout=args.dropout)

    print("Creating BERT Trainer")
    trainer = BERTTrainer(bert, len(vocab), train_dataloader=train_data_loader,
                          test_dataloader=test_data_loader, lr=args.lr,
                          betas=(args.adam_beta1, args.adam_beta2),
                          weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda, cuda_devices=args.cuda_devices,
                          log_freq=args.log_freq, pad_index=vocab.pad_index)

    print("Training Start")
    best_loss = None
    for epoch in range(args.epochs):
        train_loss = trainer.train(epoch)
        torch.cuda.empty_cache()

        # keep the checkpoint with the lowest test loss
        test_loss = trainer.test(epoch)
        if best_loss is None or test_loss < best_loss:
            best_loss = test_loss
            trainer.save(epoch, args.output_path)
        torch.cuda.empty_cache()
def train():
    parser = argparse.ArgumentParser()

    parser.add_argument("-td", "--train_dataset", type=str, required=True,
                        help="train set")
    parser.add_argument("-vd", "--valid_dataset", type=str, default=None,
                        help="validation set")
    parser.add_argument("-v", "--vocab_path", required=True, type=str,
                        help="vocab path")
    parser.add_argument("-o", "--output_path", required=True, type=str,
                        help="model save path")

    parser.add_argument("-fs", "--feed_forward_hidden", type=int, default=4096,
                        help="hidden size of feed-forward network")
    parser.add_argument("-hs", "--hidden", type=int, default=1024,
                        help="hidden size of transformer model")
    parser.add_argument("-l", "--layers", type=int, default=6,
                        help="number of transformer layers")
    parser.add_argument("-a", "--attn_heads", type=int, default=8,
                        help="number of attention heads")
    parser.add_argument("-p", "--path_num", type=int, default=100,
                        help="an AST's maximum path num")
    parser.add_argument("-n", "--node_num", type=int, default=20,
                        help="a path's maximum node num")
    parser.add_argument("-c", "--code_len", type=int, default=200,
                        help="maximum code len")
    parser.add_argument("-al", "--alpha", type=float, default=0.75,
                        help="loss weight")

    parser.add_argument("-b", "--batch_size", type=int, default=4096,
                        help="number of batch_size")
    parser.add_argument("-e", "--epochs", type=int, default=1,
                        help="number of epochs")
    parser.add_argument("-w", "--num_workers", type=int, default=0,
                        help="dataloader worker num")

    parser.add_argument("--with_cuda", type=bool, default=False,
                        help="training with CUDA: true, or false")
    parser.add_argument("--log_freq", type=int, default=10,
                        help="printing loss every n iter: setting n")
    parser.add_argument("--corpus_lines", type=int, default=None,
                        help="total number of lines in corpus")
    parser.add_argument("--cuda_devices", type=int, nargs='+', default=None,
                        help="CUDA device ids")

    parser.add_argument("--lr", type=float, default=1e-5,
                        help="learning rate of adam")
    parser.add_argument("--adam_weight_decay", type=float, default=0.01,
                        help="weight_decay of adam")
    parser.add_argument("--adam_beta1", type=float, default=0.9,
                        help="adam first beta value")
    parser.add_argument("--adam_beta2", type=float, default=0.999,
                        help="adam second beta value")

    args = parser.parse_args()

    print("Loading Vocab", args.vocab_path)
    # source and target corpus share the vocab
    vocab = TokenVocab.load_vocab(args.vocab_path)
    print("Vocab Size: ", len(vocab))

    print("Loading Train Dataset")
    train_dataset = TreeBERTDataset(vocab, args.train_dataset, path_num=args.path_num,
                                    node_num=args.node_num, code_len=args.code_len,
                                    is_fine_tune=False, corpus_lines=args.corpus_lines)

    print("Loading Valid Dataset")
    valid_dataset = TreeBERTDataset(vocab, args.valid_dataset, path_num=args.path_num,
                                    node_num=args.node_num, code_len=args.code_len,
                                    is_fine_tune=False, corpus_lines=args.corpus_lines) \
        if args.valid_dataset is not None else None

    # Creating Dataloader
    train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                                   num_workers=args.num_workers)
    valid_data_loader = DataLoader(valid_dataset, batch_size=args.batch_size,
                                   num_workers=args.num_workers) \
        if valid_dataset is not None else None

    print("Building model")
    dropout = 0.1
    enc = Encoder(len(vocab), args.node_num, args.hidden, args.layers, args.attn_heads,
                  args.feed_forward_hidden, dropout, max_length=args.path_num)
    dec = Decoder(len(vocab), args.hidden, args.layers, args.attn_heads,
                  args.feed_forward_hidden, dropout, max_length=args.code_len + 2)
    PAD_IDX = vocab.pad_index
    transformer = Seq2Seq(enc, dec, args.hidden, PAD_IDX)

    print("Creating Trainer")
    trainer = BERTTrainer(transformer, args.alpha, len(vocab),
                          train_dataloader=train_data_loader,
                          test_dataloader=valid_data_loader, lr=args.lr,
                          betas=(args.adam_beta1, args.adam_beta2),
                          weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda, cuda_devices=args.cuda_devices,
                          log_freq=args.log_freq)

    print("Training Start")
    min_loss = 10
    loss = 0
    best_model = None
    for epoch in range(args.epochs):
        trainer.train(epoch)
        if valid_data_loader is not None:
            loss = trainer.test(epoch)
        # keep the model with the lowest validation loss seen so far
        if min_loss > loss:
            min_loss = loss
            best_model = copy.deepcopy(trainer.transformer)
            trainer.save(epoch, best_model, args.output_path)
                                vocab, seq_len=args.seq_len, corpus_lines=args.corpus_lines)

    print("Loading Test Dataset", args.test_dataset)
    test_dataset = BERTDataset(args.test_dataset, vocab, seq_len=args.seq_len) \
        if args.test_dataset is not None else None

    train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                                   num_workers=args.num_workers)
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                                  num_workers=args.num_workers) \
        if test_dataset is not None else None

    bert = BERT(len(vocab), hidden=args.hidden, n_layers=args.layers,
                attn_heads=args.attn_heads)
    trainer = BERTTrainer(bert, len(vocab), train_dataloader=train_data_loader,
                          test_dataloader=test_data_loader)

    for epoch in range(args.epochs):
        trainer.train(epoch)
        trainer.save(args.output_dir, epoch)

        if test_data_loader is not None:
            trainer.test(epoch)
print("Creating Dataloader") train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers) test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \ if test_dataset is not None else None print("Building BERT model") bert = BERT(len(vocab), hidden=args.hidden, n_layers=args.layers, attn_heads=args.attn_heads) print("Creating BERT Trainer") trainer = BERTTrainer(bert, len(vocab), train_dataloader=train_data_loader, test_dataloader=test_data_loader, lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay) print("Training Start") for epoch in range(args.epochs): trainer.train(epoch) trainer.save(args.output_dir, epoch) if test_data_loader is not None: trainer.test(epoch)