def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model_path", required=False, type=str,
                        help="path of the pretrained model",
                        default='../output/model_bert/bert_ep2.model')
    parser.add_argument("-v", "--vocab_path", required=False, type=str,
                        help="path of vocab",
                        default='../data/vocab.test')
    args = parser.parse_args()

    model_path = args.model_path
    vocab_path = args.vocab_path

    vocab = WordVocab.load_vocab(vocab_path)
    model = torch.load(model_path)
    model.eval()

    # Example sentence (roughly: "um, sorry, I don't have time"), split on the corpus delimiter.
    sent = '嗯 不好意思 没有 时间'.split(hp.split_mark)
    text = '嗯 不好意思 没有 时间'
    sent1, label = random_word(text, vocab)
    sent1 = torch.tensor(sent1).long().unsqueeze(0)

    # Run a forward pass so each layer caches the attention weights plotted below.
    mask_lm_output, attn_list = model.forward(sent1)

    # Plot the attention of every head in the first three layers.
    for layer in range(3):
        fig, axs = plt.subplots(1, 4, figsize=(20, 10))
        print("Layer", layer + 1)
        for h in range(4):
            draw(model.bert.layers[layer].multihead.attention[0, h].data,
                 sent, sent if h == 0 else [], ax=axs[h])
        plt.show()
def train():
    os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--train_dataset", required=True, type=str, help="train dataset for training BERT")
    parser.add_argument("-t", "--valid_dataset", required=True, type=str, help="validation set for evaluating the trained model")
    parser.add_argument("-v", "--vocab_path", required=True, type=str, help="path of the built vocab model")
    parser.add_argument("-o", "--output_path", required=True, type=str, help="ex) output/bert.model")
    parser.add_argument("-m", "--model_path", required=True, type=str, help="path of an existing MLM model")
    parser.add_argument("-w", "--num_workers", type=int, default=1, help="dataloader worker size")
    parser.add_argument("--with_cuda", type=bool, default=True, help="training with CUDA: true or false")
    parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus")
    parser.add_argument("--cuda_devices", type=int, nargs='+', default=[0, 1, 2, 3], help="CUDA device ids")
    parser.add_argument("--on_memory", type=bool, default=True, help="loading on memory: true or false")
    parser.add_argument('--mode', type=str, default='train', help="train or eval")
    parser.add_argument('--seed', type=int, default=3431, help="random seed for initialization")
    args = parser.parse_args()

    set_seed(args)
    paths = Paths(args.output_path)
    mode = args.mode

    print("Loading Vocab", args.vocab_path)
    vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size:", vocab.vocab_size)
    args.char_nums = vocab.vocab_size

    print("Loading Train Dataset", args.train_dataset)
    train_dataset = BERTDataset(args.train_dataset, vocab,
                                corpus_lines=args.corpus_lines,
                                on_memory=args.on_memory, train=False)

    print("Loading Valid Dataset", args.valid_dataset)
    valid_dataset = BERTDataset(args.valid_dataset, vocab,
                                on_memory=args.on_memory, train=False) \
        if args.valid_dataset is not None else None

    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset, batch_size=hp.batch_size,
                                   collate_fn=collate_mlm,
                                   num_workers=args.num_workers, shuffle=True)
    valid_data_loader = DataLoader(valid_dataset, batch_size=hp.batch_size,
                                   collate_fn=collate_mlm,
                                   num_workers=args.num_workers, shuffle=True) \
        if valid_dataset is not None else None

    print("Loading BERT model")
    # bert = BERT(embed_dim=hp.embed_dim, hidden=hp.hidden, args=args)
    bert = torch.load(args.model_path)

    print("Creating BERT Trainer")
    global_step = 0
    trainer = BERTTrainer(bert, vocab.vocab_size,
                          train_dataloader=train_data_loader,
                          test_dataloader=valid_data_loader,
                          with_cuda=args.with_cuda, cuda_devices=args.cuda_devices,
                          args=args, global_step=global_step, path=paths)

    print("Training Start")
    if mode == 'train':
        trainer.train()
    if mode == 'eval':
        trainer.eval()
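The set_seed helper called above is not defined in this file; a minimal sketch of what it is assumed to do (seeding the Python, NumPy, and PyTorch RNGs from args.seed for reproducible runs) could look like this:

import random

import numpy as np
import torch


def set_seed(args):
    # Seed every RNG the training loop may touch so that runs are reproducible.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        # Also seed all visible GPUs when training with CUDA.
        torch.cuda.manual_seed_all(args.seed)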
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model_path", required=True, type=str, help="path of the pretrained model")
    parser.add_argument("-v", "--vocab_path", required=True, type=str, help="path of vocab")
    args = parser.parse_args()

    model_path = args.model_path
    vocab_path = args.vocab_path

    vocab = WordVocab.load_vocab(vocab_path)
    model = torch.load(model_path)
    model.eval()

    sent = '_I _l _o _v _e _C _h _i _n _a _!'.split()
    text = 'I love China!'
    sent1, label = random_word(text, vocab)
    sent1 = torch.tensor(sent1).long().unsqueeze(0)

    # Forward pass populates the attention weights cached in each layer.
    mask_lm_output, attn_list = model.forward(sent1)

    chars = []
    for char in sent:
        chars.append(vocab.char2index(char))

    # Plot the attention of every head in the first three layers.
    for layer in range(3):
        fig, axs = plt.subplots(1, 4, figsize=(20, 10))
        print("Layer", layer + 1)
        for h in range(4):
            draw(model.bert.layers[layer].multihead.attention[0, h].data,
                 sent, sent if h == 0 else [], ax=axs[h])
        plt.show()
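The draw helper used by both visualization scripts is defined elsewhere in the repo. A minimal sketch, assuming it renders one head's (seq_len, seq_len) attention matrix as a heatmap with the tokens as axis labels (seaborn is an assumed dependency here), could be:

import matplotlib.pyplot as plt
import seaborn as sns  # assumed dependency for the heatmap


def draw(data, x, y, ax):
    # data: (seq_len, seq_len) attention weights for a single head.
    # x / y: token labels for the columns / rows; an empty list hides the labels.
    sns.heatmap(data,
                xticklabels=x, yticklabels=y,
                vmin=0.0, vmax=1.0,
                square=True, cbar=False, ax=ax)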
def train():
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--train_dataset", required=True, type=str, help="train dataset for training BERT")
    parser.add_argument("-t", "--test_dataset", type=str, default=None, help="test set for evaluating the trained model")
    parser.add_argument("-v", "--vocab_path", required=True, type=str, help="vocab model path built with bert-vocab")
    parser.add_argument("-o", "--output_path", required=True, type=str, help="ex) output/bert.model")
    parser.add_argument("-hs", "--hidden", type=int, default=256, help="hidden size of transformer model")
    parser.add_argument("-l", "--layers", type=int, default=8, help="number of layers")
    parser.add_argument("-a", "--attn_heads", type=int, default=8, help="number of attention heads")
    parser.add_argument("-s", "--seq_len", type=int, default=20, help="maximum sequence length")
    parser.add_argument("-b", "--batch_size", type=int, default=64, help="batch size")
    parser.add_argument("-e", "--epochs", type=int, default=10, help="number of epochs")
    parser.add_argument("-w", "--num_workers", type=int, default=4, help="dataloader worker size")
    parser.add_argument("--with_cuda", type=bool, default=True, help="training with CUDA: true or false")
    parser.add_argument("--log_freq", type=int, default=10, help="print loss every n iterations")
    parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus")
    parser.add_argument("--cuda_devices", type=int, nargs='+', default=None, help="CUDA device ids")
    parser.add_argument("--on_memory", action='store_true', help="load the whole corpus into memory")
    parser.add_argument("--lr", type=float, default=1e-3, help="learning rate of Adam")
    parser.add_argument("--adam_weight_decay", type=float, default=0.01, help="weight decay of Adam")
    parser.add_argument("--adam_beta1", type=float, default=0.9, help="Adam first beta value")
    parser.add_argument("--adam_beta2", type=float, default=0.999, help="Adam second beta value")
    args = parser.parse_args()

    print("Loading Vocab", args.vocab_path)
    vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size:", len(vocab))

    print("Loading Train Dataset", args.train_dataset)
    train_dataset = BERTDataset(args.train_dataset, vocab, seq_len=args.seq_len,
                                corpus_lines=args.corpus_lines, on_memory=args.on_memory)

    print("Loading Test Dataset", args.test_dataset)
    test_dataset = BERTDataset(args.test_dataset, vocab, seq_len=args.seq_len,
                               on_memory=args.on_memory) \
        if args.test_dataset is not None else None

    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                                   num_workers=args.num_workers)
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                                  num_workers=args.num_workers) \
        if test_dataset is not None else None

    print("Building BERT model")
    bert = BERT(len(vocab), hidden=args.hidden, n_layers=args.layers, attn_heads=args.attn_heads)

    print("Creating BERT Trainer")
    trainer = BERTTrainer(bert, len(vocab),
                          train_dataloader=train_data_loader, test_dataloader=test_data_loader,
                          lr=args.lr, betas=(args.adam_beta1, args.adam_beta2),
                          weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda, cuda_devices=args.cuda_devices,
                          log_freq=args.log_freq)

    print("Training Start")
    for epoch in range(args.epochs):
        trainer.train(epoch)
        trainer.save(epoch, args.output_path)

        if test_data_loader is not None:
            trainer.test(epoch)
parser = argparse.ArgumentParser()
# --test_corpus, --vocab, and --batch_size are assumed here from their usage below.
parser.add_argument("--test_corpus", default=None, type=str, required=True, help="test corpus path.")
parser.add_argument("--vocab", default=None, type=str, required=True, help="vocab path.")
parser.add_argument("--batch_size", default=64, type=int, help="batch size.")
parser.add_argument("--model_path", default=None, type=str, required=True, help="model path.")
args = parser.parse_args()

# Read the tab-separated test corpus: column 0 is the label, column 1 the text.
test_set = []
with open(args.test_corpus, 'r') as csvfile:
    csv_reader = csv.reader(csvfile, delimiter='\t', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    for row in csv_reader:
        test_set.append([row[1], int(row[0])])

vocab = WordVocab.load_vocab(args.vocab)
sea_test, _ = tensor_generate(test_set)
seq_list, label_list, target_list = data_generate(test_set)
test_dataset = TensorDataset(seq_list, label_list, sea_test, target_list)

classify_model = torch.load(args.model_path).cuda()
model_out, true_out = test_model_out(test_dataset, classify_model, args.batch_size)

print('Recall: ' + str(recall_score(true_out, model_out)))
print('Precision: ' + str(precision_score(true_out, model_out)))
print('F1: ' + str(f1_score(true_out, model_out)))
print('Accuracy: ' + str(accuracy_score(true_out, model_out)))
import os
import sys

path = os.path.abspath('.')
if path not in sys.path:
    sys.path.append(path)

from dataset.vocab import WordVocab
from utils import *

if __name__ == "__main__":
    # Build the character vocab from the raw corpus and pickle it for later use.
    with open("data/corpus.txt", "r", encoding='utf-8') as f:
        vocab = WordVocab(f, min_freq=1)
    to_pkl(vocab, "data/vocab.pkl")
    print("vocab len:", len(vocab))
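The to_pkl helper comes from utils and is not shown here; assuming it writes a standard pickle file, the saved vocab can be read back with the pickle module, for example:

import pickle

# Load the vocab saved by the script above (path assumed to match).
with open("data/vocab.pkl", "rb") as f:
    vocab = pickle.load(f)
print("vocab len:", len(vocab))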