import pickle
from pathlib import Path

import torch

# BertForSequenceClassification is assumed to come from fastNLP's models,
# matching the training scripts below; adjust if this file defines its own.
from fastNLP.models import BertForSequenceClassification


class Model:
    def __init__(self, device=None, jit=False):
        self.device = device
        self.jit = jit
        embed_dim = 64
        epochs = 5  # unused in this harness; iteration count comes from niter
        num_labels = 4
        root = str(Path(__file__).parent)
        # The pickled example batch carries the metadata and tensors needed
        # to build and feed the model.
        with open(f"{root}/example_batch.pkl", "rb") as f:
            batch_size, vocab_size, text, offsets, cls = pickle.load(f)
        self.text, self.offsets, self.cls = [
            t.to(self.device) for t in (text, offsets, cls)
        ]
        bert_embed = torch.nn.EmbeddingBag(vocab_size, embed_dim)
        self.model = BertForSequenceClassification(
            bert_embed, num_labels=num_labels).to(self.device)
        if self.jit:
            self.model = torch.jit.script(self.model)
        self.criterion = torch.nn.CrossEntropyLoss().to(device)
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=4.0)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, 1, gamma=0.9)

    def get_module(self):
        return self.model, (self.text, self.offsets)

    def eval(self, niter=1):
        with torch.no_grad():
            for _ in range(niter):
                output = self.model(self.text, self.offsets)
                # loss is computed for parity with train() but not used
                loss = self.criterion(output, self.cls)

    def train(self, niter=1):
        for _ in range(niter):
            self.optimizer.zero_grad()
            output = self.model(self.text, self.offsets)
            loss = self.criterion(output, self.cls)
            loss.backward()
            self.optimizer.step()
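# A minimal smoke test for the harness above (a sketch, not part of the
# original file): it only uses the Model API defined here and assumes
# example_batch.pkl sits next to this script.
if __name__ == "__main__":
    device = "cuda" if torch.cuda.is_available() else "cpu"
    m = Model(device=device, jit=False)
    module, example_inputs = m.get_module()
    m.train(niter=2)  # a couple of optimizer steps over the pickled batch
    m.eval(niter=1)   # one scoring pass under torch.no_grad()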
# training
# Imports below are the fastNLP 0.5-era entry points these calls assume.
import torch
from fastNLP import Trainer, Adam, CrossEntropyLoss, AccuracyMetric
from fastNLP.embeddings import BertEmbedding
from fastNLP.models import BertForSequenceClassification
from fastNLP.io import ModelSaver

device = 0 if torch.cuda.is_available() else 'cpu'

# Earlier CNN baseline, kept for reference:
'''
EMBED_DIM = 100
model = CNNText((len(vocab), EMBED_DIM), num_classes=len(vocab_target), dropout=0.1)
metrics = AccuracyMetric()
loss = CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(), lr=0.01, alpha=0.99, eps=1e-08,
                          weight_decay=0, momentum=0, centered=False)
N_EPOCHS = 10
BATCH_SIZE = 16
trainer = Trainer(model=model, train_data=train_data, dev_data=dev_data, loss=loss,
                  metrics=metrics, optimizer=optimizer, n_epochs=N_EPOCHS,
                  batch_size=BATCH_SIZE, device=device)
trainer.train()
'''

embed = BertEmbedding(vocab, model_dir_or_name='en', include_cls_sep=True)
model = BertForSequenceClassification(embed, len(vocab_target))

trainer = Trainer(train_data, model,
                  optimizer=Adam(model_params=model.parameters(), lr=2e-5),
                  loss=CrossEntropyLoss(), device=device, batch_size=8,
                  dev_data=dev_data, metrics=AccuracyMetric(),
                  n_epochs=2, print_every=1)
trainer.train()

saver = ModelSaver("save_model/bert2021.1.19.pkl")
saver.save_pytorch(model)
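# Restoring this checkpoint later (a sketch; it mirrors the loading code used
# in the prediction script further below): rebuild the identical architecture
# first, then load the saved weights into it in place.
from fastNLP.io import ModelLoader

model2 = BertForSequenceClassification(
    BertEmbedding(vocab, model_dir_or_name='en', include_cls_sep=True),
    len(vocab_target))
ModelLoader.load_pytorch(model2, "save_model/bert2021.1.19.pkl")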
# Imports assumed for this script (fastNLP 0.5-era API).
import torch
from fastNLP import Vocabulary, Trainer, Tester, Adam, CrossEntropyLoss, AccuracyMetric
from fastNLP.embeddings import BertEmbedding
from fastNLP.models import BertForSequenceClassification
from fastNLP.io import DataBundle

vocab.from_dataset(train_dataset, field_name='words',
                   no_create_entry_dataset=[test_dataset])
vocab.index_dataset(train_dataset, test_dataset, field_name='words')
target_vocab = Vocabulary(padding=None, unknown=None)
target_vocab.from_dataset(train_dataset, field_name='target',
                          no_create_entry_dataset=[test_dataset])
target_vocab.index_dataset(train_dataset, test_dataset, field_name='target')

# build bundle
data_dict = {"train": train_dataset, "test": test_dataset}
vocab_dict = {"words": vocab, "target": target_vocab}
data_bundle = DataBundle(vocab_dict, data_dict)
print(data_bundle)

# build model
embed = BertEmbedding(data_bundle.get_vocab('words'),
                      model_dir_or_name='en-base-uncased', include_cls_sep=True)
model = BertForSequenceClassification(embed, len(data_bundle.get_vocab('target')))
# model = BertForSequenceClassification(embed, 2)

device = 0 if torch.cuda.is_available() else 'cpu'
# Note: dev_data reuses the training set here, so the dev accuracy reported
# during training is an optimistic, in-sample estimate.
trainer = Trainer(data_bundle.get_dataset('train'), model,
                  optimizer=Adam(model_params=model.parameters(), lr=2e-5),
                  loss=CrossEntropyLoss(), device=device, batch_size=8,
                  dev_data=data_bundle.get_dataset('train'),
                  metrics=AccuracyMetric(), n_epochs=10, print_every=1)
trainer.train()

tester = Tester(data_bundle.get_dataset('test'), model, batch_size=128,
                metrics=AccuracyMetric())
tester.test()
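# What no_create_entry_dataset does (a minimal sketch with made-up words,
# assuming fastNLP's Vocabulary.add_word/add_word_lst API): words seen only
# in the listed datasets are still indexed, but flagged so pretrained
# embeddings treat them like <unk> instead of learning a dedicated vector
# from tokens that never appear in training.
demo_vocab = Vocabulary()
demo_vocab.add_word_lst(['seen', 'in', 'train'])
demo_vocab.add_word('test_only', no_create_entry=True)
print(demo_vocab.to_index('test_only'))  # indexed like any other word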
# The parser setup above this excerpt is assumed to also define --batch_size.
parser.add_argument('--embed_dim', type=int, default=32,
                    help='embed dim. (default=32)')
parser.add_argument('--epochs', type=int, default=5,
                    help='num epochs (default=5)')
# argparse's type=bool is a footgun (any non-empty string parses as True),
# so a store_true flag is used instead.
parser.add_argument('--torchscript', action='store_true',
                    help='torchscript the model')
args = parser.parse_args()

embed_dim = args.embed_dim
BATCH_SIZE = args.batch_size
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

bert_embed = torch.nn.EmbeddingBag(len(train_dataset.vocab), embed_dim)
model = BertForSequenceClassification(bert_embed, num_labels=4).to(device)
if args.torchscript:
    model = torch.jit.script(model)
    print("model is torchscripted")


def generate_batch(batch):
    label = torch.tensor([entry[0] for entry in batch])
    text = [entry[1] for entry in batch]
    offsets = [0] + [len(entry) for entry in text]
    # torch.Tensor.cumsum returns the cumulative sum of elements along dim,
    # e.g. torch.tensor([1.0, 2.0, 3.0]).cumsum(dim=0) -> tensor([1., 3., 6.]),
    # turning per-entry lengths into the start offset of each entry.
    offsets = torch.tensor(offsets[:-1]).cumsum(dim=0)
    # Concatenate all token-id lists into one flat tensor, the layout that
    # EmbeddingBag consumes together with offsets.
    text = torch.cat(text)
    return text, offsets, label
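# Wiring generate_batch into the input pipeline (a sketch; the DataLoader
# settings here are assumptions, not taken from the original script): it is
# the collate_fn that flattens variable-length samples into the
# (text, offsets) pair the EmbeddingBag-backed model expects.
from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                          collate_fn=generate_batch)
for text, offsets, label in train_loader:
    output = model(text.to(device), offsets.to(device))
    break  # illustrative single batch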
# Imports assumed for this prediction script; _move_model_to_device and
# _get_model_device are fastNLP-internal helpers (note the leading underscore).
import torch
from fastNLP.core.utils import _move_model_to_device, _get_model_device
from fastNLP.embeddings import BertEmbedding
from fastNLP.models import BertForSequenceClassification
from fastNLP.io import ModelLoader

test_data = test_data_loader._load('data/1/test.tsv')
test_data.rename_field('Phrase', 'raw_words')
test_data.apply(get_words, new_field_name='words')
vocab_all.index_dataset(test_data, field_name='words')
test_data.set_input('words')

# Earlier CNN baseline, kept for reference:
'''
EMBED_DIM = 100
model = CNNText((len(vocab_all), EMBED_DIM), num_classes=len(vocab_target), dropout=0.1)
'''

device = 0 if torch.cuda.is_available() else 'cpu'
embed = BertEmbedding(vocab_all, model_dir_or_name='en', include_cls_sep=True)
model = BertForSequenceClassification(embed, len(vocab_target))
ModelLoader.load_pytorch(model, 'save_model/ceshi.pkl')
_move_model_to_device(model, device=device)

# pred = model_cnn.predict(torch.LongTensor([test_data[10]['words']]))
def predict(instance):
    x_batch = torch.LongTensor([instance['words']])
    x_batch = x_batch.to(device=_get_model_device(model))
    pred = model.predict(x_batch)
    # predict() returns a dict; map the predicted index back to its label string.
    pred = vocab_target.to_word(int(pred['pred']))
    return pred

test_data.apply(predict, new_field_name='target')
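# Quick sanity check (a sketch, assuming the field names set up above):
# inspect one decoded prediction after the apply() pass.
print(test_data[0]['raw_words'], '->', test_data[0]['target'])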