def train(config, device):
    dataset = News20Dataset(config.cache_data_dir, config.vocab_path, is_train=True)
    dataloader = MyDataLoader(dataset, config.batch_size)

    model = HierarchialAttentionNetwork(
        num_classes=dataset.num_classes,
        vocab_size=dataset.vocab_size,
        embed_dim=config.embed_dim,
        word_gru_hidden_dim=config.word_gru_hidden_dim,
        sent_gru_hidden_dim=config.sent_gru_hidden_dim,
        word_gru_num_layers=config.word_gru_num_layers,
        sent_gru_num_layers=config.sent_gru_num_layers,
        word_att_dim=config.word_att_dim,
        sent_att_dim=config.sent_att_dim).to(device)

    # Only optimize parameters that require gradients (frozen ones are skipped)
    optimizer = optim.Adam(params=filter(lambda p: p.requires_grad, model.parameters()),
                           lr=config.lr)
    criterion = nn.NLLLoss(reduction='sum').to(device)

    trainer = Trainer(config, model, optimizer, criterion, dataloader)
    trainer.train()
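# A minimal sketch of how train() above might be invoked. The SimpleNamespace
# config and every hyperparameter value below are illustrative assumptions,
# not the repository's actual entry point or documented defaults.
if __name__ == '__main__':
    from types import SimpleNamespace
    import torch

    config = SimpleNamespace(
        cache_data_dir='data/cache',   # hypothetical paths
        vocab_path='data/vocab.txt',
        batch_size=64,
        embed_dim=100,
        word_gru_hidden_dim=50,
        sent_gru_hidden_dim=50,
        word_gru_num_layers=1,
        sent_gru_num_layers=1,
        word_att_dim=100,
        sent_att_dim=100,
        lr=1e-3)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    train(config, device)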
def main():
    """
    Training and validation.
    """
    global checkpoint, start_epoch, word_map

    # Initialize model or load checkpoint
    if checkpoint is not None:
        checkpoint = torch.load(checkpoint)
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        word_map = checkpoint['word_map']
        start_epoch = checkpoint['epoch'] + 1
        print('\nLoaded checkpoint from epoch %d.\n' % (start_epoch - 1))
    else:
        embeddings, emb_size = load_word2vec_embeddings(word2vec_file, word_map)  # load pre-trained word2vec embeddings
        model = HierarchialAttentionNetwork(n_classes=n_classes,
                                            vocab_size=len(word_map),
                                            emb_size=emb_size,
                                            word_rnn_size=word_rnn_size,
                                            sentence_rnn_size=sentence_rnn_size,
                                            word_rnn_layers=word_rnn_layers,
                                            sentence_rnn_layers=sentence_rnn_layers,
                                            word_att_size=word_att_size,
                                            sentence_att_size=sentence_att_size,
                                            dropout=dropout)
        model.sentence_attention.word_attention.init_embeddings(embeddings)  # initialize embedding layer with pre-trained embeddings
        model.sentence_attention.word_attention.fine_tune_embeddings(fine_tune_word_embeddings)  # fine-tune
        optimizer = optim.Adam(params=filter(lambda p: p.requires_grad, model.parameters()), lr=lr)

    # Loss function
    criterion = nn.CrossEntropyLoss()

    # Move to device
    model = model.to(device)
    criterion = criterion.to(device)

    if device == 'cuda':
        model = torch.nn.DataParallel(model)
        cudnn.benchmark = True

    # DataLoaders
    train_loader = torch.utils.data.DataLoader(HANDataset(data_folder, 'train'),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=workers,
                                               pin_memory=True)

    # Epochs
    for epoch in range(start_epoch, epochs):
        # One epoch's training
        train(train_loader=train_loader,
              model=model,
              criterion=criterion,
              optimizer=optimizer,
              epoch=epoch)

        # Decay learning rate every epoch
        adjust_learning_rate(optimizer, 0.1)

        # Save checkpoint
        save_checkpoint(epoch, model, optimizer, word_map)
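# main() above calls adjust_learning_rate(optimizer, 0.1), but the helper is
# not defined in this file. A minimal sketch, assuming it simply scales every
# parameter group's learning rate by the given factor; the repository's own
# implementation may differ (e.g. logging the new rate).
def adjust_learning_rate(optimizer, scale_factor):
    # Multiply each parameter group's learning rate by scale_factor
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] * scale_factor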
class Classifier():
    def __init__(self,
                 word2vec_config_path,
                 word2vec_model_path,
                 HAN_model_path,
                 HAN_config_path,
                 tokenizer_name="word_tokenizer",
                 device=torch.device("cpu")):
        class Struct:
            def __init__(self, **entries):
                self.__dict__.update(entries)

        self.device = device

        # Load word2vec config
        with open(word2vec_config_path, 'r') as f:
            word2vec_config = json.load(f)
        word2vec_config = Struct(**word2vec_config)
        self.word2vec_model = MyGensimModel(word2vec_model_path)

        # Load tokenizer
        self.tokenizer = MyTokenizer(tokenizer_name)

        # Load HAN config
        with open(HAN_config_path, 'r') as f:
            HAN_config = json.load(f)
        HAN_config = Struct(**HAN_config)

        # Load HAN model
        self.model = HierarchialAttentionNetwork(
            dictionary_size=self.word2vec_model.dict_size,
            embedding_size=word2vec_config.size,
            hidden_size=HAN_config.hidden_size,
            attention_size=HAN_config.atten_size,
            num_class=HAN_config.num_class,
            n_layers=HAN_config.n_layers,
            device=device)
        self.model.set_embedding(self.word2vec_model.embedding)
        check_point = torch.load(HAN_model_path)
        self.model.load_state_dict(check_point["model"])
        self.model.to(device)

    def analysis(self, doc):
        # |doc| = (doc)
        tokens = [[word for word in self.tokenizer.tokenize(sentences, lemma=False)]
                  for sentences in sent_tokenize(doc)]
        # Map each word to its vocabulary index; unknown words fall back to index 0
        temp_index = [[self.word2vec_model.word2index.get(word, 0)
                       for word in self.tokenizer.tokenize(sentences)]
                      for sentences in sent_tokenize(doc)]

        # Even if preprocessing leaves a sentence empty, it must contain at least
        # one token (e.g. "[UNK]") for the model to run
        for sentence in temp_index:
            if len(sentence) == 0:
                sentence.extend([0])

        temp_sent_len = len(temp_index)
        temp_word_len = [len(sent) for sent in temp_index]
        max_sent_len = temp_sent_len
        max_word_len = max(temp_word_len)

        # Pad every sentence to the longest sentence length
        for sent in temp_index:
            if len(sent) < max_word_len:
                extended_words = [0 for _ in range(max_word_len - len(sent))]
                sent.extend(extended_words)
        if len(temp_index) < max_sent_len:
            extended_sentences = [[0 for _ in range(max_word_len)]
                                  for _ in range(max_sent_len - len(temp_index))]
            temp_index.extend(extended_sentences)
        temp_index = [sentences[:max_word_len] for sentences in temp_index][:max_sent_len]

        if len(temp_word_len) < max_sent_len:
            extended_word_len = [0 for _ in range(max_sent_len - len(temp_word_len))]
            temp_word_len.extend(extended_word_len)
        temp_word_len = temp_word_len[:max_sent_len]

        # Add a batch dimension and move to device
        temp_index = torch.tensor(temp_index).unsqueeze(0).to(self.device)
        temp_sent_len = torch.tensor(temp_sent_len).unsqueeze(0).to(self.device)
        temp_word_len = torch.tensor(temp_word_len).unsqueeze(0).to(self.device)

        y_hat, sent_weights, word_weights = self.model(temp_index, temp_sent_len, temp_word_len)

        # Model outputs log-probabilities; exponentiate to recover probabilities
        ps = torch.exp(y_hat)
        top_p, top_class = ps.topk(1, dim=1)
        sent_weights = sent_weights.squeeze()
        word_weights = word_weights.squeeze()
        return top_class, tokens, sent_weights, word_weights

    def view(self, doc):
        top_class, tokens, sent_weights, word_weights = self.analysis(doc)
        sent_weights = sent_weights.tolist()
        word_weights = word_weights.tolist()
        total_len = len(sent_weights)
        for sent, word_weight, sent_weight in zip(tokens, word_weights, sent_weights):
            temp_str = self.mk_weight_string(sent, word_weight, sent_weight, total_len)
            self.printmd(temp_str)

    def mk_weight_string(self, str_list, w_list, s_weight, total_len):
        # Render each word with a red background proportional to its attention
        # weight and a font size proportional to its sentence's weight
        temp_str = []
        for string, weight in zip(str_list, w_list):
            temp_str += ['<span style="background-color:rgba(255,0,0,' + str(weight) +
                         '); font-size: ' + str(int(total_len) * 10 * s_weight) +
                         'pt;">' + string + '</span>']
        return " ".join(temp_str)

    # Markdown printer (relies on IPython.display's display/Markdown)
    def printmd(self, string):
        display(Markdown(string))
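# A minimal usage sketch for Classifier above, intended for a Jupyter notebook
# since view() renders Markdown via IPython.display. All file paths and the
# sample document are illustrative assumptions.
if __name__ == '__main__':
    classifier = Classifier(
        word2vec_config_path='configs/word2vec.json',   # hypothetical paths
        word2vec_model_path='models/word2vec.bin',
        HAN_model_path='models/han.pth',
        HAN_config_path='configs/han.json')
    doc = "The match went to extra time. The keeper saved the final penalty."
    top_class, tokens, sent_weights, word_weights = classifier.analysis(doc)
    print("predicted class:", top_class.item())
    classifier.view(doc)  # renders attention-weighted HTML, one line per sentence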
def main():
    """
    Training and validation.
    """
    global best_acc, epochs_since_improvement, checkpoint, start_epoch, word_map

    # Initialize model or load checkpoint
    if checkpoint is not None:
        checkpoint = torch.load(checkpoint)
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        word_map = checkpoint['word_map']
        start_epoch = checkpoint['epoch'] + 1
        best_acc = checkpoint['best_acc']
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        print('\nLoaded checkpoint from epoch %d, with a previous best accuracy of %.3f.\n'
              % (start_epoch - 1, best_acc))
    else:
        embeddings, emb_size = load_word2vec_embeddings(word2vec_file, word_map)  # load pre-trained word2vec embeddings
        model = HierarchialAttentionNetwork(n_classes=n_classes,
                                            vocab_size=len(word_map),
                                            emb_size=emb_size,
                                            word_rnn_size=word_rnn_size,
                                            sentence_rnn_size=sentence_rnn_size,
                                            word_rnn_layers=word_rnn_layers,
                                            sentence_rnn_layers=sentence_rnn_layers,
                                            word_att_size=word_att_size,
                                            sentence_att_size=sentence_att_size,
                                            dropout=dropout)
        model.sentence_attention.word_attention.init_embeddings(embeddings)  # initialize embedding layer with pre-trained embeddings
        model.sentence_attention.word_attention.fine_tune_embeddings(fine_tune_word_embeddings)  # fine-tune
        optimizer = optim.Adam(params=filter(lambda p: p.requires_grad, model.parameters()), lr=lr)

    # Loss function
    criterion = nn.CrossEntropyLoss()

    # Move to device
    model = model.to(device)
    criterion = criterion.to(device)

    # DataLoaders
    train_loader = torch.utils.data.DataLoader(HANDataset(data_folder, 'train'),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(HANDataset(data_folder, 'test'),
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=workers,
                                             pin_memory=True)

    # Epochs
    for epoch in range(start_epoch, epochs):
        # One epoch's training
        train(train_loader=train_loader,
              model=model,
              criterion=criterion,
              optimizer=optimizer,
              epoch=epoch)

        # One epoch's validation
        acc = validate(val_loader=val_loader, model=model, criterion=criterion)

        # Did validation accuracy improve?
        is_best = acc > best_acc
        best_acc = max(acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0

        # Decay learning rate every epoch
        # adjust_learning_rate(optimizer, 0.5)

        # Save checkpoint
        save_checkpoint(epoch, model, optimizer, best_acc, word_map,
                        epochs_since_improvement, is_best)
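# save_checkpoint() is called above but not defined in this file. A minimal
# sketch, assuming it bundles the training state into a dict whose keys match
# those read back at the top of main(), and keeps a separate copy of the best
# model; the repository's own version and filename may differ.
def save_checkpoint(epoch, model, optimizer, best_acc, word_map,
                    epochs_since_improvement, is_best):
    state = {'epoch': epoch,
             'model': model,
             'optimizer': optimizer,
             'best_acc': best_acc,
             'word_map': word_map,
             'epochs_since_improvement': epochs_since_improvement}
    filename = 'checkpoint_han.pth.tar'  # hypothetical filename
    torch.save(state, filename)
    if is_best:
        # Keep a separate copy of the best checkpoint so it is never overwritten
        torch.save(state, 'BEST_' + filename)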
def run(config):
    def _print_config(config):
        import pprint
        pp = pprint.PrettyPrinter(indent=4)
        pp.pprint(vars(config))

    _print_config(config)

    # Remove any handlers left over from a previous run
    # (the root logger always exists, so no None check is needed)
    for handler in logging.getLogger().handlers[:]:  # iterate over a copy of the list
        logging.getLogger().removeHandler(handler)

    # If no save path is given, create the next numbered subdirectory under dict_path
    if not config.save_path and config.dict_path:
        all_subdir = [int(s) for s in os.listdir(config.dict_path)
                      if s.isdigit() and os.path.isdir(os.path.join(config.dict_path, s))]
        max_dir_num = 0
        if all_subdir:
            max_dir_num = max(all_subdir)
        max_dir_num += 1
        config.save_path = os.path.join(config.dict_path, str(max_dir_num))
        os.mkdir(config.save_path)

    logging.basicConfig(filename=os.path.join(config.save_path, 'train_log'),
                        level=tools.LOGFILE_LEVEL,
                        filemode='w')
    console = logging.StreamHandler()
    console.setLevel(tools.CONSOLE_LEVEL)
    logging.getLogger().addHandler(console)

    logging.info("##################### Start Training")
    logging.debug(vars(config))

    # Load data loader
    logging.info("##################### Load DataLoader")
    loader = MyDataLoader(train_path=config.train_path,
                          valid_path=config.valid_path,
                          dict_path=config.dict_path,
                          batch_size=config.batch_size,
                          tokenizer_name=config.tokenizer_name,
                          max_sent_len=config.max_sent_len,
                          max_word_len=config.max_word_len)
    train, valid, num_class = loader.get_train_valid()
    logging.info("##################### Train Dataset size : [" + str(len(train)) + "]")
    logging.info("##################### Valid Dataset size : [" + str(len(valid)) + "]")
    logging.info("##################### Class size : [" + str(num_class) + "]")

    dict_size = loader.get_dict_size()
    word_vec_dim = loader.get_dict_vec_dim()
    embedding = loader.get_embedding()
    config.num_class = num_class

    logging.info("##################### Load 'HAN' Model")
    model = HierarchialAttentionNetwork(dictionary_size=dict_size,
                                        embedding_size=word_vec_dim,
                                        hidden_size=config.hidden_size,
                                        attention_size=config.atten_size,
                                        num_class=num_class,
                                        n_layers=config.n_layers,
                                        device=config.device)
    model.set_embedding(embedding)
    model.to(config.device)

    crit = nn.NLLLoss()
    trainer = Trainer(model=model, crit=crit, config=config, device=config.device)
    history = trainer.train(train, valid)
    return history
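# A minimal sketch of wiring run() above to the command line with argparse.
# The flag names mirror the config attributes run() reads; the defaults are
# illustrative assumptions, and the Trainer likely reads further fields
# (e.g. epochs, lr) not shown here.
if __name__ == '__main__':
    import argparse
    import torch

    parser = argparse.ArgumentParser()
    parser.add_argument('--train_path', required=True)
    parser.add_argument('--valid_path', required=True)
    parser.add_argument('--dict_path', required=True)
    parser.add_argument('--save_path', default=None)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--tokenizer_name', default='word_tokenizer')
    parser.add_argument('--max_sent_len', type=int, default=30)
    parser.add_argument('--max_word_len', type=int, default=50)
    parser.add_argument('--hidden_size', type=int, default=64)
    parser.add_argument('--atten_size', type=int, default=64)
    parser.add_argument('--n_layers', type=int, default=1)
    config = parser.parse_args()
    config.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    run(config)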
def main():
    """
    Training and validation.
    """
    global checkpoint, start_epoch, word_map

    num_rounds = 5  # renamed from `iter` to avoid shadowing the built-in
    res = {"best_eval_acc": [], "best_eval_f1": [], "best_eval_step": []}
    for i in range(1, 1 + num_rounds):
        print("=" * 10 + "ROUND " + str(i) + "=" * 10)

        # DataLoaders
        train_loader = torch.utils.data.DataLoader(HANDataset(data_folder, 'train'),
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=workers,
                                                   pin_memory=True)
        # Load test data
        test_loader = torch.utils.data.DataLoader(HANDataset(data_folder, 'test'),
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=workers,
                                                  pin_memory=True)

        # Initialize model or load checkpoint
        if checkpoint is not None:
            checkpoint = torch.load(checkpoint)
            model = checkpoint['model']
            optimizer = checkpoint['optimizer']
            word_map = checkpoint['word_map']
            start_epoch = checkpoint['epoch'] + 1
            print('\nLoaded checkpoint from epoch %d.\n' % (start_epoch - 1))
        else:
            # embeddings, emb_size = load_word2vec_embeddings(word2vec_file, word_map)  # load pre-trained word2vec embeddings
            # embeddings, emb_size = load_glove_w2v(word_map)  # load pre-trained GloVe embeddings
            emb_size = 200
            # embeddings = torch.FloatTensor(len(word_map), emb_size)
            # init_embedding(embeddings)
            model = HierarchialAttentionNetwork(n_classes=n_classes,
                                                vocab_size=len(word_map),
                                                emb_size=emb_size,
                                                word_rnn_size=word_rnn_size,
                                                sentence_rnn_size=sentence_rnn_size,
                                                word_rnn_layers=word_rnn_layers,
                                                sentence_rnn_layers=sentence_rnn_layers,
                                                word_att_size=word_att_size,
                                                sentence_att_size=sentence_att_size,
                                                dropout=dropout)
            # model.sentence_attention.word_attention.init_embeddings(embeddings)  # initialize embedding layer with pre-trained embeddings
            model.sentence_attention.word_attention.fine_tune_embeddings(fine_tune_word_embeddings)  # fine-tune
            optimizer = optim.Adam(params=filter(lambda p: p.requires_grad, model.parameters()), lr=lr)

        # Loss function
        criterion = nn.CrossEntropyLoss()

        # Move to device
        model = model.to(device)
        criterion = criterion.to(device)

        best_acc = 0.0
        best_f1 = 0.0
        best_step = 0

        # Epochs
        for epoch in range(start_epoch, epochs):
            # One epoch's training (returns evaluation metrics on the test set)
            eval_acc, eval_f1, eval_step = train(train_loader=train_loader,
                                                 test_loader=test_loader,
                                                 model=model,
                                                 criterion=criterion,
                                                 optimizer=optimizer,
                                                 epoch=epoch)
            if eval_acc > best_acc:
                best_acc = eval_acc
                best_f1 = eval_f1
                best_step = eval_step

            # Decay learning rate every epoch
            # adjust_learning_rate(optimizer, 0.1)

            # Save checkpoint
            # save_checkpoint(epoch, model, optimizer, word_map)

        res["best_eval_acc"].append(best_acc)
        res["best_eval_f1"].append(best_f1)
        res["best_eval_step"].append(best_step)

    print("=" * 20 + "TRAINING FINISHED" + "=" * 20)
    print("avg acc: %f" % (float(np.sum(res["best_eval_acc"])) / len(res["best_eval_acc"])))
    print("avg f1: %f" % (float(np.sum(res["best_eval_f1"])) / len(res["best_eval_f1"])))
    print(" ".join(["{}: {}".format(key, str(res[key])) for key in res]))
    writer.close()
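# writer.close() above assumes a module-level TensorBoard writer. A minimal
# sketch of that assumed global, created alongside the other globals this
# script relies on (checkpoint, start_epoch, word_map, ...); the log
# directory is a hypothetical choice.
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter(log_dir='runs/han')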