def instructions():
    """Print the in-game help text to stdout.

    Three sections are written, in order: a short guide on how to phrase
    actions, the list of slash commands, and one row per entry of the
    module-level ``setting_info`` mapping showing the setting's text, its
    default, and the value currently stored in ``settings``.
    """
    how_to_play = (
        '\n'
        'AID2: Clover Edition Instructions: \n'
        ' Enter actions starting with a verb ex. "go to the tavern" or "attack the orc."\n'
        ' To speak enter say "(thing you want to say)" or just "(thing you want to say)"\n'
        ' To insert your own text into the story, enter !(thing you want to insert)'
    )
    print(how_to_play)

    command_help = (
        'The following commands can be entered for any action:',
        ' "/revert" Reverts the last action allowing you to pick a different action.',
        ' "/quit" Quits the game and saves',
        ' "/menu" Starts a new game and saves your current one',
        ' "/retry" Retries the last action',
        ' "/restart" Restarts the current story',
        ' "/print" Prints a transcript of your adventure (without extra newline formatting)',
        ' "/alter" Edit the last prompt from the AI',
        ' "/altergen" Edit the last result from the AI and have it generate the rest',
        ' "/context" Edit the story\'s permanent context paragraph',
        ' "/remember [SENTENCE]" Commits something permanently to the AI\'s memory',
        ' "/forget" Opens a menu allowing you to remove permanent memories',
        ' "/save" Saves your game to a file in the game\'s save directory',
        ' "/load" Loads a game from a file in the game\'s save directory',
        ' "/summarize" Create a new story using by summarizing your previous one',
        ' "/help" Prints these instructions again',
        ' "/set [SETTING] [VALUE]" Sets the specified setting to the specified value.:',
    )
    for line in command_help:
        print(line)

    # One row per setting: padded name, optional text (info[0]), then the
    # default (info[1]) and the value currently held in ``settings``.
    for name, info in setting_info.items():
        label = pad_text(' ' + name, 27)
        description = info[0]
        spacer = " " if description else ""
        print(label + description + spacer + "Default: " + str(info[1])
              + " | Current: " + settings.get(name))
def instructions():
    """Print the help screen (Russian introduction, English command list).

    Output order is: the introductory usage notes, the slash-command
    reference, and finally one row per key of the module-level
    ``setting_info`` mapping with its text, default and current value
    (the latter read from ``settings``).
    """
    intro_parts = [
        '\n',
        'AID2: Инструкции \n',
        ' Описывай действия с глагола т.е. "идешь в таверну", "взмахнул мечом""\n',
        ' Для описания диалога используй конструкцию ">ты говоришь что-то" или "" для прямой речи"\n',
        ' Чтобы вставить свою историю в текст введи !(сюжетный_текст)\n',
        ' Если кажется, что модель не договорила и оборвалась на полуслове то отправь пустой текст (просто enter нажми) и модель допишет текст',
    ]
    print(''.join(intro_parts))

    # Emitted with a newline separator, which yields the same stdout as one
    # print call per line.
    print(
        'The following commands can be entered for any action:',
        ' "/revert" Reverts the last action allowing you to pick a different action.',
        ' "/quit" Quits the game and saves',
        ' "/menu" Starts a new game and saves your current one',
        ' "/retry" Retries the last action',
        ' "/restart" Restarts the current story',
        ' "/print" Prints a transcript of your adventure (without extra newline formatting)',
        ' "/alter" Edit the last prompt from the AI',
        ' "/altergen" Edit the last result from the AI and have it generate the rest',
        ' "/context" Edit the story\'s permanent context paragraph',
        ' "/remember [SENTENCE]" Commits something permanently to the AI\'s memory',
        ' "/forget" Opens a menu allowing you to remove permanent memories',
        ' "/save" Saves your game to a file in the game\'s save directory',
        ' "/load" Loads a game from a file in the game\'s save directory',
        ' "/summarize" Create a new story using by summarizing your previous one',
        ' "/help" Prints these instructions again',
        ' "/set [SETTING] [VALUE]" Sets the specified setting to the specified value.:',
        sep='\n',
    )

    for key, entry in setting_info.items():
        text = entry[0]
        row = pad_text(' ' + key, 27) + text
        # Only separate the text from "Default:" when there is any text.
        if text:
            row = row + " "
        row = row + "Default: " + str(entry[1]) + " | Current: " + settings.get(key)
        print(row)
def beam_search(self, tokens, token_ids, token_mask, aspect, sentiment, start_idx=101, end_idx=102, beam_size=1, max_len=200, dev=False):
    """Decode one output sequence with beam search over a copy/generate decoder.

    Args:
        tokens: 3-D tensor, unpacked below as (batch, token_len, hidden_dim).
            The later ``view(1, 1, self.hidden_dim)`` only succeeds when the
            pooled state holds exactly ``self.hidden_dim`` values, i.e. a
            single input example.
        token_ids: ids of the source tokens; embedded via ``self.embedding``
            and used as the vocabulary positions that receive copy mass.
        token_mask: mask multiplied into ``tokens`` and the attention
            weights -- assumes shape (batch, token_len), TODO confirm.
        aspect, sentiment: tensors concatenated on the last dim and fed to
            the decoder at every step.
        start_idx: id that seeds every hypothesis (default 101).
        end_idx: id that terminates a hypothesis (default 102).
        beam_size: number of hypotheses kept per step.
        max_len: hypotheses of this length are moved to the finished pool.
        dev: when true, the early-stop and repetition filters are skipped.

    Returns:
        A CUDA float tensor (``torch.Tensor(list).cuda()``) holding the token
        ids, start token included, of the finished hypothesis with the
        highest length-normalised log-probability.
    """
    batch_size, token_len, hidden_dim = tokens.size()

    # Fuse the token-id embedding with the incoming token vectors.
    xt = self.iso_transform(self.embedding(token_ids))
    xt = xt + tokens
    tokens = self.iso_mlp(xt)

    zt = torch.cat([aspect, sentiment], dim=-1)

    # Masked pooling of the source vectors to initialise the decoder state.
    # NOTE(review): divides by the mask *mean*, not its sum -- presumably
    # matches the training-time forward pass; confirm.
    input_ = (tokens*token_mask.unsqueeze(-1)).sum(dim=1)
    input_ = input_ / token_mask.mean(dim=1).unsqueeze(-1)
    s0, c0 = self.ht_transform(input_).chunk(2, dim=-1)

    zt = zt.unsqueeze(1)
    s0 = s0.view(1, 1, self.hidden_dim)
    c0 = c0.view(1, 1, self.hidden_dim)

    # Each hypothesis is a dict; 'input' is a plain Python list of token ids.
    beam = [{
        'input': [start_idx],
        'prob': 0,
        'prob_norm': 0,
        'trigrams': []
    }]
    finished = []

    # Loop ends once every hypothesis has been moved to `finished` (or
    # filtered away), leaving the beam empty.
    while len(beam) != 0:
        new_beam = []

        # Re-run the decoder over every full prefix currently in the beam.
        inp_batch = [instance['input'] for instance in beam]
        inp_batch, _ = utils.pad_text(inp_batch)

        yt = self.embedding(inp_batch)
        # From here on batch_size is the number of hypotheses in the beam.
        batch_size, output_len, _ = yt.size()
        yzt = self.yt_transform(torch.cat([yt, zt.expand(batch_size, output_len, -1)], dim=-1))

        s0_ = s0.expand(-1, batch_size, -1).contiguous()
        c0_ = c0.expand(-1, batch_size, -1).contiguous()
        st, _ = self.decoder(yzt, (s0_, c0_))

        # Additive attention of every decoder step over the source tokens.
        kt = self.attend_key(tokens).unsqueeze(1)
        qt = self.attend_query(st).unsqueeze(2) # batch size, output len, 1, hidden dim
        at = self.attend_weight((kt+qt).tanh()).softmax(dim=2) # batch size, output len, token len, 1
        # Zero out masked positions, then renormalise.
        at = at * token_mask.unsqueeze(1).unsqueeze(-1)
        at = at / at.sum(dim=2, keepdim=True)
        vt = (tokens.unsqueeze(1)*at).sum(dim=2) # batch size, output len, hidden dim
        at = at.squeeze(-1)

        # Gate mixing the generate distribution (gt) with the copy
        # distribution (1 - gt).
        gt = self.pointer(torch.cat([yzt,st,vt], dim=-1)).sigmoid()

        # Scatter attention weights onto the vocabulary ids of the source
        # tokens to form the copy distribution.
        p_copy = torch.zeros(batch_size, output_len, self.vocab_size).cuda()
        bindex = torch.arange(0, batch_size).unsqueeze(-1).expand(-1, token_len*output_len).contiguous().view(-1)
        oindex = torch.arange(0, output_len).unsqueeze(0).unsqueeze(-1).expand(batch_size, -1, token_len).contiguous().view(-1)
        tindex = torch.arange(0, token_len).unsqueeze(0).unsqueeze(0).expand(batch_size, output_len, -1).contiguous().view(-1)
        vindex = token_ids.unsqueeze(1).expand(batch_size, output_len, -1).contiguous().view(-1)
        # NOTE(review): when a source token id occurs more than once the
        # index triples repeat; indexed `+=` may not accumulate duplicates
        # -- confirm whether index_put_(..., accumulate=True) was intended
        # (and whether training does the same).
        p_copy[bindex,oindex,vindex] += at[bindex,oindex,tindex]

        p_generate = (self.dec_classifier(st) + self.att_classifier(vt)).softmax(dim=-1)

        pt_batch = gt * p_generate + (1-gt) * p_copy

        for pt, instance in zip(pt_batch, beam):
            inp = instance['input']
            prob = instance['prob']
            trigrams = instance['trigrams']

            # Hypotheses at max length or already ended are finished and
            # not expanded further.
            if len(inp) == max_len:
                finished.append(instance)
                continue
            if inp[-1] == end_idx:
                finished.append(instance)
                continue

            # Distribution for the next token is the one at the last real
            # position of this prefix.
            pt = pt[len(inp)-1]
            pk, yk = torch.topk(pt, k=20, dim=-1)

            count = 0
            nuclear = 0  # never read in this method
            for pt, yt in zip(pk, yk):
                if count == beam_size:
                    break
                if not dev:
                    # Do not allow the sequence to end before 10 tokens.
                    if yt == end_idx and len(inp) < 10:
                        continue
                    # No immediate repetition of the previous token.
                    if len(inp) >= 1:
                        if inp[-1] == yt:
                            continue
                    # 'trigrams' stores tuple(inp[-2:]) per step, so this
                    # blocks an already-seen pair of consecutive tokens.
                    if len(inp) >= 1:
                        if tuple(inp[-1:] + [yt.item()]) in trigrams:
                            continue
                    # Blocks an A B A B pattern at the end of the sequence.
                    if len(inp) >= 3:
                        if inp[-3:-1] == inp[-1:] + [yt.item()]:
                            continue

                count += 1
                new_instance = {
                    'input': inp + [yt.item()],
                    'prob': prob + torch.log(pt),
                    # Length-normalised score used for ranking below.
                    'prob_norm': (prob + torch.log(pt)) / (len(inp) + 1),
                    # Alternative length penalty; not read in this method.
                    'prob_ln': (prob + torch.log(pt)) / ((5 + len(inp)) ** 0.6 / 6 ** 0.6),
                    'trigrams': trigrams + [tuple(inp[-2:])]
                }
                new_beam.append(new_instance)

        beam = sorted(new_beam, key=lambda a: -a['prob_norm'])[:beam_size]

    # NOTE(review): raises IndexError if every candidate was filtered out
    # before any hypothesis finished -- confirm this cannot happen.
    finished = sorted(finished, key=lambda a: -a['prob_norm'])[0]
    return torch.Tensor(finished['input']).cuda()
def create_synthetic_data(args):
    """Build a synthetic (reviews -> summary) dataset and save it as JSON.

    Every review batch is run through the trained Condense model to obtain
    per-review aspect and sentiment distributions. Each review that passes
    ``utils.check_summary_worthy`` becomes a pseudo-summary; its pseudo
    "input reviews" are picked by sampling N target distributions from
    Dirichlets centred on the summary's own distributions and, for each
    sample, taking the not-yet-chosen review that is closest to it.

    Args:
        args: parsed options. Reads ``data_type``, ``alpha``, ``bert_config``,
            ``train_file``, ``aspect_dim``, ``sentiment_dim``, ``input_dim``,
            ``hidden_dim``, ``min_length``, ``max_length``, ``max_symbols``,
            ``max_tridots``, ``min_reviews``, ``max_reviews``,
            ``mean_reviews`` and ``std_reviews``.

    Side effects:
        Writes ``data/<data_type>/train.plan.json`` (a list of
        ``{'summary': str, 'reviews': [str, ...]}``) and prints progress.

    Raises:
        AssertionError: if the Condense checkpoint file does not exist.
    """
    print(args)

    file_name = 'data/%s/train.plan.json' % args.data_type
    condense_file = 'model/%s/condense.model' % args.data_type
    alpha_a = args.alpha
    alpha_s = args.alpha

    tokenizer = BertTokenizer.from_pretrained(args.bert_config)
    tokenizer.add_special_tokens({'additional_special_tokens': ['<movie>']})
    vocab_size = len(tokenizer)

    print('Loading corpus...')
    x_train, _ = utils.abstract_data(args.train_file, tokenizer)

    print('Loading models...')
    assert os.path.exists(condense_file), 'missing checkpoint: %s' % condense_file

    con_encoder = nn.Embedding(vocab_size, args.input_dim)
    con_encoder.requires_grad_(False)
    con_encoder.cuda()

    con_model = Condense(args.aspect_dim, args.sentiment_dim, args.input_dim,
                         args.hidden_dim, vocab_size)
    con_model.requires_grad_(False)
    con_model.cuda()

    best_point = torch.load(condense_file)
    con_encoder.load_state_dict(best_point['encoder'])
    con_model.load_state_dict(best_point['model'])

    def decode_plain(ids):
        # Decode and drop the first and last whitespace-separated pieces
        # (the special tokens added by tokenizer.encode).
        return ' '.join(tokenizer.decode(ids).split()[1:-1])

    def hellinger_like(all_probs, samples):
        # Root of summed squared differences between square-rooted
        # distributions; result has one row per sample, one column per review.
        diff = np.sqrt(all_probs[np.newaxis]) - np.sqrt(samples)
        return np.sqrt((diff ** 2).sum(-1))

    data = []

    print('Creating synthetic dataset...')
    for x_batches in tqdm(x_train):
        for x_idx in range(0, len(x_batches), 500):
            x_batch = [tokenizer.encode(x_inst)
                       for x_inst in x_batches[x_idx:x_idx + 500]]
            # Too few candidates to sample a meaningful review set from.
            if len(x_batch) < 100:
                continue

            # The original rejection loop would spin forever when the lower
            # bound exceeds the upper bound; skip such batches instead.
            max_n = min(len(x_batch), args.max_reviews)
            if args.min_reviews > max_n:
                tqdm.write('Skipping batch: min_reviews (%d) > allowed maximum (%d)'
                           % (args.min_reviews, max_n))
                continue

            token_ids, mask = utils.pad_text(x_batch)
            tokens = con_encoder(token_ids)
            _, _, prob_a, prob_s = con_model.condense(tokens, mask)
            prob_a = prob_a.cpu().detach().numpy()  # b, a
            prob_s = prob_s.cpu().detach().numpy()  # b, s

            for idx, (a, s) in enumerate(zip(prob_a, prob_s)):
                if not utils.check_summary_worthy(
                        x_batch[idx], tokenizer, args.min_length,
                        args.max_length, args.max_symbols, args.max_tridots):
                    continue

                # Rejection-sample the number of input reviews.
                N = -1
                while N < args.min_reviews or N > max_n:
                    N = int(np.random.normal(args.mean_reviews, args.std_reviews))

                # Sample N noisy copies of this review's distributions.
                a_ = np.random.dirichlet(alpha_a * a + 1e-9, N)[:, np.newaxis]  # N, 1, a
                s_ = np.random.dirichlet(alpha_s * s + 1e-9, N)[:, np.newaxis]  # N, 1, s

                dist = hellinger_like(prob_a, a_) + hellinger_like(prob_s, s_)  # N, b
                # Make the summary itself the last resort for every sample.
                dist[:, idx] = 1e9

                # Greedy assignment: each sample takes its nearest review
                # that has not been taken by an earlier sample.
                chosen = []
                taken = set()
                for row in dist:
                    for candidate in np.argsort(row):
                        if candidate not in taken:
                            taken.add(candidate)
                            chosen.append(candidate)
                            break

                data.append({
                    'summary': decode_plain(x_batch[idx]),
                    # The summary can still be picked when N == len(x_batch);
                    # never list it among its own inputs.
                    'reviews': [decode_plain(x_batch[j]) for j in chosen if j != idx],
                })

    # Context manager guarantees the handle is closed even if dumping fails.
    with open(file_name, 'w') as f:
        json.dump(data, f, indent=2)
    print('Dataset saved.')
def train(args):
    """Train the Condense model and checkpoint it on dev-loss improvement.

    Args:
        args: parsed options. Reads ``data_type``, ``bert_config``,
            ``train_file``, ``dev_file`` (only for ``data_type == 'rotten'``),
            ``adjust_sentiment``, ``input_dim``, ``aspect_dim``,
            ``sentiment_dim``, ``hidden_dim``, ``learning_rate``, ``warmup``,
            ``evaluate_every`` (in training examples), ``training_stopper``
            (number of non-improving evaluations tolerated), ``num_epoch``
            and ``batch_size``.

    Side effects:
        Creates ``model/<data_type>/`` and writes
        ``model/<data_type>/condense.model`` whenever the summed dev loss
        does not exceed the best seen so far. Resumes from that file when it
        already exists.
    """
    print(args)

    os.makedirs('model/%s/' % args.data_type, exist_ok=True)
    model_file = 'model/%s/condense.model' % args.data_type

    tokenizer = BertTokenizer.from_pretrained(args.bert_config)
    tokenizer.add_special_tokens({'additional_special_tokens': ['<movie>']})
    vocab_size = len(tokenizer)

    print('Loading datasets...')
    x_train, y_train = utils.condense_data(args.train_file, args.adjust_sentiment)
    if args.data_type == 'rotten':
        x_dev, y_dev = utils.condense_data(args.dev_file, args.adjust_sentiment)
    else:
        # No dev file: hold out the first 2000 training examples. (The
        # original computed a permutation here but never applied it.)
        x_dev, y_dev = x_train[:2000], y_train[:2000]
        x_train, y_train = x_train[2000:], y_train[2000:]

    print('Initializing models...')
    encoder = nn.Embedding(vocab_size, args.input_dim)
    encoder.cuda()

    model = Condense(args.aspect_dim, args.sentiment_dim, args.input_dim,
                     args.hidden_dim, vocab_size)
    model.cuda()

    # NOTE(review): only model.parameters() are optimised, so `encoder` is
    # never updated although its gradients are clipped below -- confirm this
    # is intentional. Left unchanged to stay compatible with saved
    # optimizer states.
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate,
                                 betas=(0.9, 0.998), eps=1e-9)
    scheduler = get_constant_schedule_with_warmup(optimizer, args.warmup)

    best_loss = 10000
    if os.path.exists(model_file):
        print('Loading model checkpoint...')
        best_point = torch.load(model_file)
        encoder.load_state_dict(best_point['encoder'])
        model.load_state_dict(best_point['model'])
        optimizer.load_state_dict(best_point['optimizer'])
        best_loss = best_point['dev_loss']

    eval_at = args.evaluate_every
    stop_at = args.training_stopper

    # calculate_loss() yields (at least) these five terms, in this order:
    # aspect, aspect-norm, sentiment, sentiment-norm, adversarial.
    num_terms = 5

    def run_batch(texts, labels):
        # Encode, pad and push one batch through encoder + model.
        # Returns the loss terms and the number of examples in the batch.
        ids, mask = utils.pad_text([tokenizer.encode(text) for text in texts])
        tokens = encoder(ids)
        before, after, sent_pred, adv_pred = model(tokens, mask, ids)
        sent_gold = torch.Tensor(labels).long().cuda()
        losses = model.calculate_loss(before, after, sent_pred, adv_pred, sent_gold)
        return losses, len(ids)

    print('Start training...')
    for epoch in range(args.num_epoch):
        if stop_at <= 0:
            break

        train_perm = np.random.permutation(np.arange(len(x_train)))
        train_losses = [[] for _ in range(num_terms)]

        train_iterator = tqdm(range(0, len(train_perm), args.batch_size))
        for i in train_iterator:
            if stop_at <= 0:
                train_iterator.close()
                break
            # Drops the trailing batch of every epoch.
            if i + args.batch_size >= len(train_perm):
                continue

            encoder.train()
            model.train()

            indices = train_perm[i:i + args.batch_size]
            losses, num_examples = run_batch([x_train[idx] for idx in indices],
                                             [y_train[idx] for idx in indices])
            for k in range(num_terms):
                train_losses[k].append(losses[k].item())

            batch_loss = torch.sum(torch.stack(losses))
            batch_loss.backward()
            nn.utils.clip_grad_norm_(encoder.parameters(), 2)
            nn.utils.clip_grad_norm_(model.parameters(), 2)

            # Skip the update when any gradient is NaN.
            nan_check = any(
                param.grad is not None and torch.isnan(param.grad.sum())
                for param in model.parameters())
            if not nan_check:
                optimizer.step()
                scheduler.step()
            # Always clear gradients so a NaN batch cannot poison later steps.
            optimizer.zero_grad()

            eval_at -= num_examples
            if eval_at > 0:
                continue

            # ---- periodic evaluation on the dev set ----
            # A dedicated name is used for the dev permutation: the original
            # reused `shuffle_indices`, clobbering the training permutation
            # that this very loop is still slicing.
            dev_perm = np.random.permutation(np.arange(len(x_dev)))
            x_dev = np.array(x_dev)[dev_perm]
            y_dev = np.array(y_dev)[dev_perm]

            train_means = [np.mean(values) for values in train_losses]

            dev_losses = [[] for _ in range(num_terms)]
            encoder.eval()
            model.eval()
            # No autograd graphs are needed while scoring the dev set.
            with torch.no_grad():
                for j in tqdm(range(0, len(x_dev), args.batch_size)):
                    losses, _ = run_batch(x_dev[j:j + args.batch_size],
                                          y_dev[j:j + args.batch_size])
                    for k in range(num_terms):
                        dev_losses[k].append(losses[k].item())

            dev_means = [np.mean(values) for values in dev_losses]
            dev_loss = (dev_means[0] + dev_means[1] + dev_means[2]
                        + dev_means[3] + dev_means[4])

            tqdm.write("----------------------------------------------")
            tqdm.write("Epoch: %d, Batch: %d" % (epoch, i))
            tqdm.write("Train Losses: %.4f %.4f %.4f %.4f %.4f" % tuple(train_means))
            tqdm.write("Dev Losses: %.4f %.4f %.4f %.4f %.4f" % tuple(dev_means))

            if best_loss >= dev_loss:
                tqdm.write("UPDATING MODEL FILE...")
                best_loss = dev_loss
                stop_at = args.training_stopper
                torch.save(
                    {
                        'encoder': encoder.state_dict(),
                        'model': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'dev_loss': dev_loss
                    }, model_file)
            else:
                stop_at -= 1
            tqdm.write("STOPPING AT: %d" % stop_at)
            tqdm.write("----------------------------------------------")

            # Start fresh running averages for the next evaluation window.
            train_losses = [[] for _ in range(num_terms)]
            eval_at = args.evaluate_every
def get_items_from_dialogs(self):
    """Populate ``self.items`` from ``self.dialogs``, using a pickle cache.

    If the cache file configured on ``DatasetOption`` as
    ``<task>_<mode>_item_pkl`` exists, ``self.items`` is loaded from it and
    nothing else happens. Otherwise each dialog is normalised so that
    speakers alternate starting with the user, every utterance is converted
    with ``pad_text`` / ``self.get_imgs_prods``, and one ``Item`` is appended
    to ``self.items`` for every system utterance, made of that utterance plus
    the ``DatasetOption.context_size`` entries before it (left-padded with
    ``self.empty_utterance``). The result is then written to the cache file.

    For ``self.task == 'image'`` a system utterance is skipped when either
    its positive or negative images are reported empty by
    ``self.has_no_image``.
    """
    # just load the item pkl if it exists
    item_pkl = getattr(DatasetOption,
                       '{}_{}_item_pkl'.format(self.task, self.mode))
    if isfile(item_pkl):
        print('reading item pkl {}'.format(item_pkl))
        # NOTE: pickle can execute arbitrary code on load; the cache file
        # must come from a trusted location.
        with open(item_pkl, 'rb') as f:  # close the handle deterministically
            self.items = pkl.load(f)
        print('item pkl %s read complete' % item_pkl)
        return

    for item_idx, dialog in enumerate(self.dialogs):
        print('get items from dialogs {}/{}'.format(
            item_idx + 1, len(self.dialogs)))

        # standardize utterance
        # user, system, user, system...
        # NOTE(review): merging mutates the Utterance objects held by
        # self.dialogs in place (text and image lists grow).
        std_dialog = []
        for utter in dialog:
            if not std_dialog and utter.speaker != 'user':
                # Dialog opens with a non-user turn: put an empty user turn
                # in front of it. The original inserted the placeholder but
                # then dropped the real utterance.
                std_dialog.append(Utterance('user', '', [], []))
            if not std_dialog or utter.speaker != std_dialog[-1].speaker:
                std_dialog.append(utter)
            else:
                # Same speaker twice in a row: fold into the previous turn.
                last = std_dialog[-1]
                last.text += ' ' + utter.text
                last.images += utter.images
                last.false_images += utter.false_images

        # Sliding context window, pre-filled with padding entries.
        item = [self.empty_utterance] * DatasetOption.context_size
        for utter in std_dialog:
            text, text_length = pad_text(self.vocab,
                                         DatasetOption.context_text_length,
                                         utter.text)
            true_images, true_prods = self.get_imgs_prods(
                utter.images, DatasetOption.num_pos_images)
            false_images, false_prods = self.get_imgs_prods(
                utter.false_images, DatasetOption.num_neg_images)
            item.append((text, text_length, true_images, true_prods,
                         false_images, false_prods))

            if utter.speaker != 'system':
                continue

            # Keep the context plus the current (target) system utterance,
            # then transpose the tuples into per-field lists.
            item = item[-(DatasetOption.context_size + 1):]
            (texts, text_lengths, true_images, true_prods, false_images,
             false_prods) = map(list, zip(*item))

            if self.task == 'image':
                if (self.has_no_image(true_images[-1])
                        or self.has_no_image(false_images[-1])):
                    continue

            self.items.append(
                Item(texts, text_lengths, true_images, true_prods,
                     false_images, false_prods))

    # save items to pkl file
    print('save item pkl to {}...'.format(item_pkl))
    with open(item_pkl, 'wb') as f:
        pkl.dump(self.items, f)
    print('saved')
def evaluate(args):
    """Generate a summary for every test instance and write them to disk.

    Args:
        args: parsed options. Reads ``data_type``, ``bert_config``,
            ``test_file``, ``multi_ref``, ``input_dim``, ``aspect_dim``,
            ``sentiment_dim``, ``hidden_dim``, ``beam_size`` and ``max_len``.

    Side effects:
        Creates ``output/<data_type>/`` and writes one predicted summary per
        line to ``output/<data_type>/predictions.txt``. The first five
        gold/prediction pairs are also echoed through ``tqdm.write``.

    Raises:
        AssertionError: if either model checkpoint file is missing.
    """
    print(args)

    condense_file = 'model/%s/condense.model' % args.data_type
    abstract_file = 'model/%s/abstract.model' % args.data_type
    os.makedirs('output/%s/' % args.data_type, exist_ok=True)
    solution_file = 'output/%s/predictions.txt' % args.data_type

    tokenizer = BertTokenizer.from_pretrained(args.bert_config)
    tokenizer.add_special_tokens({'additional_special_tokens': ['<movie>']})
    vocab_size = len(tokenizer)

    print('Loading datasets...')
    x_test, y_test = utils.abstract_data(args.test_file, multi_ref=args.multi_ref)
    is_rotten = args.data_type == 'rotten'
    if is_rotten:
        m_test = utils.get_movies_from_file(args.test_file)

    print('Initializing models...')
    con_encoder = nn.Embedding(vocab_size, args.input_dim)
    con_encoder.requires_grad_(False)
    con_encoder.cuda()

    con_model = Condense(args.aspect_dim, args.sentiment_dim, args.input_dim,
                         args.hidden_dim, vocab_size)
    con_model.requires_grad_(False)
    con_model.cuda()

    model = Abstract(vocab_size, args.hidden_dim, args.hidden_dim)
    model.requires_grad_(False)
    model.cuda()

    print('Loading models...')
    assert os.path.exists(condense_file), 'missing checkpoint: %s' % condense_file
    best_point = torch.load(condense_file)
    con_encoder.load_state_dict(best_point['encoder'])
    con_model.load_state_dict(best_point['model'])

    assert os.path.exists(abstract_file), 'missing checkpoint: %s' % abstract_file
    best_point = torch.load(abstract_file)
    model.load_state_dict(best_point['model'])

    model.eval()
    printing = 5  # number of gold/pred pairs still to echo

    print('Generating summaries...')
    # The context manager closes the predictions file even when generation
    # fails part-way through.
    with open(solution_file, 'w', encoding='utf-8', errors='ignore') as f_sol:
        for j in tqdm(range(0, len(x_test), 1)):
            x_batch = x_test[j:j + 1]
            y_batch = y_test[j:j + 1]
            if is_rotten:
                m_batch = m_test[j:j + 1]

            x_batch = [[tokenizer.encode(x_rev) for x_rev in x_inst]
                       for x_inst in x_batch]
            y_batch = [tokenizer.encode(y_inst) for y_inst in y_batch]

            tokens_batch, token_ids_batch, aspect_batch, sentiment_batch = utils.run_condense(
                x_batch, tokenizer, con_encoder, con_model)

            tokens_batch = utils.pad_vector(tokens_batch, args.hidden_dim)[0]
            token_ids_batch, token_mask_batch = utils.pad_text(token_ids_batch)
            aspect_batch = torch.Tensor(aspect_batch).float().cuda()  # batch size, hidden dim
            sentiment_batch = torch.Tensor(sentiment_batch).float().cuda()  # batch size, hidden dim

            # NOTE(review): y_batch was already encoded above; this second
            # pass encodes the id lists again. Kept as-is because the 101
            # filter below appears to depend on it -- confirm.
            y_batch = [tokenizer.encode(y) for y in y_batch]
            output_batch, _ = utils.pad_text(y_batch)

            pred_batch = model.beam_search(tokens_batch, token_ids_batch,
                                           token_mask_batch, aspect_batch,
                                           sentiment_batch,
                                           beam_size=args.beam_size,
                                           max_len=args.max_len)

            output = output_batch[0].cpu().detach().numpy()
            pred = pred_batch.cpu().detach().numpy()

            # Drop every start marker (id 101), then cut at the first end
            # marker (id 102).
            output = [int(y) for y in output if int(y) != 101]
            pred = [int(p) for p in pred if int(p) != 101]
            output = output[:output.index(102)]
            try:
                pred = pred[:pred.index(102)]
            except ValueError:
                # Prediction hit max length without an end marker: keep all.
                pass

            output = tokenizer.decode(output)
            pred = tokenizer.decode(pred)
            if is_rotten:
                output = output.replace('<movie>', m_batch[0])
                pred = pred.replace('<movie>', m_batch[0])

            f_sol.write(pred + '\n')

            if printing:
                printing -= 1
                tqdm.write('gold: %s' % output)
                tqdm.write('pred: %s' % pred)
                tqdm.write("----------------------------------------------")

    print('Summaries saved.')
def train(args):
    """Train the Abstract (summary generation) model on top of a frozen Condense model.

    Args:
        args: parsed options. Reads ``data_type``, ``bert_config``,
            ``train_file``, ``test_file``, ``multi_ref``, ``input_dim``,
            ``aspect_dim``, ``sentiment_dim``, ``hidden_dim``,
            ``evaluate_every`` (in training steps), ``training_stopper``
            (number of non-improving evaluations tolerated), ``num_epoch``
            and ``batch_size``.

    Side effects:
        Writes ``model/<data_type>/abstract.model`` whenever the dev metric
        is at least as good as the best so far; resumes from that file when
        it already exists.

    Raises:
        AssertionError: if the Condense checkpoint file does not exist.
    """
    print(args)

    condense_file = 'model/%s/condense.model' % args.data_type
    abstract_file = 'model/%s/abstract.model' % args.data_type

    tokenizer = BertTokenizer.from_pretrained(args.bert_config)
    tokenizer.add_special_tokens({'additional_special_tokens': ['<movie>']})
    vocab_size = len(tokenizer)

    print('Loading datasets...')
    x_train, y_train = utils.abstract_data(args.train_file)
    # NOTE(review): model selection uses args.test_file as the dev set.
    x_dev, y_dev = utils.abstract_data(args.test_file, multi_ref=args.multi_ref)

    print('Initializing models...')
    language_model = BertForMaskedLM.from_pretrained(args.bert_config)
    language_model.requires_grad_(False)
    language_model.cuda()

    assert os.path.exists(condense_file), 'missing checkpoint: %s' % condense_file
    con_encoder = nn.Embedding(vocab_size, args.input_dim)
    con_encoder.requires_grad_(False)
    con_encoder.cuda()

    con_model = Condense(args.aspect_dim, args.sentiment_dim, args.input_dim,
                         args.hidden_dim, vocab_size)
    con_model.requires_grad_(False)
    con_model.cuda()

    best_point = torch.load(condense_file)
    con_encoder.load_state_dict(best_point['encoder'])
    con_model.load_state_dict(best_point['model'])

    model = Abstract(vocab_size, args.hidden_dim, args.hidden_dim)
    model.cuda()
    optimizer = torch.optim.Adam(model.parameters())

    best_acc = 0
    if os.path.exists(abstract_file):
        print('Loading model checkpoint...')
        best_point = torch.load(abstract_file)
        model.load_state_dict(best_point['model'])
        optimizer.load_state_dict(best_point['optimizer'])
        best_acc = best_point['dev_acc']

    eval_at = args.evaluate_every
    stop_at = args.training_stopper
    losses = []
    gate = []

    def prepare(x_raw, y_raw):
        # Tokenise one batch and build every tensor the Abstract model takes.
        x_ids = [[tokenizer.encode(x_rev) for x_rev in x_inst] for x_inst in x_raw]
        y_ids = [tokenizer.encode(y_inst) for y_inst in y_raw]

        tokens, token_ids, aspect, sentiment = utils.run_condense(
            x_ids, tokenizer, con_encoder, con_model)
        output_smooth, output_mask = utils.bert_label_smoothing(
            y_ids, tokenizer, language_model)

        tokens = utils.pad_vector(tokens, args.hidden_dim)[0]
        token_ids, token_mask = utils.pad_text(token_ids)
        aspect = torch.Tensor(aspect).float().cuda()  # batch size, hidden dim
        sentiment = torch.Tensor(sentiment).float().cuda()  # batch size, hidden dim
        output, _ = utils.pad_text(y_ids)
        return (tokens, token_ids, token_mask, aspect, sentiment, output,
                output_smooth, output_mask)

    print('Start training...')
    for epoch in range(args.num_epoch):
        if stop_at <= 0:
            break

        shuffle_indices = np.random.permutation(len(x_train))

        for step in tqdm(range(0, len(x_train), args.batch_size)):
            if stop_at <= 0:
                break

            indices = shuffle_indices[step:step + args.batch_size]
            inputs = prepare([x_train[idx] for idx in indices],
                             [y_train[idx] for idx in indices])

            model.train()
            _, gt, loss = model(*inputs)
            losses.append(loss.item())
            gate.append(gt.mean().item())

            try:
                loss.backward()
            except RuntimeError as err:
                # Best-effort: skip a batch whose backward pass fails, but
                # say so and drop partial gradients so they cannot leak into
                # the next update. (A bare except here also swallowed
                # KeyboardInterrupt.)
                tqdm.write('Skipping batch, backward failed: %s' % err)
                optimizer.zero_grad()
                continue

            nn.utils.clip_grad_norm_(model.parameters(), 3)

            # Skip the update when any gradient is NaN.
            nan_check = any(
                param.grad is not None and torch.isnan(param.grad.sum())
                for param in model.parameters())
            if not nan_check:
                optimizer.step()
            optimizer.zero_grad()

            eval_at -= 1
            if eval_at > 0:
                continue

            # ---- periodic evaluation ----
            with torch.no_grad():
                train_loss = np.mean(losses)
                train_gate = np.mean(gate)

                eval_at = args.evaluate_every
                losses = []
                gate = []

                tqdm.write("----------------------------------------------")
                tqdm.write("Epoch: %d" % (epoch))
                tqdm.write("Step: %d" % (step))
                tqdm.write('Train gate: %.4f' % train_gate)
                tqdm.write('Train loss: %.4f' % train_loss)

                # Dev evaluation is skipped while the training loss is above
                # this fixed threshold.
                if train_loss > 4:
                    continue

                dev_acc = []
                printing = 5  # number of gold/pred pairs still to echo

                model.eval()
                for j in tqdm(range(0, len(x_dev), 1)):
                    inputs = prepare(x_dev[j:j + 1], y_dev[j:j + 1])
                    output_batch = inputs[5]

                    pred_batch, _, loss = model(*inputs, dev=True)
                    # In dev mode the third return value is indexable; its
                    # second entry is the metric tracked as "ACC".
                    dev_acc.append(loss[1].item())

                    output = list(output_batch[0].cpu().detach().numpy())
                    pred = list(pred_batch[0].argmax(-1).cpu().detach().numpy())

                    # Gold: drop the leading token, cut at the end marker (102).
                    output = output[1:output.index(102)]
                    try:
                        pred = pred[:pred.index(102)]
                    except ValueError:
                        # No end marker predicted: keep the full sequence.
                        pass

                    output = tokenizer.decode(output)
                    pred = tokenizer.decode(pred)

                    if printing:
                        printing -= 1
                        tqdm.write('gold: %s' % output)
                        tqdm.write('pred: %s' % pred)
                        tqdm.write("----------------------------------------------")

                dev_acc = np.mean(dev_acc)
                tqdm.write('Dev ACC: %.4f' % dev_acc)

                if dev_acc >= best_acc:
                    tqdm.write('UPDATING MODEL FILE...')
                    best_acc = dev_acc
                    stop_at = args.training_stopper
                    torch.save(
                        {
                            'model': model.state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'dev_acc': dev_acc,
                        }, abstract_file)
                else:
                    stop_at -= 1
                tqdm.write("STOPPING AT: %d" % stop_at)
                tqdm.write("----------------------------------------------")
def to_tensors(self, vocab):
    """Return this product's fields in model-ready form.

    The product string is passed through ``pad_text`` with
    ``DatasetOption.product_text_length`` and the attribute ids become a
    ``torch.long`` tensor; the taxonomy is handed back untouched.

    Returns:
        Tuple ``(text, length, taxonomy, attributes)`` where ``text`` and
        ``length`` are the pair produced by ``pad_text``.
    """
    max_text_length = DatasetOption.product_text_length
    padded = pad_text(vocab, max_text_length, self.prod_str)
    text, length = padded

    attribute_ids = torch.tensor(self.attributes, dtype=torch.long)

    return (text, length, self.taxonomy, attribute_ids)