Example #1
def instructions():
    print('\n' +
          'AID2: Clover Edition Instructions: \n' +
          '  Enter actions starting with a verb ex. "go to the tavern" or "attack the orc."\n' +
          '  To speak enter say "(thing you want to say)" or just "(thing you want to say)"\n' +
          '  To insert your own text into the story, enter !(thing you want to insert)')
    print('The following commands can be entered for any action:')
    print('  "/revert"                Reverts the last action allowing you to pick a different action.')
    print('  "/quit"                  Quits the game and saves')
    print('  "/menu"                  Starts a new game and saves your current one')
    print('  "/retry"                 Retries the last action')
    print('  "/restart"               Restarts the current story')
    print('  "/print"                 Prints a transcript of your adventure (without extra newline formatting)')
    print('  "/alter"                 Edit the last prompt from the AI')
    print('  "/altergen"              Edit the last result from the AI and have it generate the rest')
    print('  "/context"               Edit the story\'s permanent context paragraph')
    print('  "/remember [SENTENCE]"   Commits something permanently to the AI\'s memory')
    print('  "/forget"                Opens a menu allowing you to remove permanent memories')
    print('  "/save"                  Saves your game to a file in the game\'s save directory')
    print('  "/load"                  Loads a game from a file in the game\'s save directory')
    print('  "/summarize"             Create a new story using by summarizing your previous one')
    print('  "/help"                  Prints these instructions again')
    print('  "/set [SETTING] [VALUE]" Sets the specified setting to the specified value.:')
    for k, v in setting_info.items():
        print(pad_text('        ' + k, 27) + v[0] + (" " if v[0] else "") +
              "Default: " + str(v[1]) + " | " +
              "Current: " + settings.get(k))
Example #2
def instructions():
    print(
        '\n' + 'AID2: Instructions \n' +
        '  Describe actions starting with a verb, e.g. "you go to the tavern", "you swung the sword"\n' +
        '  For dialogue, use the form ">you say something" or "" for direct speech\n' +
        '  To insert your own story text, enter !(story_text)\n' +
        '  If the model seems to have stopped mid-sentence, send empty input (just press enter) and it will continue the text'
    )
    print('The following commands can be entered for any action:')
    print(
        '  "/revert"                Reverts the last action allowing you to pick a different action.'
    )
    print('  "/quit"                  Quits the game and saves')
    print(
        '  "/menu"                  Starts a new game and saves your current one'
    )
    print('  "/retry"                 Retries the last action')
    print('  "/restart"               Restarts the current story')
    print(
        '  "/print"                 Prints a transcript of your adventure (without extra newline formatting)'
    )
    print('  "/alter"                 Edit the last prompt from the AI')
    print(
        '  "/altergen"              Edit the last result from the AI and have it generate the rest'
    )
    print(
        '  "/context"               Edit the story\'s permanent context paragraph'
    )
    print(
        '  "/remember [SENTENCE]"   Commits something permanently to the AI\'s memory'
    )
    print(
        '  "/forget"                Opens a menu allowing you to remove permanent memories'
    )
    print(
        '  "/save"                  Saves your game to a file in the game\'s save directory'
    )
    print(
        '  "/load"                  Loads a game from a file in the game\'s save directory'
    )
    print(
        '  "/summarize"             Create a new story by summarizing your previous one'
    )
    print('  "/help"                  Prints these instructions again')
    print(
        '  "/set [SETTING] [VALUE]" Sets the specified setting to the specified value:'
    )
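    # print each setting with its default and current value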
    for k, v in setting_info.items():
        print(
            pad_text('        ' + k, 27) + v[0] + (" " if v[0] else "") +
            "Default: " + str(v[1]) + " | " +
            "Current: " + settings.get(k))
Example #3
  def beam_search(self, tokens, token_ids, token_mask,
                  aspect, sentiment, start_idx=101, end_idx=102, 
                  beam_size=1, max_len=200, dev=False):
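    """Generate a summary with beam search, conditioned on aspect/sentiment.

    Unless dev=True, candidates that repeat the previous token, reuse an
    already generated bigram, or alternate with the previous two tokens are
    skipped. Hypotheses are ranked by length-normalized log probability.
    """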
    batch_size, token_len, hidden_dim = tokens.size()

    xt = self.iso_transform(self.embedding(token_ids))
    xt = xt + tokens
    tokens = self.iso_mlp(xt)

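    # condition vector: concatenation of the aspect and sentiment representations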
    zt = torch.cat([aspect, sentiment], dim=-1)

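    # initialize the decoder state (s0, c0) from a mask-weighted pooling of the tokens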
    input_ = (tokens*token_mask.unsqueeze(-1)).sum(dim=1)
    input_ = input_ / token_mask.mean(dim=1).unsqueeze(-1)
    s0, c0 = self.ht_transform(input_).chunk(2, dim=-1)
    zt = zt.unsqueeze(1)

    s0 = s0.view(1, 1, self.hidden_dim)
    c0 = c0.view(1, 1, self.hidden_dim)

    beam = [{
      'input': [start_idx],
      'prob': 0,
      'prob_norm': 0,
      'trigrams': []
    }]
    finished = []

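    # expand every live hypothesis by one token per iteration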
    while len(beam) != 0:
      new_beam = []

      inp_batch = [instance['input'] for instance in beam]
      inp_batch, _ = utils.pad_text(inp_batch)

      yt = self.embedding(inp_batch)

      batch_size, output_len, _ = yt.size()
      yzt = self.yt_transform(torch.cat([yt, zt.expand(batch_size, output_len, -1)], dim=-1))

      s0_ = s0.expand(-1, batch_size, -1).contiguous()
      c0_ = c0.expand(-1, batch_size, -1).contiguous()

      st, _ = self.decoder(yzt, (s0_, c0_))

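      # additive attention of decoder states over the source tokens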
      kt = self.attend_key(tokens).unsqueeze(1)
      qt = self.attend_query(st).unsqueeze(2) # batch size, output len, 1, hidden dim
      at = self.attend_weight((kt+qt).tanh()).softmax(dim=2) # batch size, output len, token len, 1
      at = at * token_mask.unsqueeze(1).unsqueeze(-1)
      at = at / at.sum(dim=2, keepdim=True)
      vt = (tokens.unsqueeze(1)*at).sum(dim=2) # batch size, output len, hidden dim
      at = at.squeeze(-1)

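      # pointer-generator: gate gt mixes vocabulary generation with copying;
      # the index tensors scatter attention weights onto source token ids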
      gt = self.pointer(torch.cat([yzt,st,vt], dim=-1)).sigmoid()
      p_copy = torch.zeros(batch_size, output_len, self.vocab_size).cuda()
      bindex = torch.arange(0, batch_size).unsqueeze(-1).expand(-1, token_len*output_len).contiguous().view(-1)
      oindex = torch.arange(0, output_len).unsqueeze(0).unsqueeze(-1).expand(batch_size, -1, token_len).contiguous().view(-1)
      tindex = torch.arange(0, token_len).unsqueeze(0).unsqueeze(0).expand(batch_size, output_len, -1).contiguous().view(-1)
      vindex = token_ids.unsqueeze(1).expand(batch_size, output_len, -1).contiguous().view(-1)
      p_copy[bindex,oindex,vindex] += at[bindex,oindex,tindex]

      p_generate = (self.dec_classifier(st) + self.att_classifier(vt)).softmax(dim=-1)
      pt_batch = gt * p_generate + (1-gt) * p_copy

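      # extend each hypothesis with its top-scoring next tokens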
      for pt, instance in zip(pt_batch, beam):
        inp = instance['input']
        prob = instance['prob']
        trigrams = instance['trigrams']

        if len(inp) == max_len:
          finished.append(instance)
          continue
        if inp[-1] == end_idx:
          finished.append(instance)
          continue

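        # probability distribution for the next token at the last decoded position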
        pt = pt[len(inp)-1]

        pk, yk = torch.topk(pt, k=20, dim=-1)
        count = 0
        for p_cand, y_cand in zip(pk, yk):
          if count == beam_size:
            break

          if not dev:
            if y_cand == end_idx and len(inp) < 10:
              continue
            if len(inp) >= 1:
              if inp[-1] == y_cand:
                continue
              if tuple(inp[-1:] + [y_cand.item()]) in trigrams:
                continue
            if len(inp) >= 3:
              if inp[-3:-1] == inp[-1:] + [y_cand.item()]:
                continue

          count += 1
          new_instance = {
            'input': inp + [y_cand.item()],
            'prob': prob + torch.log(p_cand),
            'prob_norm': (prob + torch.log(p_cand)) / (len(inp) + 1),
            'prob_ln': (prob + torch.log(p_cand)) / ((5 + len(inp)) ** 0.6 / 6 ** 0.6),
            'trigrams': trigrams + [tuple(inp[-2:])]
          }
          new_beam.append(new_instance)

      beam = sorted(new_beam, key=lambda a: -a['prob_norm'])[:beam_size]

    finished = sorted(finished, key=lambda a: -a['prob_norm'])[0]
    return torch.Tensor(finished['input']).cuda()
Example #4
def create_synthetic_data(args):
    print(args)

    file_name = 'data/%s/train.plan.json' % args.data_type

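    # Dirichlet concentration used when sampling aspect/sentiment mixtures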
    alpha_a = args.alpha
    alpha_s = args.alpha

    condense_file = 'model/%s/condense.model' % args.data_type

    tokenizer = BertTokenizer.from_pretrained(args.bert_config)
    tokenizer.add_special_tokens({'additional_special_tokens': ['<movie>']})
    vocab_size = len(tokenizer)

    print('Loading corpus...')
    x_train, _ = utils.abstract_data(args.train_file, tokenizer)

    print('Loading models...')
    assert os.path.exists(condense_file)
    con_encoder = nn.Embedding(vocab_size, args.input_dim)
    con_encoder.requires_grad_(False)
    con_encoder.cuda()

    con_model = Condense(args.aspect_dim, args.sentiment_dim, args.input_dim,
                         args.hidden_dim, vocab_size)
    con_model.requires_grad_(False)
    con_model.cuda()

    best_point = torch.load(condense_file)
    con_encoder.load_state_dict(best_point['encoder'])
    con_model.load_state_dict(best_point['model'])

    data = []
    vectors = []

    print('Creating synthetic dataset...')
    for i in tqdm(range(len(x_train))):
        x_batches = x_train[i]

        for x_idx in range(0, len(x_batches), 500):
            x_batch = x_batches[x_idx:x_idx + 500]
            x_batch = [tokenizer.encode(x_inst) for x_inst in x_batch]
            if len(x_batch) < 100:
                continue

            token_ids, mask = utils.pad_text(x_batch)
            tokens = con_encoder(token_ids)
            _, doc, prob_a, prob_s = con_model.condense(tokens, mask)

            doc = doc.cpu().detach().numpy()
            prob_a = prob_a.cpu().detach().numpy()  # b, a
            prob_s = prob_s.cpu().detach().numpy()  # b, s

            for idx, (_, a, s) in enumerate(zip(doc, prob_a, prob_s)):
                if not utils.check_summary_worthy(
                        x_batch[idx], tokenizer, args.min_length,
                        args.max_length, args.max_symbols, args.max_tridots):
                    continue

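                # sample the number of paired reviews from a clipped normal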
                N = -1
                while N < args.min_reviews or N > min(len(x_batch),
                                                      args.max_reviews):
                    N = np.random.normal(args.mean_reviews, args.std_reviews)
                N = int(N)

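                # sample perturbed aspect/sentiment distributions around the document's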
                a_ = np.random.dirichlet(alpha_a * a + 1e-9,
                                         N)[:, np.newaxis]  # N, a
                s_ = np.random.dirichlet(alpha_s * s + 1e-9,
                                         N)[:, np.newaxis]  # N, s

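                # Hellinger-style distance between sampled and corpus distributions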
                dist_a = np.sqrt(
                    ((np.sqrt(prob_a[np.newaxis]) - np.sqrt(a_))**2).sum(-1))
                dist_s = np.sqrt(
                    ((np.sqrt(prob_s[np.newaxis]) - np.sqrt(s_))**2).sum(-1))

                dist = dist_a + dist_s
                dist[:, idx] = 1e9

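                # greedily match each sample to its nearest unused review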
                idx_set = []
                for d in dist:
                    for d_ in np.argsort(d):
                        if d_ not in idx_set:
                            idx_set.append(d_)
                            break

                inst = {}
                inst['summary'] = ' '.join(
                    tokenizer.decode(x_batch[idx]).split()[1:-1])
                inst['reviews'] = [
                    ' '.join(tokenizer.decode(x_batch[i]).split()[1:-1])
                    for i in idx_set if idx != i
                ]
                data.append(inst)

    with open(file_name, 'w') as f:
        json.dump(data, f, indent=2)
    print('Dataset saved.')
Example #5
def train(args):
    print(args)

    os.makedirs('model/%s/' % args.data_type, exist_ok=True)
    model_file = 'model/%s/condense.model' % args.data_type

    tokenizer = BertTokenizer.from_pretrained(args.bert_config)
    tokenizer.add_special_tokens({'additional_special_tokens': ['<movie>']})
    vocab_size = len(tokenizer)

    print('Loading datasets...')
    x_train, y_train = utils.condense_data(args.train_file,
                                           args.adjust_sentiment)
    if args.data_type == 'rotten':
        x_dev, y_dev = utils.condense_data(args.dev_file,
                                           args.adjust_sentiment)
    else:
        # hold out the first 2000 instances as a dev set
        x_dev = x_train[:2000]
        y_dev = y_train[:2000]
        x_train = x_train[2000:]
        y_train = y_train[2000:]

    print('Initializing models...')
    encoder = nn.Embedding(vocab_size, args.input_dim)
    encoder.cuda()

    model = Condense(args.aspect_dim, args.sentiment_dim, args.input_dim,
                     args.hidden_dim, vocab_size)
    model.cuda()

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 betas=(0.9, 0.998),
                                 eps=1e-9)
    scheduler = get_constant_schedule_with_warmup(optimizer, args.warmup)

    best_loss = 10000
    if os.path.exists(model_file):
        print('Loading model checkpoint...')
        best_point = torch.load(model_file)
        encoder.load_state_dict(best_point['encoder'])
        model.load_state_dict(best_point['model'])
        optimizer.load_state_dict(best_point['optimizer'])
        best_loss = best_point['dev_loss']

    eval_at = args.evaluate_every
    stop_at = args.training_stopper

    print('Start training...')
    for epoch in range(args.num_epoch):
        if stop_at <= 0:
            break

        shuffle_indices = np.random.permutation(np.arange(len(x_train)))

        asp_losses = []
        asp_norm_losses = []
        sen_losses = []
        sen_norm_losses = []
        adv_losses = []

        train_iterator = tqdm(range(0, len(shuffle_indices), args.batch_size))
        for i in train_iterator:
            if stop_at <= 0:
                train_iterator.close()
                break
            if i + args.batch_size >= len(shuffle_indices):
                continue

            encoder.train()
            model.train()

            indices = shuffle_indices[i:i + args.batch_size]
            x_batch = [x_train[idx] for idx in indices]
            y_batch = [y_train[idx] for idx in indices]

            x_batch = [tokenizer.encode(x_inst) for x_inst in x_batch]
            x_batch, mask = utils.pad_text(x_batch)

            tokens = encoder(x_batch)
            before, after, sent_pred, adv_pred = model(tokens, mask, x_batch)

            sent_gold = torch.Tensor(y_batch).long().cuda()
            losses = model.calculate_loss(before, after, sent_pred, adv_pred,
                                          sent_gold)

            asp_losses.append(losses[0].item())
            asp_norm_losses.append(losses[1].item())
            sen_losses.append(losses[2].item())
            sen_norm_losses.append(losses[3].item())
            adv_losses.append(losses[4].item())

            batch_loss = torch.sum(torch.stack(losses))
            batch_loss.backward()
            nn.utils.clip_grad_norm_(encoder.parameters(), 2)
            nn.utils.clip_grad_norm_(model.parameters(), 2)
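            # skip the update if any gradient turned NaN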
            nan_check = False
            for param in model.parameters():
                if param.grad is not None:
                    if torch.isnan(param.grad.sum()):
                        nan_check = True
                        break
            if not nan_check:
                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()

            eval_at -= len(x_batch)
            if eval_at <= 0:
                shuffle_indices = np.random.permutation(np.arange(len(x_dev)))
                x_dev = np.array(x_dev)[shuffle_indices]
                y_dev = np.array(y_dev)[shuffle_indices]

                train_asp_loss = np.mean(asp_losses)
                train_asp_norm_loss = np.mean(asp_norm_losses)
                train_sen_loss = np.mean(sen_losses)
                train_sen_norm_loss = np.mean(sen_norm_losses)
                train_adv_loss = np.mean(adv_losses)

                dev_asp_loss = []
                dev_asp_norm_loss = []
                dev_sen_loss = []
                dev_sen_norm_loss = []
                dev_adv_loss = []

                for j in tqdm(range(0, len(x_dev), args.batch_size)):
                    encoder.eval()
                    model.eval()

                    x_batch = x_dev[j:j + args.batch_size]
                    x_batch = [tokenizer.encode(x_inst) for x_inst in x_batch]
                    x_batch, mask = utils.pad_text(x_batch)

                    tokens = encoder(x_batch)
                    before, after, sent_pred, adv_pred = model(
                        tokens, mask, x_batch)

                    sent_gold = torch.Tensor(
                        y_dev[j:j + args.batch_size]).long().cuda()
                    losses = model.calculate_loss(before, after, sent_pred,
                                                  adv_pred, sent_gold)

                    dev_asp_loss.append(losses[0].item())
                    dev_asp_norm_loss.append(losses[1].item())
                    dev_sen_loss.append(losses[2].item())
                    dev_sen_norm_loss.append(losses[3].item())
                    dev_adv_loss.append(losses[4].item())

                dev_asp_loss = np.mean(dev_asp_loss)
                dev_asp_norm_loss = np.mean(dev_asp_norm_loss)
                dev_sen_loss = np.mean(dev_sen_loss)
                dev_sen_norm_loss = np.mean(dev_sen_norm_loss)
                dev_adv_loss = np.mean(dev_adv_loss)
                dev_loss = dev_asp_loss + dev_asp_norm_loss + dev_sen_loss + dev_sen_norm_loss + dev_adv_loss

                tqdm.write("----------------------------------------------")
                tqdm.write("Epoch: %d, Batch: %d" % (epoch, i))
                tqdm.write(
                    "Train Losses: %.4f %.4f %.4f %.4f %.4f" %
                    (train_asp_loss, train_asp_norm_loss, train_sen_loss,
                     train_sen_norm_loss, train_adv_loss))
                tqdm.write("Dev Losses: %.4f %.4f %.4f %.4f %.4f" %
                           (dev_asp_loss, dev_asp_norm_loss, dev_sen_loss,
                            dev_sen_norm_loss, dev_adv_loss))

                if best_loss >= dev_loss:
                    tqdm.write("UPDATING MODEL FILE...")
                    best_loss = dev_loss
                    stop_at = args.training_stopper
                    torch.save(
                        {
                            'encoder': encoder.state_dict(),
                            'model': model.state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'dev_loss': dev_loss
                        }, model_file)
                else:
                    stop_at -= 1
                    tqdm.write("STOPPING AT: %d" % stop_at)

                tqdm.write("----------------------------------------------")

                asp_losses = []
                asp_norm_losses = []
                sen_losses = []
                sen_norm_losses = []
                adv_losses = []
                eval_at = args.evaluate_every
Example #6
    def get_items_from_dialogs(self):
        # just load the item pkl if it exists
        item_pkl = getattr(DatasetOption,
                           '{}_{}_item_pkl'.format(self.task, self.mode))
        if isfile(item_pkl):
            print('reading item pkl {}'.format(item_pkl))
            with open(item_pkl, 'rb') as f:
                self.items = pkl.load(f)
            print('item pkl {} read complete'.format(item_pkl))
            return

        for item_idx, dialog in enumerate(self.dialogs):
            print('get items from dialogs {}/{}'.format(
                item_idx + 1, len(self.dialogs)))

            # standardize utterance
            # user, system, user, system...
            std_dialog = []
            for utter in dialog:
                if not std_dialog:
                    if utter.speaker != 'user':
                        std_dialog.append(Utterance('user', '', [], []))
                    else:
                        std_dialog.append(utter)
                else:
                    if utter.speaker != std_dialog[-1].speaker:
                        std_dialog.append(utter)
                    else:
                        std_dialog[-1].text += ' ' + utter.text
                        std_dialog[-1].images += utter.images
                        std_dialog[-1].false_images += utter.false_images

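            # sliding window over the most recent context_size utterances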
            item = [self.empty_utterance] * DatasetOption.context_size

            for idx, utter in enumerate(std_dialog):
                text, text_length = pad_text(self.vocab,
                                             DatasetOption.context_text_length,
                                             utter.text)
                true_images, true_prods = self.get_imgs_prods(
                    utter.images, DatasetOption.num_pos_images)
                false_images, false_prods = self.get_imgs_prods(
                    utter.false_images, DatasetOption.num_neg_images)
                item.append((text, text_length, true_images, true_prods,
                             false_images, false_prods))
                if utter.speaker == 'system':
                    item = item[-(DatasetOption.context_size + 1):]
                    texts, text_lengths, true_images, true_prods, false_images, false_prods = map(
                        list, zip(*item))
                    if self.task == 'image':
                        if self.has_no_image(
                                true_images[-1]) or self.has_no_image(
                                    false_images[-1]):
                            continue
                    self.items.append(
                        Item(texts, text_lengths, true_images, true_prods,
                             false_images, false_prods))

        # save items to pkl file
        print('save item pkl to {}...'.format(item_pkl))
        with open(item_pkl, 'wb') as f:
            pkl.dump(self.items, f)
        print('saved')
Example #7
def evaluate(args):
    print(args)

    condense_file = 'model/%s/condense.model' % args.data_type
    abstract_file = 'model/%s/abstract.model' % args.data_type
    os.makedirs('output/%s/' % args.data_type, exist_ok=True)
    solution_file = 'output/%s/predictions.txt' % args.data_type

    tokenizer = BertTokenizer.from_pretrained(args.bert_config)
    tokenizer.add_special_tokens({'additional_special_tokens': ['<movie>']})
    vocab_size = len(tokenizer)

    print('Loading datasets...')
    x_test, y_test = utils.abstract_data(args.test_file,
                                         multi_ref=args.multi_ref)
    if args.data_type == 'rotten':
        m_test = utils.get_movies_from_file(args.test_file)

    print('Initializing models...')
    con_encoder = nn.Embedding(vocab_size, args.input_dim)
    con_encoder.requires_grad_(False)
    con_encoder.cuda()

    con_model = Condense(args.aspect_dim, args.sentiment_dim, args.input_dim,
                         args.hidden_dim, vocab_size)
    con_model.requires_grad_(False)
    con_model.cuda()

    model = Abstract(vocab_size, args.hidden_dim, args.hidden_dim)
    model.requires_grad_(False)
    model.cuda()

    print('Loading models...')
    assert os.path.exists(condense_file)
    best_point = torch.load(condense_file)
    con_encoder.load_state_dict(best_point['encoder'])
    con_model.load_state_dict(best_point['model'])

    assert os.path.exists(abstract_file)
    best_point = torch.load(abstract_file)
    model.load_state_dict(best_point['model'])

    f_sol = open(solution_file, 'w', encoding='utf-8', errors='ignore')
    printing = 5
    print('Generating summaries...')
    for j in tqdm(range(0, len(x_test), 1)):
        model.eval()

        x_batch = x_test[j:j + 1]
        y_batch = y_test[j:j + 1]
        if args.data_type == 'rotten':
            m_batch = m_test[j:j + 1]

        x_batch = [[tokenizer.encode(x_rev) for x_rev in x_inst]
                   for x_inst in x_batch]
        y_batch = [tokenizer.encode(y_inst) for y_inst in y_batch]

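        # condense the reviews into token, aspect, and sentiment representations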
        tokens_batch, token_ids_batch, aspect_batch, sentiment_batch = utils.run_condense(
            x_batch, tokenizer, con_encoder, con_model)

        tokens_batch = utils.pad_vector(tokens_batch, args.hidden_dim)[0]
        token_ids_batch, token_mask_batch = utils.pad_text(token_ids_batch)
        aspect_batch = torch.Tensor(
            aspect_batch).float().cuda()  # batch size, hidden dim
        sentiment_batch = torch.Tensor(
            sentiment_batch).float().cuda()  # batch size, hidden dim

        # y_batch is already tokenized above; just pad it
        output_batch, output_mask_batch = utils.pad_text(y_batch)

        pred_batch = model.beam_search(tokens_batch,
                                       token_ids_batch,
                                       token_mask_batch,
                                       aspect_batch,
                                       sentiment_batch,
                                       beam_size=args.beam_size,
                                       max_len=args.max_len)

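        # drop [CLS] (id 101) and truncate at [SEP] (id 102) before decoding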
        output = output_batch[0].cpu().detach().numpy()
        pred = pred_batch.cpu().detach().numpy()
        output = [int(y) for y in output if int(y) != 101]
        pred = [int(p) for p in pred if int(p) != 101]
        output = output[:output.index(102)]
        try:
            pred = pred[:pred.index(102)]
        except ValueError:
            pass
        output = tokenizer.decode(output)
        pred = tokenizer.decode(pred)
        if args.data_type == 'rotten':
            output = output.replace('<movie>', m_batch[0])
            pred = pred.replace('<movie>', m_batch[0])

        f_sol.write(pred + '\n')

        if printing:
            printing -= 1
            tqdm.write('gold: %s' % output)
            tqdm.write('pred: %s' % pred)
            tqdm.write("----------------------------------------------")

    f_sol.close()
    print('Summaries saved.')
Example #8
def train(args):
    print(args)

    condense_file = 'model/%s/condense.model' % args.data_type
    abstract_file = 'model/%s/abstract.model' % args.data_type

    tokenizer = BertTokenizer.from_pretrained(args.bert_config)
    tokenizer.add_special_tokens({'additional_special_tokens': ['<movie>']})
    vocab_size = len(tokenizer)

    print('Loading datasets...')
    x_train, y_train = utils.abstract_data(args.train_file)
    x_dev, y_dev = utils.abstract_data(args.test_file,
                                       multi_ref=args.multi_ref)

    print('Initializing models...')
    language_model = BertForMaskedLM.from_pretrained(args.bert_config)
    language_model.requires_grad_(False)
    language_model.cuda()

    assert os.path.exists(condense_file)
    con_encoder = nn.Embedding(vocab_size, args.input_dim)
    con_encoder.requires_grad_(False)
    con_encoder.cuda()

    con_model = Condense(args.aspect_dim, args.sentiment_dim, args.input_dim,
                         args.hidden_dim, vocab_size)
    con_model.requires_grad_(False)
    con_model.cuda()

    best_point = torch.load(condense_file)
    con_encoder.load_state_dict(best_point['encoder'])
    con_model.load_state_dict(best_point['model'])

    model = Abstract(vocab_size, args.hidden_dim, args.hidden_dim)
    model.cuda()

    optimizer = torch.optim.Adam(model.parameters())

    best_acc = 0
    if os.path.exists(abstract_file):
        print('Loading model checkpoint...')
        best_point = torch.load(abstract_file)
        model.load_state_dict(best_point['model'])
        optimizer.load_state_dict(best_point['optimizer'])
        best_acc = best_point['dev_acc']

    eval_at = args.evaluate_every
    stop_at = args.training_stopper

    losses = []
    gate = []

    print('Start training...')
    for epoch in range(args.num_epoch):
        if stop_at <= 0:
            break

        shuffle_indices = np.random.permutation(len(x_train))

        for step in tqdm(range(0, len(x_train), args.batch_size)):
            if stop_at <= 0:
                break

            indices = shuffle_indices[step:step + args.batch_size]
            x_batch = [x_train[idx] for idx in indices]
            y_batch = [y_train[idx] for idx in indices]

            x_batch = [[tokenizer.encode(x_rev) for x_rev in x_inst]
                       for x_inst in x_batch]
            y_batch = [tokenizer.encode(y_inst) for y_inst in y_batch]

            model.train()

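            # condense reviews and build BERT-smoothed target distributions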
            tokens_batch, token_ids_batch, aspect_batch, sentiment_batch = utils.run_condense(
                x_batch, tokenizer, con_encoder, con_model)
            output_smooth_batch, output_mask_batch = utils.bert_label_smoothing(
                y_batch, tokenizer, language_model)

            tokens_batch = utils.pad_vector(tokens_batch, args.hidden_dim)[0]
            token_ids_batch, token_mask_batch = utils.pad_text(token_ids_batch)
            aspect_batch = torch.Tensor(
                aspect_batch).float().cuda()  # batch size, hidden dim
            sentiment_batch = torch.Tensor(
                sentiment_batch).float().cuda()  # batch size, hidden dim

            output_batch, _ = utils.pad_text(y_batch)

            _, gt, loss = model(tokens_batch, token_ids_batch,
                                token_mask_batch, aspect_batch,
                                sentiment_batch, output_batch,
                                output_smooth_batch, output_mask_batch)
            losses.append(loss.item())
            gate.append(gt.mean().item())

            # backward() occasionally fails (e.g. numerical issues); skip the batch
            try:
                loss.backward()
            except RuntimeError:
                continue
            nn.utils.clip_grad_norm_(model.parameters(), 3)
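            # skip the update if any gradient turned NaN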
            nan_check = False
            for param in model.parameters():
                if param.grad is not None:
                    if torch.isnan(param.grad.sum()):
                        nan_check = True
                        break
            if not nan_check:
                optimizer.step()
                optimizer.zero_grad()

            eval_at -= 1
            if eval_at <= 0:
                with torch.no_grad():
                    train_loss = np.mean(losses)
                    train_gate = np.mean(gate)

                    eval_at = args.evaluate_every
                    losses = []
                    gate = []

                    tqdm.write(
                        "----------------------------------------------")
                    tqdm.write("Epoch: %d" % (epoch))
                    tqdm.write("Step: %d" % (step))
                    tqdm.write('Train gate: %.4f' % train_gate)
                    tqdm.write('Train loss: %.4f' % train_loss)
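                    # skip the costly dev evaluation while the loss is still high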
                    if train_loss > 4:
                        continue

                    dev_acc = []
                    dev_loss = []
                    pred_sums = []
                    gold_sums = []
                    printing = 5
                    for j in tqdm(range(0, len(x_dev), 1)):
                        model.eval()

                        x_batch = x_dev[j:j + 1]
                        y_batch = y_dev[j:j + 1]

                        x_batch = [[
                            tokenizer.encode(x_rev) for x_rev in x_inst
                        ] for x_inst in x_batch]
                        y_batch = [
                            tokenizer.encode(y_inst) for y_inst in y_batch
                        ]

                        tokens_batch, token_ids_batch, aspect_batch, sentiment_batch = utils.run_condense(
                            x_batch, tokenizer, con_encoder, con_model)
                        output_smooth_batch, output_mask_batch = utils.bert_label_smoothing(
                            y_batch, tokenizer, language_model)

                        tokens_batch = utils.pad_vector(
                            tokens_batch, args.hidden_dim)[0]
                        token_ids_batch, token_mask_batch = utils.pad_text(
                            token_ids_batch)
                        aspect_batch = torch.Tensor(aspect_batch).float().cuda(
                        )  # batch size, hidden dim
                        sentiment_batch = torch.Tensor(sentiment_batch).float(
                        ).cuda()  # batch size, hidden dim

                        output_batch, _ = utils.pad_text(y_batch)

                        pred_batch, _, loss = model(tokens_batch,
                                                    token_ids_batch,
                                                    token_mask_batch,
                                                    aspect_batch,
                                                    sentiment_batch,
                                                    output_batch,
                                                    output_smooth_batch,
                                                    output_mask_batch,
                                                    dev=True)

                        dev_acc.append(loss[1].item())

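                        # strip special tokens ([SEP] id 102) and decode gold/pred to text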
                        output = output_batch[0].cpu().detach().numpy()
                        pred = pred_batch[0].argmax(-1).cpu().detach().numpy()
                        output = list(output)
                        pred = list(pred)
                        output = output[1:output.index(102)]
                        try:
                            pred = pred[:pred.index(102)]
                        except ValueError:
                            pass

                        output = tokenizer.decode(output)
                        pred = tokenizer.decode(pred)

                        gold_sums.append(output)
                        pred_sums.append(pred)
                        if printing:
                            printing -= 1
                            tqdm.write('gold: %s' % output)
                            tqdm.write('pred: %s' % pred)
                            tqdm.write(
                                "----------------------------------------------"
                            )

                    dev_acc = np.mean(dev_acc)
                    tqdm.write('Dev ACC: %.4f' % dev_acc)

                    if dev_acc >= best_acc:
                        tqdm.write('UPDATING MODEL FILE...')
                        best_acc = dev_acc
                        stop_at = args.training_stopper
                        torch.save(
                            {
                                'model': model.state_dict(),
                                'optimizer': optimizer.state_dict(),
                                'dev_acc': dev_acc,
                            }, abstract_file)
                    else:
                        stop_at -= 1
                        tqdm.write("STOPPING AT: %d" % stop_at)

                    tqdm.write(
                        "----------------------------------------------")
Example #9
    def to_tensors(self, vocab):
        # convert a product into tensors of text, length, taxonomy, and attributes
        text, length = pad_text(vocab, DatasetOption.product_text_length,
                                self.prod_str)
        attributes = torch.tensor(self.attributes, dtype=torch.long)
        return text, length, self.taxonomy, attributes