Example #1
def test_all():
    # Smoke test: load one batch, run a forward pass, and print the masked metrics.
    dataset = MovieLens('./ml-latest-small/ratings.csv', 'train', 60, 5, .8)
    model = BERT4REC(total_items=10000,
                     emb_dims=32,
                     num_heads=1,
                     dropout_rate=.8,
                     learning_rate=1e-3)
    print(model)

    test_dataset = DataLoader(dataset, batch_size=1)
    for i, data in enumerate(test_dataset):
        x, y_label, mask = data

        # Flatten predictions to (batch * seq, vocab) and labels to (batch * seq,)
        # so every timestep is scored independently.
        y_pred = model(x)
        y_pred = y_pred.view(-1, y_pred.size(2))
        y_label = y_label.view(-1)

        x = x.view(-1)
        mask = mask.view(-1)
        print(mask, mask.shape)
        print(x, x.shape)
        print(x.shape, y_label.shape, y_pred.shape)

        loss = masked_cross_entropy(y_pred, y_label, mask)
        acc = masked_accuracy(y_pred, y_label, mask)
        recall = masked_recall_at_k(y_pred, y_label, mask, 10)
        print(loss, acc, recall)
        break
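Each example calls masked_cross_entropy without defining it, and its exact signature varies from repository to repository. For the flattened tensors above, a minimal sketch of such a loss (the name, signature, and boolean per-position mask are assumptions, not the original implementation):

import torch
import torch.nn.functional as F

def masked_cross_entropy(logits, target, mask):
    # logits: (N, num_classes), target: (N,), mask: (N,) bool or 0/1.
    # Average the per-position losses over the positions the mask keeps.
    losses = F.cross_entropy(logits, target, reduction='none')
    mask = mask.float()
    return (losses * mask).sum() / mask.sum().clamp(min=1.0)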
Example #2
    def run_epoch(self, data, do_train):
        self.encoder.train(do_train)
        self.decoder.train(do_train)
        epoch_loss = 0
        for bi, batch in enumerate(data):
            X, Y, x_len, y_len = [Variable(b) for b in batch]
            if use_cuda:
                X = X.cuda()
                Y = Y.cuda()
                x_len = x_len.cuda()
                y_len = y_len.cuda()
            batch_size = X.size(0)
            seqlen_enc = X.size(1)
            seqlen_dec = Y.size(1)

            enc_outputs, enc_hidden = self.encoder(X)
            # Buffer for the decoder's per-step vocabulary scores.
            all_output = Variable(
                torch.zeros(batch_size, seqlen_dec, len(data.dataset.vocab_dec)))
            # Every sequence starts from the SOS token, attending to encoder position 0.
            dec_input = Variable(torch.LongTensor(
                np.ones(batch_size) * LabeledDataset.CONSTANTS['SOS']))
            attn_pos = Variable(torch.LongTensor([0] * batch_size))
            range_helper = Variable(torch.LongTensor(np.arange(batch_size)),
                                    requires_grad=False)

            if use_cuda:
                all_output = all_output.cuda()
                dec_input = dec_input.cuda()
                attn_pos = attn_pos.cuda()
                range_helper = range_helper.cuda()

            # Keep only as many layers of the encoder state as the decoder has.
            hidden = tuple(e[:self.decoder.num_layers, :, :].contiguous()
                           for e in enc_hidden)

            for ts in range(seqlen_dec):
                dec_out, hidden = self.decoder(
                    dec_input, enc_outputs[range_helper, attn_pos], hidden)
                # Advance the hard attention pointer whenever the decoder emits <STEP>.
                topv, top_idx = dec_out.max(-1)
                attn_pos = attn_pos + torch.eq(top_idx,
                                               LabeledDataset.CONSTANTS['<STEP>']).long()
                attn_pos = torch.clamp(attn_pos, 0, seqlen_enc - 1)
                attn_pos = attn_pos.squeeze(0).contiguous()
                # Teacher forcing: feed the gold token as the next decoder input.
                dec_input = Y[:, ts].contiguous()
                all_output[:, ts] = dec_out

            self.enc_opt.zero_grad()
            self.dec_opt.zero_grad()
            loss = masked_cross_entropy(all_output.contiguous(), Y, y_len)
            epoch_loss += loss.item()  # .item() replaces the deprecated loss.data[0]
            if do_train:
                loss.backward()
                self.enc_opt.step()
                self.dec_opt.step()
        epoch_loss /= (bi+1)
        return epoch_loss
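Note that this example passes the raw length vector y_len as the third argument, whereas Examples #3 and #5 pass a boolean mask; implementations that accept lengths typically expand them into a mask first. A minimal sketch of such a conversion (lengths_to_mask is a hypothetical helper, not part of the original code):

import torch

def lengths_to_mask(lengths, max_len):
    # (batch,) integer lengths -> (batch, max_len) bool mask, True at valid steps.
    steps = torch.arange(max_len, device=lengths.device).unsqueeze(0)
    return steps < lengths.unsqueeze(1)

# e.g. mask = lengths_to_mask(y_len, seqlen_dec) before a mask-based loss call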
Example #3
def evaluate(context_input, aspect_input, review_input, review_output, extend_input, review_model):
    review_model.eval()

    context_input = context_input.to(device)
    aspect_input = aspect_input.to(device)
    review_input = review_input.to(device)
    review_output = review_output.to(device)
    extend_input = extend_input.to(device)

    # context encoder
    context_embed, hidden = review_model.forward_context(context_input)

    decoder_generate, decoder_hidden = review_model(context_embed, hidden, review_input, aspect_input, extend_input)

    # Score only the non-padding positions of the reference review.
    mask = torch.ne(review_output, PAD_ID)
    loss = masked_cross_entropy(decoder_generate, review_output, mask)

    return loss.item()
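evaluate() switches the model to eval mode but does not disable autograd, so wrapping calls in torch.no_grad() avoids building an unused graph. A hypothetical validation loop (val_loader and its five-tensor batches are assumptions):

with torch.no_grad():
    val_loss = sum(
        evaluate(context, aspect, rev_in, rev_out, extend, review_model)
        for context, aspect, rev_in, rev_out, extend in val_loader
    ) / len(val_loader)
print(f"validation loss: {val_loss:.4f}")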
Example #4
def main():
    with open("data/vocab.pkl", 'rb') as f:
        vocab = pickle.load(f)

    img_path = "data/flickr7k_images"
    cap_path = "data/factual_train.txt"
    styled_path = "data/humor/funny_train.txt"
    data_loader = get_data_loader(img_path, cap_path, vocab, 3)
    styled_data_loader = get_styled_data_loader(styled_path, vocab, 3)

    # Tiny embedding/hidden sizes keep this check fast.
    encoder = EncoderCNN(30)
    decoder = FactoredLSTM(30, 40, 40, len(vocab))

    if torch.cuda.is_available():
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    # for i, (images, captions, lengths) in enumerate(data_loader):
    for i, (captions, lengths) in enumerate(styled_data_loader):
        # images = Variable(images, volatile=True)
        captions = Variable(captions.long())

        if torch.cuda.is_available():
            # images = images.cuda()
            captions = captions.cuda()

        # features = encoder(images)

        outputs = decoder(captions, features=None, mode="humorous")
        print(lengths - 1)
        print(outputs)
        print(captions[:, 1:])

        # Targets are the captions shifted left by one token, so the
        # valid lengths shrink by one as well.
        loss = masked_cross_entropy(outputs, captions[:, 1:].contiguous(),
                                    lengths - 1)

        print(loss)

        break
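The shift between inputs and targets in the loss call above can be easy to miss. A toy illustration with made-up token ids:

import torch

captions = torch.tensor([[1, 5, 9, 2, 0]])  # <SOS> w1 w2 <EOS> <PAD>
lengths = torch.tensor([4])                 # tokens before padding

targets = captions[:, 1:]      # next-token targets: w1 w2 <EOS> <PAD>
target_lengths = lengths - 1   # 3 valid target positions per sequence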
Example #5
def train(context_input, aspect_input, review_input, review_output, extend_input, review_model, review_optimizer):
    review_optimizer.zero_grad()

    context_input = context_input.to(device)
    aspect_input = aspect_input.to(device)
    review_input = review_input.to(device)
    review_output = review_output.to(device)
    extend_input = extend_input.to(device)

    # context encoder
    context_embed, hidden = review_model.forward_context(context_input)

    decoder_generate, decoder_hidden = review_model(context_embed, hidden, review_input, aspect_input, extend_input)

    mask = torch.ne(review_output, PAD_ID)
    loss = masked_cross_entropy(decoder_generate, review_output, mask)
    loss.backward()

    # Clip gradient norms to stabilize training.
    clip = 5.0
    mc = torch.nn.utils.clip_grad_norm_(review_model.parameters(), clip)

    review_optimizer.step()

    return loss.item()
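For context, a hypothetical outer loop driving this function (train_loader, num_epochs, and the batch layout are assumptions):

for epoch in range(num_epochs):
    total = 0.0
    for context, aspect, rev_in, rev_out, extend in train_loader:
        total += train(context, aspect, rev_in, rev_out, extend,
                       review_model, review_optimizer)
    print(f"epoch {epoch}: train loss {total / len(train_loader):.4f}")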