Example #1
def test(setup_data, input_seqs=None, test_style=ROM_STYLE):

    if input_seqs is None:
        _, input_rems_text = get_data(train=False, test_style=test_style)
    else:
        input_rems_text = input_seqs
        slen = len(input_seqs)
        # append the requested style token to each caller-provided sequence
        # (note: this mutates input_seqs in place)
        for i in xrange(slen):
            input_rems_text[i].append(test_style)

    _, _, enc_tok_text, _ = tokenize_text(
        input_rems_text,
        idx_to_word=setup_data['enc_idx_to_word'],
        word_to_idx=setup_data['enc_word_to_idx'])
    enc_padded_text = pad_text(enc_tok_text)

    dlen = enc_padded_text.shape[0]
    num_batch = dlen // BATCH_SIZE  # explicit floor division (same result as / on Python 2 ints)
    if dlen % BATCH_SIZE != 0:
        num_batch += 1
    res = []
    for i in xrange(num_batch):
        dec_tensor = generate(
            setup_data['enc'], setup_data['dec'],
            enc_padded_text[i * BATCH_SIZE:(i + 1) * BATCH_SIZE])
        res.append(dec_tensor)

    all_text = []
    res = np.concatenate(res, axis=0)
    for row in res:
        utok = untokenize(row, setup_data['dec_idx_to_word'], to_text=True)
        all_text.append(utok)
    return all_text
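
The call below is a usage sketch rather than code from the source: load_setup and the sample sequence are invented, and the only real requirement is that setup_data carry the 'enc'/'dec' models and the vocabulary maps that test() reads.

setup_data = load_setup()   # hypothetical loader; must provide 'enc', 'dec',
                            # 'enc_idx_to_word', 'enc_word_to_idx', 'dec_idx_to_word'
seqs = [["church", "tower", "sky"]]   # made-up input tokens
out = test(setup_data, input_seqs=seqs, test_style=ROM_STYLE)
for line in out:
    print line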
Example #2
def train():

    input_text, input_rems_text = get_data(train=True)

    dec_idx_to_word, dec_word_to_idx, dec_tok_text, dec_bias = tokenize_text(
        input_text, lower_case=True, vsize=20000)
    dec_padded_text = pad_text(dec_tok_text)
    dec_vocab_size = len(dec_idx_to_word)

    enc_idx_to_word, enc_word_to_idx, enc_tok_text, _ = tokenize_text(
        input_rems_text)
    enc_padded_text = pad_text(enc_tok_text)
    enc_vocab_size = len(enc_idx_to_word)

    dec_text_tensor = torch.tensor(dec_padded_text, requires_grad=False)
    if cuda:
        dec_text_tensor = dec_text_tensor.cuda(device=device)

    enc, dec = build_model(enc_vocab_size, dec_vocab_size, dec_bias=dec_bias)
    enc_optim, dec_optim, lossfunc = build_trainers(enc, dec)

    num_batches = enc_padded_text.shape[0] // BATCH_SIZE  # drops any final partial batch

    sm_loss = None
    enc.train()
    dec.train()
    for epoch in xrange(0, 13):
        print "Starting New Epoch: %d" % epoch

        # shuffle the training order each epoch; apply the same permutation
        # to the decoder targets so input/target pairs stay aligned
        order = np.arange(enc_padded_text.shape[0])
        np.random.shuffle(order)
        enc_padded_text = enc_padded_text[order]
        dec_text_tensor.data = dec_text_tensor.data[order]

        for i in xrange(num_batches):
            s = i * BATCH_SIZE
            e = (i + 1) * BATCH_SIZE

            _, enc_pp, dec_pp, enc_lengths = make_packpadded(
                s, e, enc_padded_text, dec_text_tensor)

            enc.zero_grad()
            dec.zero_grad()

            hid = enc.initHidden(BATCH_SIZE)

            out_enc, hid_enc = enc.forward(enc_pp, hid, enc_lengths)

            # concatenate the forward and backward final hidden states of the
            # bidirectional encoder into one state for the decoder
            hid_enc = torch.cat([hid_enc[0, :, :], hid_enc[1, :, :]],
                                dim=1).unsqueeze(0)
            # teacher forcing: feed tokens 0..n-1, score against tokens 1..n
            out_dec, hid_dec, attn = dec.forward(dec_pp[:, :-1], hid_enc,
                                                 out_enc)

            # (batch, seq, vocab) -> (batch, vocab, seq), the layout the loss expects
            out_perm = out_dec.permute(0, 2, 1)
            loss = lossfunc(out_perm, dec_pp[:, 1:])

            # exponential moving average of the loss, for smoother logging
            if sm_loss is None:
                sm_loss = loss.data
            else:
                sm_loss = sm_loss * 0.95 + 0.05 * loss.data

            loss.backward()
            clip_grad_value_(enc_optim.param_groups[0]['params'], 5.0)
            clip_grad_value_(dec_optim.param_groups[0]['params'], 5.0)
            enc_optim.step()
            dec_optim.step()

            #del loss
            if i % 100 == 0:
                print "Epoch: %.3f" % (i / float(num_batches) +
                                       epoch, ), "Loss:", sm_loss
                print "GEN:", untokenize(
                    torch.argmax(out_dec, dim=2)[0, :], dec_idx_to_word)
                #print "GEN:", untokenize(torch.argmax(out_dec,dim=2)[1,:], dec_idx_to_word)
                print "GT:", untokenize(dec_pp[0, :], dec_idx_to_word)
                print "IN:", untokenize(enc_pp[0, :], enc_idx_to_word)

                # peek at the attention of sample 0: most-attended input position per output step
                print torch.argmax(attn[0], dim=1)
                print "--------------"
        save_state(enc, dec, enc_optim, dec_optim, dec_idx_to_word,
                   dec_word_to_idx, enc_idx_to_word, enc_word_to_idx, epoch)
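
For reference, the torch.cat on hid_enc in the loop above collapses the two directions of a bidirectional encoder into a single state. Here is a standalone shape check, assuming a one-layer bidirectional GRU (the actual encoder class is not shown in this excerpt):

import torch
import torch.nn as nn

gru = nn.GRU(input_size=32, hidden_size=64, batch_first=True,
             bidirectional=True)              # stand-in for the encoder RNN
x = torch.randn(8, 10, 32)                    # (batch, seq, features)
_, hid = gru(x)                               # hid: (2, batch, 64), one slice per direction
merged = torch.cat([hid[0, :, :], hid[1, :, :]], dim=1).unsqueeze(0)
print merged.shape                            # (1, 8, 128): both directions side by side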
Example #3
def test(setup_data, test_folder=None, test_images=None):

    enc = setup_data['enc']
    dec = setup_data['dec']
    cnn = setup_data['cnn']
    trans = setup_data['trans']
    loaded_state = setup_data['loaded_state']
    s2s_data = setup_data['s2s_data']

    dec_vocab_size = len(loaded_state['dec_idx_to_word'])

    if test_folder is not None:
        # load images from folder
        img_reader = get_image_reader(test_folder, trans, BATCH_SIZE)
        using_images = True
    elif test_images is not None:
        # load images from memory
        img_reader = torch.utils.data.DataLoader(
                ImageNetLoader(test_images, trans),
                batch_size=BATCH_SIZE, shuffle=False,
                num_workers=1, pin_memory=True)
        using_images = True
    else:
        # load precomputed image features from dataset
        feats, filenames, sents = get_data(train=False)
        feats_tensor = torch.tensor(feats, requires_grad=False)
        if cuda:
            feats_tensor = feats_tensor.cuda(device=device)
        img_reader = TestIterator(feats_tensor, sents)
        using_images = False

    all_text = []
    for input, text_data in img_reader:

        if using_images:
            if cuda:
                input = input.cuda(device=device)
            with torch.no_grad():
                batch_feats_tensor = cnn(input)
        else:
            batch_feats_tensor = input

        dec_tensor = generate(enc, dec, batch_feats_tensor)

        untok = []
        for i in xrange(dec_tensor.shape[0]):
            untok.append(untokenize(dec_tensor[i],
                                    loaded_state['dec_idx_to_word'],
                                    to_text=False))

        text = s2s.test(s2s_data, untok)

        for i in range(len(text)):
            if using_images:
                # text data is filenames
                print "FN :", text_data[i]
            else:
                # text data is ground truth sentences
                print "GT :", text_data[i]
            print "DET:", ' '.join(untok[i])
            print "ROM:", text[i], "\n"
        all_text.extend(text)
    return all_text
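
TestIterator is not defined in this excerpt; the loop above only requires that it yield (feature batch, ground-truth sentence batch) pairs. A minimal stand-in consistent with that usage:

class TestIterator(object):
    # hypothetical reconstruction: yields BATCH_SIZE-row slices of the
    # precomputed features together with their reference sentences
    def __init__(self, feats_tensor, sents):
        self.feats = feats_tensor
        self.sents = sents

    def __iter__(self):
        for s in xrange(0, self.feats.shape[0], BATCH_SIZE):
            yield self.feats[s:s + BATCH_SIZE], self.sents[s:s + BATCH_SIZE]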
Example #4
def train():

    feats, filenames, sents = get_data(train=True)

    dec_idx_to_word, dec_word_to_idx, dec_tok_text, dec_bias = tokenize_text(sents)
    dec_padded_text = pad_text(dec_tok_text)
    dec_vocab_size = len(dec_idx_to_word)
    
    enc, dec = build_model(dec_vocab_size, dec_bias)
    enc_optim, dec_optim, lossfunc = build_trainers(enc, dec)
    
    feats_tensor = torch.tensor(feats, requires_grad=False)
    dec_text_tensor = torch.tensor(dec_padded_text, requires_grad=False)
    if cuda:
        feats_tensor = feats_tensor.cuda(device=device)
        dec_text_tensor = dec_text_tensor.cuda(device=device)

    num_batches = feats.shape[0] // BATCH_SIZE  # drops any final partial batch

    sm_loss = None
    enc.train()
    dec.train()
    for epoch in xrange(0, 13):
        print "Starting New Epoch: %d" % epoch
        
        # reshuffle each epoch: free the old GPU tensors, then rebuild them
        # from the permuted host arrays
        order = np.arange(feats.shape[0])
        np.random.shuffle(order)
        del feats_tensor, dec_text_tensor
        if cuda:
            torch.cuda.empty_cache()
        feats_tensor = torch.tensor(feats[order], requires_grad=False)
        dec_text_tensor = torch.tensor(dec_padded_text[order], requires_grad=False)
        if cuda:
            feats_tensor = feats_tensor.cuda(device=device)
            dec_text_tensor = dec_text_tensor.cuda(device=device)

        for i in xrange(num_batches):
            s = i * BATCH_SIZE
            e = (i+1) * BATCH_SIZE

            enc.zero_grad()
            dec.zero_grad()

            hid_enc = enc.forward(feats_tensor[s:e]).unsqueeze(0)
            # teacher forcing: feed tokens 0..n-1, score against tokens 1..n
            out_dec, hid_dec = dec.forward(dec_text_tensor[s:e, :-1], hid_enc)

            out_perm = out_dec.permute(0, 2, 1)
            loss = lossfunc(out_perm, dec_text_tensor[s:e,1:])
            
            if sm_loss is None:
                sm_loss = loss.data
            else:
                sm_loss = sm_loss*0.95 + 0.05*loss.data

            loss.backward()
            enc_optim.step()
            dec_optim.step()
            
            if i % 100 == 0:
                print "Epoch: %.3f" % (i/float(num_batches) + epoch,), "Loss:", sm_loss
                print "GEN:", untokenize(torch.argmax(out_dec,dim=2)[0,:], dec_idx_to_word)
                print "GT:", untokenize(dec_text_tensor[s,:], dec_idx_to_word)
                print "--------------"

        save_state(enc, dec, enc_optim, dec_optim, dec_idx_to_word, dec_word_to_idx, epoch)
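
The [:, :-1] / [:, 1:] slicing in this loop is standard teacher forcing: the decoder receives the gold token at each step and is scored against the next one. A toy illustration with invented token ids:

import torch

dec_text = torch.tensor([[1, 4, 5, 6, 2]])  # e.g. <s> a b c </s>, ids invented
dec_in = dec_text[:, :-1]                   # [[1, 4, 5, 6]] fed to the decoder
target = dec_text[:, 1:]                    # [[4, 5, 6, 2]] used by the loss
# output position t is trained to predict target[:, t], i.e. the token
# that follows dec_in[:, t] in the gold caption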
def test(setup_data, test_folder=None, test_images=None):
    enc = setup_data['enc']
    dec = setup_data['dec']
    cnn = setup_data['cnn']
    trans = setup_data['trans']
    loaded_state = setup_data['loaded_state']
    s2s_data = setup_data['s2s_data']
    k = 0

    dec_vocab_size = len(loaded_state['dec_idx_to_word'])
    id_captions = []

    if test_folder is not None:
        # load images from folder
        img_reader = get_image_reader(test_folder, trans, BATCH_SIZE)
        using_images = True
    elif test_images is not None:
        # load images from memory
        img_reader = torch.utils.data.DataLoader(ImageNetLoader(
            test_images, trans),
                                                 batch_size=BATCH_SIZE,
                                                 shuffle=False,
                                                 num_workers=1,
                                                 pin_memory=True)
        using_images = True
    else:
        # load precomputed image features from dataset
        feats, filenames, sents = get_data(train=False)
        feats_tensor = torch.tensor(feats, requires_grad=False)
        if cuda:
            feats_tensor = feats_tensor.cuda(device=device)
        img_reader = TestIterator(feats_tensor, sents)
        using_images = False

    all_text = []
    for input, text_data in img_reader:

        if using_images:
            if cuda:
                input = input.cuda(device=device)
            with torch.no_grad():
                batch_feats_tensor = cnn(input)
        else:
            batch_feats_tensor = input

        dec_tensor = generate(enc, dec, batch_feats_tensor)

        untok = []
        for i in range(dec_tensor.shape[0]):
            untok.append(
                untokenize(dec_tensor[i],
                           loaded_state['dec_idx_to_word'],
                           to_text=False))

        text = test_s2s(s2s_data, untok)

        for i in range(len(text)):
            # filenames is only set in the precomputed-features branch above,
            # so this block assumes test_folder and test_images were None
            filenames[k] = filenames[k].replace('COCO_val2014_', '')
            filenames[k] = filenames[k].replace('.jpg', '')
            j = {
                "image_id": int(filenames[k]),
                "caption": text[i],
                "words": ' '.join(untok[i])
            }
            id_captions.append(j)
            k += 1

        all_text.extend(text)
        # note: the results files are rewritten from scratch after every batch
        with open(
                'results/captions_val2014_' + test_model_fname +
                seq_to_seq_test_model_fname + '_results.json', 'w') as outfile:
            json.dump(id_captions, outfile)

        pandas.DataFrame(id_captions).to_csv(
            'results/captions_val2014_' + test_model_fname +
            seq_to_seq_test_model_fname + '_results.csv',
            index=False)
    return all_text
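
The JSON written above has the shape of a COCO caption results file: a list of records keyed by image_id and caption (the extra "words" field is this project's own addition). One illustrative record, with invented values:

example_entry = {
    "image_id": 391895,                # int parsed from the cleaned filename
    "caption": "a man riding a motorcycle down a dirt road",
    "words": "man riding motorcycle dirt road"
}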