Example #1
0
    id_to_word[v] = k

# --- Model construction and device placement ---------------------------------

# CNN image encoder (pretrained VGG19) and the trained caption decoder.
image_model = VGG19()
image_model.load(args.image_model)

caption_net = ImageCaption(len(word_ids), feature_num, hidden_num)
serializers.load_hdf5(args.model, caption_net)

# Default to NumPy; switch to CuPy and move both nets onto the GPU on request.
xp = np
if args.gpu >= 0:
    cuda.check_cuda_available()
    gpu_device = args.gpu
    cuda.get_device(gpu_device).use()
    xp = cuda.cupy
    image_model.to_gpu(gpu_device)
    caption_net.to_gpu(gpu_device)

# Sentence boundary markers used to seed / terminate generation.
bos = word_ids['<S>']
eos = word_ids['</S>']

# BUGFIX: materialize the non-empty lines as a list.  In Python 3,
# `filter(bool, ...)` returns a single-use iterator, so any second pass over
# `paths` would silently see an empty sequence.
with open(args.list) as f:
    paths = [p for p in f.read().split('\n') if p]


def generate(net, image_model, image_path):
    feature = image_model.feature(image_path)
    net.initialize(feature)
    candidates = [(net, [bos], 0)]

    for i in range(max_length):
        next_candidates = []
Example #2
0
def main(args):
    """Generate a question for every test-split item with beam search and dump
    the results as a COCO-captions-style JSON file.

    Args (argparse namespace):
        MODEL_PATH:       trained ImageCaption HDF5 weights.
        ALL_DATA_PATH:    pickled QA dataset (all splits).
        ALL_FEATURE_PATH: pickled image features matching the dataset.
        SAVE_PATH:        output JSON path.
        WORD2ID_PATH:     pickled {word: id} vocabulary.
    """
    model_path = args.MODEL_PATH
    all_data_path = args.ALL_DATA_PATH
    all_feature_path = args.ALL_FEATURE_PATH
    save_path = args.SAVE_PATH

    with open(args.WORD2ID_PATH, 'rb') as f:
        word_id_dic = pickle.load(f)

    # Sentence boundary ids used to seed / terminate the beam search.
    bos = word_id_dic['<s>']
    eos = word_id_dic['</s>']

    # Inverse vocabulary for decoding generated id sequences back to words.
    id2word = {v: k for k, v in word_id_dic.items()}

    print('data loading...')
    with open(all_data_path, 'rb') as f:
        all_data = pickle.load(f)
    with open(all_feature_path, 'rb') as f:
        all_features = pickle.load(f)
    print('data loaded!')

    test_features, test_qa_ids, test_target_vecs = create_data(
        all_data, all_features, 'test')

    # Network hyper-parameters -- must match the values used at training time.
    feature_num = 2005
    hidden_num = 1024
    vocab_num = len(word_id_dic)
    attr_num = 5

    caption_net = ImageCaption(vocab_num, attr_num, feature_num, hidden_num)
    serializers.load_hdf5(model_path, caption_net)
    caption_net.to_gpu(gpu_device)

    # Hard cap on generated question length.  The beam width is fixed inside
    # beam_search() -- NOTE(review): the original assigned an unused local
    # `beam_width = 3`; confirm the width used by beam_search matches.
    max_length = 100

    question_list = []
    for i in tqdm(range(len(test_qa_ids))):
        qa_id = test_qa_ids[i]
        target_var = Variable(xp.array([test_target_vecs[i]], dtype=xp.float32))
        feature_var = Variable(xp.array([test_features[i]], dtype=xp.float32))

        # Prime the decoder with the image feature (inference mode, no grads).
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            caption_net.image_init(feature_var)

        # Beam search: advance every candidate one token per step until all
        # beams end with </s> or the length cap is reached.
        candidates = beam_search([(caption_net, [bos], 0)], target_var,
                                 norm=True)
        for _ in range(max_length):
            candidates = beam_search(candidates, target_var, norm=True)
            if all(cand[1][-1] == eos for cand in candidates):
                break

        # Best beam; drop the leading <s> and the trailing token (assumed to
        # be </s>).  NOTE(review): if the length cap is hit without </s>, this
        # drops a real word -- confirm max_length is never reached in practice.
        best_ids = candidates[0][1][1:-1]
        tokens = [id2word[token_id] for token_id in best_ids]
        question_list.append([qa_id, tokens])

    # COCO-captions-style records so standard evaluation tooling can be reused.
    all_list = []
    for qa_id, question in question_list:
        all_list.append({
            'id': qa_id,
            'image_id': qa_id,
            # BUGFIX: the original appended an empty string to every word
            # (`word + ''`) before joining -- a no-op; a plain join yields the
            # identical text.
            'caption': (' '.join(question) + '?').capitalize(),
        })

    with open(save_path, 'w') as f:
        json.dump(all_list, f)
Example #3
0
# --- Dataset assembly and model setup ----------------------------------------
# NOTE: this section is Python 2 (statement-form `print` below).

# Image features from a MATLAB file; 'feats' is (feature_dim, n_images), so
# transpose to (n_images, feature_dim): one row per image.
image_dataset = scipy.io.loadmat(args.image)
images = image_dataset['feats'].transpose((1, 0))

# Train/test splits of image ids and caption sentences.  `sentence_dataset`
# is loaded earlier in the file (not visible in this chunk).
train_image_ids = sentence_dataset['images']['train']
train_sentences = sentence_dataset['sentences']['train']
test_image_ids = sentence_dataset['images']['test']
test_sentences = sentence_dataset['sentences']['test']
word_ids = sentence_dataset['word_ids']
feature_num = images.shape[1]  # dimensionality of each image feature vector
hidden_num = 512               # decoder hidden-state size
batch_size = 128

print 'word count: ', len(word_ids)
caption_net = ImageCaption(len(word_ids), feature_num, hidden_num)
if gpu_device is not None:
    caption_net.to_gpu(gpu_device)
optimizer = optimizers.Adam()
optimizer.setup(caption_net)

# Optionally resume from a checkpoint: model weights plus optimizer state are
# stored as two sibling HDF5 files sharing a path prefix.
if args.model is not None:
    serializers.load_hdf5(args.model + '.model', caption_net)
    serializers.load_hdf5(args.model + '.state', optimizer)

# Special-token ids used by the training/generation code below.
bos = word_ids['<S>']
eos = word_ids['</S>']
unknown = word_ids['<UNK>']

def random_batches(image_groups, sentence_groups):
    batches = []
    for image_ids, sentences in zip(image_groups, sentence_groups):
        length = len(sentences)
Example #4
0
def main(args):
    """Train the ImageCaption question generator and checkpoint every 10 epochs.

    Args (argparse namespace):
        MODEL_PATH:       directory that receives model/optimizer checkpoints.
        ALL_DATA_PATH:    pickled QA dataset (all splits).
        ALL_FEATURE_PATH: pickled image features matching the dataset.
        WORD2ID_PATH:     pickled {word: id} vocabulary.
    """
    model_save_path = args.MODEL_PATH
    all_data_path = args.ALL_DATA_PATH
    all_feature_path = args.ALL_FEATURE_PATH

    if not os.path.exists(model_save_path):
        print('make model save directory')
        os.mkdir(model_save_path)

    with open(args.WORD2ID_PATH, 'rb') as f:
        word_id_dic = pickle.load(f)

    # Only </s> is needed here: forward() uses it to mask out positions after
    # the end of each target question.
    eos = word_id_dic['</s>']

    print('all data loading...')
    with open(all_data_path, 'rb') as f:
        all_data = pickle.load(f)
    print('all data loaded!')
    print('all feature loading...')
    with open(all_feature_path, 'rb') as f:
        all_features = pickle.load(f)
    print('all feature loaded!')

    train_features, train_questions, train_target_vecs = create_data(
        all_data, all_features, 'train')
    valid_features, valid_questions, valid_target_vecs = create_data(
        all_data, all_features, 'valid')

    # Network hyper-parameters (must match the inference-time configuration).
    feature_num = 2005
    hidden_num = 1024
    vocab_num = len(word_id_dic)
    attr_num = 5

    epoch_num = 100
    batch_size = 100

    caption_net = ImageCaption(vocab_num, attr_num, feature_num, hidden_num)
    caption_net.to_gpu(gpu_device)

    optimizer = optimizers.Adam(alpha=4.0e-4)
    optimizer.setup(caption_net)

    n_train = len(train_target_vecs)
    n_valid = len(valid_target_vecs)

    for epoch in tqdm(range(epoch_num)):
        # ---- training pass over shuffled mini-batches -----------------------
        train_perm = np.random.permutation(n_train)
        sum_loss = 0
        sum_acc = 0
        sum_size = 0

        for index in range(0, n_train, batch_size):
            # cleargrads() supersedes the deprecated zerograds(): it discards
            # gradient arrays instead of zero-filling them (same semantics).
            caption_net.cleargrads()
            index_array = train_perm[index:index + batch_size]
            # NOTE(review): fancy indexing assumes create_data returns arrays
            # (not plain lists) for the train split -- confirm.
            feature_var = Variable(xp.array(train_features[index_array]))
            question_var = Variable(xp.array(train_questions[index_array]))
            target_var = Variable(xp.array(train_target_vecs[index_array]))

            loss, acc, size = forward(target_var, feature_var, question_var,
                                      caption_net, eos)

            loss.backward()
            optimizer.update()
            sum_loss += loss.data.tolist()
            sum_acc += acc.tolist()
            sum_size += size.tolist()

        print('loss:', sum_loss / sum_size, 'acc:', sum_acc / sum_size)

        # ---- validation pass (inference-mode config, no gradients) ----------
        valid_sum_loss = 0
        valid_sum_acc = 0
        valid_sum_size = 0

        for valid_index in range(0, n_valid, batch_size):
            batch = slice(valid_index, valid_index + batch_size)

            valid_feature_var = Variable(
                xp.array(valid_features[batch], dtype=xp.float32))
            valid_question_var = Variable(
                xp.array(valid_questions[batch], dtype=xp.float32))
            valid_target_var = Variable(
                xp.array(valid_target_vecs[batch], dtype=xp.float32))

            with chainer.using_config('train',
                                      False), chainer.no_backprop_mode():
                valid_loss, valid_acc, valid_size = forward(
                    valid_target_var, valid_feature_var, valid_question_var,
                    caption_net, eos)

            valid_sum_loss += valid_loss.data.tolist()
            valid_sum_acc += valid_acc.tolist()
            valid_sum_size += valid_size.tolist()

        print(
            "{:3d} epoch train loss : {:.5f}, acc : {:.5f} | valid loss : {:.5f}, acc : {:.5f}\n"
            .format(epoch, sum_loss / sum_size, sum_acc / sum_size,
                    valid_sum_loss / valid_sum_size,
                    valid_sum_acc / valid_sum_size))

        # Checkpoint every 10 epochs.  BUGFIX: use os.path.join -- the original
        # raw concatenation produced e.g. "savedirmodel_10.h5" whenever
        # MODEL_PATH lacked a trailing path separator.
        if epoch > 0 and epoch % 10 == 0:
            serializers.save_hdf5(
                os.path.join(model_save_path, 'model_' + str(epoch) + '.h5'),
                caption_net)
            serializers.save_hdf5(
                os.path.join(model_save_path,
                             'optimizer_' + str(epoch) + '.h5'), optimizer)