Python load_clean_captions Examples, util.load_clean_captions Python Examples

Example #1

0

Show file

def train():
    """Train the caption model and save a checkpoint after every epoch.

    Loads the training image ids, their cleaned captions and photo features,
    restores the tokenizer from ``tokenizer.pkl``, builds the model with
    ``caption_model`` and fits it one epoch at a time for 20 epochs, writing
    ``model_<epoch>.h5`` after each one.
    """
    # load the ids of the training images
    filename = 'Flickr_8k.trainImages.txt'
    train_ids = util.load_ids(filename)
    print('Dataset: %d' % len(train_ids))
    train_captions = util.load_clean_captions('descriptions.txt', train_ids)
    print('Captions: train number=%d' % len(train_captions))
    # photo features
    train_features = util.load_photo_features('features.pkl', train_ids)
    print('Photos: train=%d' % len(train_features))
    # prepare tokenizer; the context manager closes the file handle as soon
    # as loading finishes instead of leaking it.
    # NOTE(review): `load` is presumably pickle.load -- only load trusted files.
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)
    vocab_size = len(tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % vocab_size)
    # determine the maximum sequence length
    max_len = util.get_max_length(train_captions)
    print('Description Length: %d' % max_len)

    # define the model
    model = caption_model(vocab_size, max_len)
    # train the model, run epochs manually and save after each epoch
    epochs = 20
    steps = len(train_captions)
    for i in range(epochs):
        # a fresh generator is created for every epoch
        generator = data_generator(train_captions, train_features, tokenizer, max_len)
        # fit for one epoch
        model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
        # save a checkpoint named after the epoch index
        model.save('model_' + str(i) + '.h5')

Example #2

0

Show file

def evaluate_model_run():
    """Evaluate the saved model ``model_19.h5`` on the test split.

    Loads the test image ids, the matching captions and photo features and
    the tokenizer, then prints the four BLEU scores returned by
    ``evaluate_model``.
    """
    model = load_model('model_19.h5')
    filename = 'Flickr_8k.testImages.txt'
    test = util.load_ids(filename)
    # `test` acts as an index: it is passed to both loaders so that
    # descriptions.txt and features.pkl are read for these ids
    # (per the original author's note -- confirm against util).
    test_caption = util.load_clean_captions('descriptions.txt', test)
    test_features = util.load_photo_features('features.pkl', test)
    # Use a context manager so the tokenizer file is closed after loading.
    # NOTE(review): `load` is presumably pickle.load -- only load trusted files.
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)
    bleu1, bleu2, bleu3, bleu4 = evaluate_model(model, test_caption,
                                                test_features, tokenizer)
    print('BLEU-1: %f' % bleu1)
    print('BLEU-2: %f' % bleu2)
    print('BLEU-3: %f' % bleu3)
    print('BLEU-4: %f' % bleu4)

Example #3

0

Show file

File: task5.py Project: b43646/Image-Caption

def evaluate_check():
    """Run ``evaluate_model`` for ``model_1.h5`` on the test split.

    Loads the test image ids, their photo features from ``features3.pkl``,
    the saved model, the tokenizer and the cleaned captions, then calls
    ``evaluate_model``. The value returned by ``evaluate_model`` is not used
    here.
    """
    filename = 'Flickr_8k.testImages.txt'
    test = util.load_ids(filename)

    test_features = util.load_photo_features('features3.pkl', test)
    print("Photos: test=%d" % len(test_features))

    # load the model
    model_name = 'model_1.h5'
    model = load_model(model_name)

    # Use a context manager so the tokenizer file is closed after loading.
    # NOTE(review): `load` is presumably pickle.load -- only load trusted files.
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)
    captions = util.load_clean_captions('descriptions.txt', test)

    evaluate_model(model, captions, test_features, tokenizer)

Example #4

0

Show file

def train():
    """Train the caption model for 5 epochs, checkpointing after each one.

    Loads the training image ids, their cleaned captions and photo features,
    restores the tokenizer from ``tokenizer.pkl``, builds the model with
    ``caption_model`` and fits it one epoch at a time. After every epoch the
    model is written to ``model/model_my<epoch>.h5``.
    """
    # load the ids of the training images
    filename = 'Flickr_8k.trainImages.txt'
    train_ids = util.load_ids(filename)
    print('Dataset: %d' % len(train_ids))
    train_captions = util.load_clean_captions('descriptions.txt', train_ids)
    print('Captions: train number=%d' % len(train_captions))
    # photo features
    train_features = util.load_photo_features('features.pkl', train_ids)
    print('Photos: train=%d' % len(train_features))
    # prepare tokenizer; the context manager closes the file handle as soon
    # as loading finishes instead of leaking it.
    # NOTE(review): `load` is presumably pickle.load -- only load trusted files.
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)
    vocab_size = len(tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % vocab_size)
    # determine the maximum sequence length
    max_len = util.get_max_length(train_captions)
    print('Description Length: %d' % max_len)

    # define the model
    model = caption_model(vocab_size, max_len)

    # Checkpoints go into the 'model' directory; create it up front so the
    # first save does not fail when the directory is missing.
    checkpoint_dir = 'model'
    os.makedirs(checkpoint_dir, exist_ok=True)

    # train the model, run epochs manually and save after each epoch
    epochs = 5
    steps = len(train_captions)
    for i in range(epochs):
        # a fresh generator is created for every epoch
        generator = data_generator(train_captions, train_features, tokenizer,
                                   max_len)

        # Original author's notes on the generator (not verifiable from this
        # function alone -- confirm against data_generator):
        #   * each item it yields has two parts, X and Y;
        #   * X itself holds two inputs: the photo feature and the caption
        #     prefix seen so far;
        #   * Y is the next word to predict, i.e. the response variable.
        # The generator does not need to terminate on its own: with epochs=1
        # the fit call draws from it at most `steps_per_epoch` times.
        model.fit_generator(generator,
                            epochs=1,
                            steps_per_epoch=steps,
                            verbose=1)

        # save a checkpoint named after the epoch index
        model.save(os.path.join(checkpoint_dir, 'model_my' + str(i) + '.h5'))

Example #5

0

Show file

def create_tokenizer():
    """
    Build a tokenizer from the captions of the training images.

    The training image names and their corresponding captions are used to fit
    the tokenizer. LSTM inputs/outputs must be numeric, so a dictionary-like
    mapping between words and numbers is needed.
    :return: the fitted tokenizer
    https://keras-cn.readthedocs.io/en/latest/legacy/preprocessing/text/#tokenizer
    """
    # Both data files are located next to each other under `current_path`.
    path_template = '{}{}{}'

    names_file = path_template.format(current_path, os.sep, 'Flickr_8k.trainImages.txt')
    image_names = util.load_image_names(names_file)

    captions_file = path_template.format(current_path, os.sep, 'descriptions.txt')
    captions_by_image = util.load_clean_captions(captions_file, image_names)

    # Flatten the captions into a list of lines and fit the tokenizer on them.
    caption_lines = util.to_list(captions_by_image)
    fitted = Tokenizer()
    fitted.fit_on_texts(caption_lines)
    return fitted

Example #6

0

Show file

File: task5.py Project: qian5683/DeepLearning_Notebook

def evaluate_model_run(model_name):
    """Evaluate a saved model on the test split and print the result.

    :param model_name: path of the saved model file passed to ``load_model``
    :return: None; the value returned by ``evaluate_model`` is printed
    """
    # load test set
    filename = '../Flickr8k_text/Flickr_8k.testImages.txt'
    test = task3.load_image_names(filename)
    test_captions = util.load_clean_captions('../task3/descriptions.txt', test)

    # photo features
    test_features = util.load_photo_features('../task2/features.pkl', test)
    print('Photos: test=%d' % len(test_features))

    # load the model
    model = load_model(model_name)

    # Use a context manager so the tokenizer file is closed after loading.
    # NOTE(review): `load` is presumably pickle.load -- only load trusted files.
    with open('../task3/tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)

    print(evaluate_model(model, test_captions, test_features, tokenizer))

Example #7

0

Show file

def evaluate_model_run():
    """Evaluate the saved model ``model_19.h5`` on the test split.

    Loads the test image ids, the matching captions and photo features, the
    model and the tokenizer, then prints the four BLEU scores returned by
    ``evaluate_model``.
    """
    # load test set
    ids_path = 'Flickr_8k.testImages.txt'
    test = util.load_ids(ids_path)
    print('number of test images: %d' % len(test))
    test_captions = util.load_clean_captions('descriptions.txt', test)
    # photo features
    test_features = util.load_photo_features('features.pkl', test)

    # load the model (separate name so the ids path is not overwritten)
    model_path = 'model_19.h5'
    model = load_model(model_path)

    # Use a context manager so the tokenizer file is closed after loading.
    # NOTE(review): `load` is presumably pickle.load -- only load trusted files.
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)
    # evaluate model
    # NOTE(review): the trailing 40 is presumably the maximum caption length
    # expected by evaluate_model -- confirm against its signature.
    bleu1, bleu2, bleu3, bleu4 = evaluate_model(model, test_captions,
                                                test_features, tokenizer, 40)
    print('BLEU-1: %f' % bleu1)
    print('BLEU-2: %f' % bleu2)
    print('BLEU-3: %f' % bleu3)
    print('BLEU-4: %f' % bleu4)

Example #8

0

Show file

File: task4.py Project: songdongqing/ImageCaption

    #
    # # adding embeddings to model
    # predictive_model.layers[2]
    # predictive_model.layers[2].set_weights([embedding_matrix])
    # predictive_model.layers[2].trainable = False
    pass


if __name__ == '__main__':
    # add_weights()

    filename = 'Flickr_8k.trainImages.txt'
    train = util.load_ids(filename)  # 返回了一个{}，包含了文件名（去除.jpg）

    des_path = r'E:/AI资源计算机视觉/JM07 - TXXY - CV2期/02.资料/homework-master-7fc833414b95225130c323c278230bc388af5c6b/homework1/task5/descriptions.txt'
    train_captions = util.load_clean_captions(des_path, train)
    # print('Captions: train number=%d' % len(train_captions))
    # print(train_captions["3227594168_3351722aae"])  # ['startseq Two blonde ladies wearing sunglasses lounge on the grass with a dacshund . endseq',
    # 'startseq Two blonde young women hang out in the grass with a brown dog . endseq',
    # 'startseq Two blond women sit in grass with a small dog . endseq',
    # 'startseq Two women laying on grass with a dog . endseq',
    # 'startseq Two women on a grassy hill lit by the sun ; one looks at a dachshund , the other looks to the side . endseq']

    # photo features
    feature_path = r"E:/AI资源计算机视觉/JM07 - TXXY - CV2期/02.资料/homework-master-7fc833414b95225130c323c278230bc388af5c6b/homework1/features.pkl"
    train_features = util.load_photo_features(feature_path, train)
    # print('Photos: train=%d' % len(train_features))
    # print(len(train_features["3585117340_73e96b6173"][0]))  # 4096

    # prepare tokenizer
    tokenizer = load(open('tokenizer.pkl', 'rb'))