def sentence_generator():
    """Yield one serialized ``tf.train.SequenceExample`` per usable caption.

    Captions are loaded from 'data/sentences.pkl' and shuffled. Every word
    that appears in the plural-name list is replaced by the category name
    at the same position, then words are converted to vocabulary ids.
    Captions in which more than 15% of the ids equal ``vocab.unk_id`` are
    skipped.

    Side effects:
        * When ``FLAGS.new_dict`` is set, ``_create_vocab`` is called and
          ``FLAGS.word_counts_output_file`` is read back, filtered and
          rewritten.
        * 'data/all_ids.pkl' is always (re)written with a mapping from
          each key of ``categories`` to the vocabulary id of its value.

    Yields:
        Serialized SequenceExample with an empty context and two feature
        lists: 'key' (ids of the words returned by ``parse_key_words``)
        and 'sentence' (ids of the caption words).
    """
    category_name, plural_name, categories = get_open_image_categories()
    # Plural form -> category name, pairing the two sequences by position.
    replace = dict(zip(plural_name, category_name))
    category_set = set(category_name)

    # Pickle streams are binary: text mode fails on Python 3 and can corrupt
    # the data on platforms that translate newlines.
    with open('data/sentences.pkl', 'rb') as f:
        captions = pkl.load(f)

    if FLAGS.new_dict:
        # Presumably writes FLAGS.word_counts_output_file, which is read back
        # just below -- confirm against _create_vocab.
        _create_vocab(captions)
        with open('data/glove_vocab.pkl', 'rb') as f:
            glove = set(pkl.load(f))
        glove.update(('<S>', '</S>'))

        # Each line is "<word> <count>"; keep only words present in `glove`.
        with open(FLAGS.word_counts_output_file, 'r') as f:
            rows = [line.split() for line in f]
        counts = [(w, int(n)) for w, n in rows if w in glove]

        # Keep every category name regardless of frequency; other words need
        # a count of at least 40.
        word_counts = [(w, n) for w, n in counts
                       if w in category_set or n >= 40]
        words = {w for w, _ in word_counts}
        # Category names that did not survive get an entry with count 0.
        word_counts.extend(
            (name, 0) for name in category_name if name not in words)
        with open(FLAGS.word_counts_output_file, 'w') as f:
            f.write('\n'.join(['%s %d' % (w, c) for w, c in word_counts]))

    vocab = vocabulary.Vocabulary(FLAGS.word_counts_output_file)

    all_ids = {k: vocab.word_to_id(v) for k, v in categories.items()}
    with open('data/all_ids.pkl', 'wb') as f:
        pkl.dump(all_ids, f)

    context = tf.train.Features()
    random.shuffle(captions)
    for c in captions:
        # Replace plural forms by their category name.
        c = [replace.get(w, w) for w in c]
        # Key words are extracted from the words before id conversion.
        k = parse_key_words(c, category_set)
        c = [vocab.word_to_id(word) for word in c]
        # Drop captions dominated by unknown words (> 15% unk ids).
        if c.count(vocab.unk_id) > len(c) * 0.15:
            continue
        k = [vocab.word_to_id(i) for i in k]
        feature_lists = tf.train.FeatureLists(
            feature_list={
                'key': _int64_feature_list(k),
                'sentence': _int64_feature_list(c)
            })
        sequence_example = tf.train.SequenceExample(
            context=context, feature_lists=feature_lists)
        yield sequence_example.SerializeToString()
Example #2
0
def image_generator(split):
    """Yield one serialized ``tf.train.SequenceExample`` per image of a split.

    File names are read from 'data/coco_<split>.txt' (one per line) and are
    shuffled only when ``split == 'train'``. For each image the detections
    stored in 'data/object.hdf5' under ``<name without extension>`` are
    reduced to one entry per distinct class, and the class values are
    mapped through the dictionary pickled in 'data/all_ids.pkl'.

    Args:
        split: Name substituted into 'data/coco_%s.txt'.

    Yields:
        Serialized SequenceExample whose context holds 'image/name' and
        'image/data' (the raw file contents read from
        ``FLAGS.image_path``), and whose feature lists are 'classes' and
        'scores'.
    """
    with open('data/coco_%s.txt' % split, 'r') as f:
        filename = [line.strip() for line in f]
    if split == 'train':
        random.shuffle(filename)
    # Pickle streams are binary; text mode fails on Python 3.
    with open('data/all_ids.pkl', 'rb') as f:
        all_ids = pkl.load(f)
    with h5py.File('data/object.hdf5', 'r') as f:
        for i in filename:
            name = os.path.splitext(i)[0]
            detection_classes = f[name + '/detection_classes'][:].astype(
                np.int32)
            detection_scores = f[name + '/detection_scores'][:]
            # Keep one detection per class: np.unique returns the sorted
            # distinct classes and the index of each one's first occurrence,
            # which selects the matching score.
            detection_classes, ind = np.unique(detection_classes,
                                               return_index=True)
            detection_scores = detection_scores[ind]
            detection_classes = [all_ids[j] for j in detection_classes]
            image_path = FLAGS.image_path + '/' + i
            # Read the image as raw bytes; text mode would try to decode the
            # file contents on Python 3.
            with tf.gfile.FastGFile(image_path, 'rb') as g:
                image = g.read()
            context = tf.train.Features(
                feature={
                    'image/name': _bytes_feature(i),
                    'image/data': _bytes_feature(image),
                })
            feature_lists = tf.train.FeatureLists(
                feature_list={
                    'classes': _int64_feature_list(detection_classes),
                    'scores': _float_feature_list(detection_scores)
                })
            sequence_example = tf.train.SequenceExample(
                context=context, feature_lists=feature_lists)

            yield sequence_example.SerializeToString()
Example #3
0
def run(iname):
    """Serialize the caption registered for ``iname`` as a SequenceExample.

    Args:
        iname: Key looked up in ``self_caps``.

    Returns:
        Serialized ``tf.train.SequenceExample`` with an empty context and a
        single 'sentence' feature list. The list holds the vocabulary ids of
        the whitespace-separated words of ``self_caps[iname]``, or the
        single value -1 when ``iname`` has no entry in ``self_caps``.
    """
    tf = infer.tf
    if iname in self_caps:
        token_ids = [infer.vocab.word_to_id(tok)
                     for tok in self_caps[iname].split()]
    else:
        # No caption for this key: emit a one-element placeholder sentence.
        token_ids = [-1]
    example = tf.train.SequenceExample(
        context=tf.train.Features(),
        feature_lists=tf.train.FeatureLists(
            feature_list={'sentence': _int64_feature_list(token_ids)}))
    return example.SerializeToString()
Example #4
0
def run(classes):
    """Generate a sentence for ``classes`` and serialize it.

    ``infer.infer`` is called with ``classes`` in reversed order; the element
    at ``[0][0]`` of its result is split on whitespace and each word is
    converted to its vocabulary id.

    Args:
        classes: Sliceable sequence passed (reversed) to ``infer.infer``.

    Returns:
        Serialized ``tf.train.SequenceExample`` with an empty context and a
        single 'sentence' feature list of word ids.
    """
    tf = infer.tf
    generated = infer.infer(classes[::-1])
    words = generated[0][0].split()
    token_ids = [infer.vocab.word_to_id(word) for word in words]
    example = tf.train.SequenceExample(
        context=tf.train.Features(),
        feature_lists=tf.train.FeatureLists(
            feature_list={'sentence': _int64_feature_list(token_ids)}))
    return example.SerializeToString()