Example #1
import cPickle  # Python 2; on Python 3 use `import pickle as cPickle`

# generate_embedding_matrix, image_caption_model, generate_k_best and
# ImageFeatureExtractor come from the project's own modules.


class ImageCaptionModel(object):

    def __init__(self, w_path, dec_path='dataset/text/dec_map.pkl',
                 enc_path='dataset/text/enc_map.pkl',
                 embedding_path='pre_trained/glove.6B.100d.txt'):
        # Index->word and word->index maps produced during training.
        dec_map = cPickle.load(open(dec_path, 'rb'))
        enc_map = cPickle.load(open(enc_path, 'rb'))

        # Build the caption model with a GloVe-initialized embedding matrix
        # and restore the trained weights.
        embedding_matrix = generate_embedding_matrix(embedding_path, dec_map)
        self.model = image_caption_model(embedding_matrix=embedding_matrix)
        self.model.load_weights(w_path)

        # Frozen Inception graph used to turn an image into a feature vector.
        self.extractor = ImageFeatureExtractor('weights/tensorflow_inception_graph.pb')
        self.dec_map = dec_map
        self.enc_map = enc_map

    def predict(self, img_path):
        # Extract image features, then run a k-best (k=6) decode with
        # captions capped at 15 tokens.
        img_feature = self.extractor.extract_features(img_path, flag_from_file=True)
        sentence = generate_k_best(self.model, self.enc_map, self.dec_map,
                                   img_feature, k=6, max_len=15)
        return sentence
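
A minimal usage sketch; the weight file and the image path below are placeholders, not files shipped with the example:

# Hypothetical paths for illustration only.
captioner = ImageCaptionModel(w_path='weights/model.h5')
print(captioner.predict('sample.jpg'))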
Example #2
import sys
import pickle

import numpy as np

# ImageFeatureExtractor and image_caption_model are defined in the project's
# own modules.

if __name__ == '__main__':
    max_sent_len = 28
    model_path = './weights/v1.0.0_11_0_1494239663.5093253_602.h5'
    image_path = sys.argv[1]
    ife = ImageFeatureExtractor('model/inception_v3_2016_08_28_frozen.pb')

    # Vocabulary maps built during training.
    with open('./train/word2idx.pkl', 'rb') as f:
        word2idx = pickle.load(f)
    with open('./train/idx2word.pkl', 'rb') as f:
        idx2word = pickle.load(f)
    vocab_size = len(word2idx) + 1

    # Rebuild the caption model and restore the trained weights.
    model = image_caption_model(vocab_size=vocab_size)
    model.load_weights(model_path)
    start_sign = word2idx['+']

    # Image feature vector, batched as a single-element array.
    img = np.array([ife.extract_features(image_path)])

    # cur: last emitted token, vhist: multi-hot history of tokens already
    # generated, answer: the decoded caption so far.
    cur = np.array([[start_sign]])
    vhist = np.array([[0] * vocab_size])
    answer = []

    # Greedy decoding: at each step feed the image, the last token, a one-hot
    # position vector and the token history, then take the argmax word.
    for idx in range(max_sent_len):
        seq = np.array([[1 if i == idx else 0 for i in range(max_sent_len)]])
        out = model.predict([img, cur, seq, vhist])[0]
        nxt = int(np.argmax(out))
        ans = idx2word.get(nxt, '<?>')
        print(ans, 'score:', out[nxt])
        answer.append(ans)
        cur = np.array([[nxt]])
        tmp_vhist = np.array([[0] * vocab_size])
        tmp_vhist[0, nxt] = 1
        vhist = np.array([np.logical_or(vhist[0, :], tmp_vhist[0, :])])

    print(' '.join(answer))
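
The script reads the image path from sys.argv[1], so it is invoked as, for example (the script name here is a placeholder):

python predict.py path/to/image.jpg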