def user_img(df, vocab_path, url, cache = None, loc_only = True, **kwargs): # Generate a dataset that doesn't group users # We are only interested in stuff with locations with open(vocab_path) as f_vocab: vocab = dict([ (x[1].strip(), x[0]) for x in enumerate(f_vocab.readlines()) ]) extractor = ImageFeatureExtractor(vocab, url, **kwargs) df = df.loc[pd.notnull(df['airport'])] X = extractor._transform(zip(df['mid'], df['url']), cache) df['feat'] = [ x for x in X ] # Group our posts by userid and combine the bow and locations into a single feature data = { 'uid': [], 'feat': [], 'loc': [] } for k,vs in df.groupby('uid').groups.iteritems(): locs = dict(df.loc[vs]['airport'].value_counts()).items() feats = reduce(lambda x,y: x + y, df.loc[vs]['feat']) # We are only interested in users with at least one location if len(locs) > 0: data['uid'].append(k) data['loc'].append(locs) data['feat'].append(feats) df_user = pd.DataFrame(data) if loc_only: df_user = df_user.loc[pd.notnull(df_user['loc'])] X = np.array([ x for x in df_user['feat'] ]) y_loc = df_user['loc'] print "User Dataset: %s, %s" % (str(X.shape), str(y_loc.shape)) extractor = ImageFeatureExtractor(vocab, url, **kwargs) return (X, y_loc, extractor)
def __init__(self, w_path, dec_path='dataset/text/dec_map.pkl',
             enc_path='dataset/text/enc_map.pkl',
             embedding_path='pre_trained/glove.6B.100d.txt'):
    """Load decoder/encoder maps and weights, and build the caption model.

    :param w_path: path to the trained Keras weights file.
    :param dec_path: pickled index -> token decoder map.
    :param enc_path: pickled token -> index encoder map.
    :param embedding_path: GloVe embedding text file used to build the
        embedding matrix for the decoder vocabulary.
    """
    # FIX: the original used cPickle.load(open(...)) which leaks the
    # file handles; use context managers so they are closed promptly.
    with open(dec_path, 'rb') as f:
        dec_map = cPickle.load(f)
    with open(enc_path, 'rb') as f:
        enc_map = cPickle.load(f)
    embedding_matrix = generate_embedding_matrix(embedding_path, dec_map)
    self.model = image_caption_model(embedding_matrix=embedding_matrix)
    # NOTE(review): inception graph path is hard-coded here — presumably
    # intentional, but confirm it matches the deployed weights layout.
    self.extractor = ImageFeatureExtractor('weights/tensorflow_inception_graph.pb')
    self.model.load_weights(w_path)
    self.dec_map = dec_map
    self.enc_map = enc_map
def post_img(df, vocab_path, url, cache = None, **kwargs): # Generate a dataset that doesn't group users # We are only interested in stuff with locations with open(vocab_path) as f_vocab: vocab = dict([ (x[1].strip(), x[0]) for x in enumerate(f_vocab.readlines()) ]) extractor = ImageFeatureExtractor(vocab, url, **kwargs) df_img = df.loc[pd.notnull(df['airport'])] X = extractor._transform(zip(df_img['mid'], df_img['url']), cache) y_loc = df_img['airport'] print "Post Image Dataset" print X.shape print y_loc.shape print df_img.head() return (X, y_loc, extractor)
def post_img(df, vocab_path, url, cache=None, **kwargs):
    """Build a post-level (ungrouped) image dataset.

    Keeps only posts with a non-null 'airport' label; extracts one image
    feature row per post via ImageFeatureExtractor._transform.

    :param df: DataFrame with at least 'mid', 'url', 'airport' columns.
    :param vocab_path: vocabulary file, one token per line; token -> line index.
    :param url: passed through to ImageFeatureExtractor.
    :param cache: optional cache forwarded to _transform — semantics owned
        by the extractor; presumably a feature cache, verify there.
    :param kwargs: forwarded to ImageFeatureExtractor.
    :return: (X, y_loc, extractor).
    """
    # Vocabulary: token -> line index.
    with open(vocab_path) as f_vocab:
        vocab = dict([(x[1].strip(), x[0]) for x in enumerate(f_vocab.readlines())])
    extractor = ImageFeatureExtractor(vocab, url, **kwargs)
    # We are only interested in posts with a location.
    df_img = df.loc[pd.notnull(df['airport'])]
    X = extractor._transform(zip(df_img['mid'], df_img['url']), cache)
    y_loc = df_img['airport']
    print "Post Image Dataset"
    print X.shape
    print y_loc.shape
    print df_img.head()
    return (X, y_loc, extractor)
def user_img(df, vocab_path, url, cache=None, loc_only=True, **kwargs): # Generate a dataset that doesn't group users # We are only interested in stuff with locations with open(vocab_path) as f_vocab: vocab = dict([(x[1].strip(), x[0]) for x in enumerate(f_vocab.readlines())]) extractor = ImageFeatureExtractor(vocab, url, **kwargs) df = df.loc[pd.notnull(df['airport'])] X = extractor._transform(zip(df['mid'], df['url']), cache) df['feat'] = [x for x in X] # Group our posts by userid and combine the bow and locations into a single feature data = {'uid': [], 'feat': [], 'loc': []} for k, vs in df.groupby('uid').groups.iteritems(): locs = dict(df.loc[vs]['airport'].value_counts()).items() feats = reduce(lambda x, y: x + y, df.loc[vs]['feat']) # We are only interested in users with at least one location if len(locs) > 0: data['uid'].append(k) data['loc'].append(locs) data['feat'].append(feats) df_user = pd.DataFrame(data) if loc_only: df_user = df_user.loc[pd.notnull(df_user['loc'])] X = np.array([x for x in df_user['feat']]) y_loc = df_user['loc'] print "User Dataset: %s, %s" % (str(X.shape), str(y_loc.shape)) extractor = ImageFeatureExtractor(vocab, url, **kwargs) return (X, y_loc, extractor)
class ImageCaptionModel(object):
    """Wraps a trained image-caption network plus its vocab maps and
    an Inception-based image feature extractor."""

    def __init__(self, w_path, dec_path='dataset/text/dec_map.pkl',
                 enc_path='dataset/text/enc_map.pkl',
                 embedding_path='pre_trained/glove.6B.100d.txt'):
        """Load decoder/encoder maps, build the model, and load weights.

        :param w_path: path to the trained Keras weights file.
        :param dec_path: pickled index -> token decoder map.
        :param enc_path: pickled token -> index encoder map.
        :param embedding_path: GloVe embedding file used to build the
            embedding matrix for the decoder vocabulary.
        """
        # FIX: cPickle.load(open(...)) leaked file handles; use context
        # managers so files are closed deterministically.
        with open(dec_path, 'rb') as f:
            dec_map = cPickle.load(f)
        with open(enc_path, 'rb') as f:
            enc_map = cPickle.load(f)
        embedding_matrix = generate_embedding_matrix(embedding_path, dec_map)
        self.model = image_caption_model(embedding_matrix=embedding_matrix)
        self.extractor = ImageFeatureExtractor('weights/tensorflow_inception_graph.pb')
        self.model.load_weights(w_path)
        self.dec_map = dec_map
        self.enc_map = enc_map

    def predict(self, img_path):
        """Caption the image at img_path.

        Extracts an image feature vector, then beam-searches (k=6,
        max length 15) for the best caption.

        :param img_path: filesystem path to the input image.
        :return: the generated sentence from generate_k_best.
        """
        img_feature = self.extractor.extract_features(img_path, flag_from_file=True)
        sentence = generate_k_best(self.model, self.enc_map, self.dec_map,
                                   img_feature, k=6, max_len=15)
        return sentence
from __future__ import absolute_import from __future__ import division from __future__ import print_function import sys import pickle import numpy as np from extractor import ImageFeatureExtractor from model import image_caption_model if __name__ == '__main__': max_sent_len = 28 model_path = './weights/v1.0.0_11_0_1494239663.5093253_602.h5' image_path = sys.argv[1] ife = ImageFeatureExtractor('model/inception_v3_2016_08_28_frozen.pb') with open('./train/word2idx.pkl', 'rb') as f: word2idx = pickle.load(f) with open('./train/idx2word.pkl', 'rb') as f: idx2word = pickle.load(f) vocab_size = len(word2idx) + 1 model = image_caption_model(vocab_size=vocab_size) model.load_weights(model_path) start_sign = word2idx['+'] img = np.array([ife.extract_features(image_path)]) cur, vhist, answer = np.array([[start_sign]]), np.array([[0] * vocab_size ]), [] vhist = np.array(vhist) for idx in range(0, max_sent_len):