Esempio n. 1
0
    def __init__(self, w_path, dec_path='dataset/text/dec_map.pkl',
                    enc_path='dataset/text/enc_map.pkl',
                    embedding_path='pre_trained/glove.6B.100d.txt'):
        """Build the caption model and restore its trained weights.

        Args:
            w_path: path to the saved weights file for the caption model.
            dec_path: pickle of the index->word decoding map.
            enc_path: pickle of the word->index encoding map.
            embedding_path: GloVe text file used to build the embedding matrix.
        """
        # Context managers close the pickle file handles promptly; the
        # original left them open until garbage collection.
        with open(dec_path, 'rb') as f:
            dec_map = cPickle.load(f)
        with open(enc_path, 'rb') as f:
            enc_map = cPickle.load(f)

        embedding_matrix = generate_embedding_matrix(embedding_path, dec_map)
        self.model = image_caption_model(embedding_matrix=embedding_matrix)

        self.extractor = ImageFeatureExtractor('weights/tensorflow_inception_graph.pb')
        self.model.load_weights(w_path)
        self.dec_map = dec_map
        self.enc_map = enc_map
Esempio n. 2
0
def predict(num_of_pic, char_to_int, int_to_char, test_set):
    """Greedy-decode a caption for one image from ``test_set``.

    Builds the caption model, loads the demo weights, then repeatedly feeds
    the image plus the last predicted token back into the model until the
    end marker '#' is produced or ``max_len`` (module-level) steps elapse.

    Returns:
        List of decoded tokens (mapped through ``int_to_char``), end marker
        excluded.
    """
    caption_model = image_caption_model()
    caption_model.load_weights('demo.h5v1.0.0_60_15_1496111479.97.h5')

    # Single-image batch for the model's image input.
    image_batch = np.array([test_set[num_of_pic]])
    token = char_to_int['$']  # start-of-caption marker
    decoded = []

    for _ in range(max_len):
        scores = caption_model.predict(
            [image_batch, np.array(token).reshape(-1, 1)])
        best = np.argmax(scores)
        if best == char_to_int['#']:
            break
        token = best
        decoded.append(int_to_char[best])
    return decoded
Esempio n. 3
0
            # Key the dict by the path with the leading `images` prefix
            # stripped -- assumes d's keys are the stripped names; confirm.
            test_dict[i] = d[i[len(images):]]
    print(len(test_img))
    return test_img, test_dict


if __name__ == '__main__':
    #path = sys.argv[1]
    # Vocabulary maps: dec_map decodes index->word, enc_map encodes word->index.
    # NOTE(review): the open() handles are never closed, and pickle.load
    # assumes these files are trusted data.
    dec_map = pickle.load(open("dec_map.pkl", 'rb'))
    enc_map = pickle.load(open("enc_map.pkl", 'rb'))

    # NOTE(review): 'dict' shadows the builtin dict type -- consider renaming.
    dict = process()

    #img_train = pickle.load(open("/general/home/ronakchaudhary132199/Image-Captioning-master/encoded_images_inceptionV3.p", 'rb'))
    # Pre-extracted InceptionV3 features for the test images.
    img_test = pickle.load(open("../encoded_images_test_inceptionV3.p", 'rb'))

    # Build the caption model and restore trained weights.
    model = image_caption_model(vocab_size=8256, clipnorm=1.)
    model.load_weights("weights/v1.0.0_6_39_1524863089.8904815.h5")
    # model.load_weights("")  # placeholder for an alternate weights file

    #eval_human(model, img_train, df_cap, enc_map, dec_map, k=1, size=40, max_len=13)
    '''
    print("____________________________________________________________________________________________________________________________")
    model.load_weights("/general/home/manish.singhal/attention/weights/v1.0.0_60_69_1523845852.7416637.h5")
    print(generate_k_best(model, enc_map, dec_map, img1, 3,13))
    print(generate_k_best(model, enc_map, dec_map, img2, 3,13))
    print(generate_k_best(model, enc_map, dec_map, img3, 3,13))
    print(generate_k_best(model, enc_map, dec_map, img4, 3,13))
    print(generate_k_best(model, enc_map, dec_map, img5, 3,13))
    '''

    ##===============================================================================================###
Esempio n. 4
0
import pickle
import sys

import numpy as np

from extractor import ImageFeatureExtractor
from model import image_caption_model

if __name__ == '__main__':
    # Maximum number of decoding steps for one caption.
    max_sent_len = 28
    model_path = './weights/v1.0.0_11_0_1494239663.5093253_602.h5'
    # NOTE(review): sys and pickle are used below but absent from this
    # file's visible import block.
    image_path = sys.argv[1]
    ife = ImageFeatureExtractor('model/inception_v3_2016_08_28_frozen.pb')

    # word<->index vocabulary maps saved during training.
    with open('./train/word2idx.pkl', 'rb') as f:
        word2idx = pickle.load(f)
    with open('./train/idx2word.pkl', 'rb') as f:
        idx2word = pickle.load(f)
    # +1, presumably for a padding/OOV index -- confirm against training code.
    vocab_size = len(word2idx) + 1
    model = image_caption_model(vocab_size=vocab_size)
    model.load_weights(model_path)
    start_sign = word2idx['+']  # '+' marks start-of-sentence in this vocab

    # Single-image batch of extracted CNN features.
    img = np.array([ife.extract_features(image_path)])

    # cur: current input token; vhist: per-vocab history vector; answer:
    # accumulator (unused in the visible code).
    cur, vhist, answer = np.array([[start_sign]]), np.array([[0] * vocab_size
                                                             ]), []
    vhist = np.array(vhist)  # NOTE(review): redundant -- already an ndarray
    for idx in range(0, max_sent_len):
        # One-hot position indicator for the current decoding step.
        seq = np.array([[1 if i == idx else 0
                         for i in range(0, max_sent_len)]])
        out = model.predict([img, cur, seq, vhist])[0]
        nxt = int(np.argmax(out))
        ans = idx2word.get(nxt, '<?>')  # '<?>' for out-of-vocabulary indices
        print(ans, 'score:', out[nxt])
        # NOTE(review): cur/vhist are never updated in the visible loop, so
        # every step feeds the start token -- verify against the full script.
Esempio n. 5
0
    # Decode token ids into words; presumably id 1 is the end token and id 0
    # is padding -- confirm against enc_map.
    for x in ans:
        if x == 1 : break
        if x != 0 : gen.append(dec_map[x])
    return  ' '.join(gen)

def eval_human(model, img_map, df_cap, enc_map, dec_map, k=4, size=1, max_len=10):
    """Print generated vs. ground-truth captions for random caption rows.

    Args:
        model: trained caption model passed through to ``generate_k_best``.
        img_map: mapping img_id -> image feature vector.
        df_cap: DataFrame with 'caption' (stringified token-id list) and
            'img_id' columns.
        enc_map: word->index vocabulary map.
        dec_map: index->word vocabulary map.
        k: beam width for ``generate_k_best``.
        size: number of random rows to sample.
        max_len: maximum generated caption length.
    """
    import ast  # local import: only needed here

    for idx in np.random.randint(df_cap.shape[0], size=size):
        row = df_cap.iloc[idx]
        # The caption column holds a Python list literal of token ids;
        # literal_eval parses it without eval()'s arbitrary-code risk.
        cap = ast.literal_eval(row['caption'])
        img_id = row['img_id']
        img = img_map[img_id]
        gen = generate_k_best(model, enc_map, dec_map, img, k=k, max_len=max_len)
        print('[{}]'.format(img_id))
        print('[generated] {}'.format(gen))
        # Skip the first and last ids (start/end markers) when decoding.
        print('[groundtruth] {}'.format(' '.join([dec_map[cap[i]] for i in range(1,len(cap)-1)])))

if __name__ == '__main__':
    # Usage: python <script> <weights_path>
    path = sys.argv[1]

    # Vocabulary maps: dec_map decodes index->word, enc_map encodes
    # word->index. Context managers close the handles promptly (the
    # original left every pickle file open until garbage collection).
    with open('dataset/text/dec_map.pkl', 'rb') as f:
        dec_map = cPickle.load(f)
    with open('dataset/text/enc_map.pkl', 'rb') as f:
        enc_map = cPickle.load(f)

    # Pre-extracted 2048-d image features for the train/test splits.
    with open('dataset/train_img2048.pkl', 'rb') as f:
        img_train = cPickle.load(f)
    with open('dataset/test_img2048.pkl', 'rb') as f:
        img_test = cPickle.load(f)
    # Encoded training captions (token-id sequences).
    df_cap = pd.read_csv('dataset/text/train_enc_cap.csv')

    model = image_caption_model(clipnorm=1.)
    model.load_weights(path)

    eval_human(model, img_train, df_cap, enc_map, dec_map, k=1, size=40, max_len=13)
Esempio n. 6
0
    hist_path = 'history/'  # where loss history gets written
    mdl_path = 'weights/'   # where model checkpoints get written

    # read pkl
    # Vocabulary maps: dec_map decodes index->word, enc_map encodes word->index.
    # NOTE(review): the open() handles below are never closed explicitly.
    dec_map = cPickle.load(open('dataset/text/dec_map.pkl', 'rb'))
    enc_map = cPickle.load(open('dataset/text/enc_map.pkl', 'rb'))

    # Pre-extracted image features (2048-d train, 256-d test, per filenames).
    img_train = cPickle.load(open('dataset/train_img2048.pkl', 'rb'))
    img_test = cPickle.load(open('dataset/test_img256.pkl', 'rb'))

    # Encoded training captions (token-id sequences).
    df_cap = pd.read_csv('dataset/text/train_enc_cap.csv')

    vocab_size = len(dec_map)
    # GloVe-initialized embedding matrix for the decoder vocabulary.
    embedding_matrix = generate_embedding_matrix(
        'pre_trained/glove.6B.100d.txt', dec_map)
    model = image_caption_model(vocab_size=vocab_size,
                                embedding_matrix=embedding_matrix)

    # Optionally resume from a checkpoint given on the command line.
    if len(sys.argv) >= 2:
        print('load weights from : {}'.format(sys.argv[1]))
        model.load_weights(sys.argv[1])

    # insert your version name here
    version = 'v1.0.0'
    batch_num = 70
    print_summary(model.layers)

    hist_loss = []  # per-batch loss history

    # Outer loop: epochs; inner loop: batches. (Loop body continues beyond
    # this excerpt.)
    for i in range(0, 100):
        for j in range(0, batch_num):
            s = time.time()
Esempio n. 7
0
    '''
    # initialization
    hist_path = 'history/'
    mdl_path = 'weights/'

    # read pkl
    dec_map = cPickle.load(open('dataset/text/dec_map.pkl', 'rb'))
    enc_map = cPickle.load(open('dataset/text/enc_map.pkl', 'rb'))
    img_train = cPickle.load(open('dataset/train_img2048.pkl', 'rb'))
    img_test = cPickle.load(open('dataset/test_img256.pkl', 'rb'))
    df_cap = pd.read_csv('dataset/text/train_enc_cap.csv')
    '''
    # Reopen stdout unbuffered (buffering=0 on a text stream is a Python 2
    # idiom) so training progress prints appear immediately.
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
    vocab_size = 11573
    #embedding_matrix = generate_embedding_matrix('pre_trained/glove.6B.100d.txt', dec_map)
    model = image_caption_model()

    # Optionally resume from a checkpoint given on the command line.
    if len(sys.argv) >= 2:
        print('load weights from : {}'.format(sys.argv[1]))
        model.load_weights(sys.argv[1])

    # insert your version name here
    version = 'v1.0.0'
    batch_num = 30
    #print_summary(model.layers)

    hist_loss = []  # per-batch loss history

    # Outer loop: epochs; inner loop: batches. (Loop body continues beyond
    # this excerpt.)
    for i in range(0, 40):
        for j in range(0, batch_num):
            s = time.time()