Code example #1
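This script loads a trained attention-based image-captioning model, rebuilds its CNN encoder and language model with batch size 1, then greedily decodes a caption for each validation image while collecting the per-word attention maps for display.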
import copy
import os
import pickle

import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import zoom, gaussian_filter
from keras import backend as K
from keras.layers import Input
from keras.models import Model

# get_parser, get_model, DataLoader, image_model, language_model, read_image,
# idx2word and sample are assumed to come from the surrounding project


def do_proc():
    parser = get_parser()
    args_dict = parser.parse_args()
    args_dict.mode = 'test'
    args_dict.bs = 1
    args_dict.cnn_train = False
    args_dict.dr = True
    args_dict.bn = True
    args_dict.sgate = True
    args_dict.temperature = -1

    args_dict.model_file = 'h5-models/model-ep008-loss2.863-val_loss3.476.h5'

    model = get_model(args_dict)
    weights = args_dict.model_file
    model.load_weights(weights)
    print()
    model.summary()
    # a dummy optimizer is passed because newer Keras rejects optimizer=None;
    # the model is only used for inference in this script
    model.compile(optimizer='sgd',
                  loss='categorical_crossentropy',
                  sample_weight_mode="temporal")

    dataloader = DataLoader(args_dict)
    N = args_dict.bs
    val_gen = dataloader.generator('test',
                                   batch_size=args_dict.bs,
                                   train_flag=False)  # N samples

    tmp_dir = os.path.join(args_dict.data_folder, 'tmp')

    # split the trained network into its two sub-models and save their weights
    # to disk; layer indices 1 (CNN) and 3 (language model) are assumed to
    # match the structure returned by get_model()
    cnn = model.layers[1]
    cnn.save_weights(os.path.join(tmp_dir, 'cnn.h5'), overwrite=True)
    lang_model = model.layers[3]
    lang_model.save_weights(os.path.join(tmp_dir, 'lang.h5'), overwrite=True)
    K.clear_session()

    # rebuild both sub-networks with batch size 1 and single-step inputs so
    # the caption can be decoded one word at a time
    wh = args_dict.convsize  # spatial dim of conv features
    dim = args_dict.nfilters  # number of channels
    seqlen = 1  # seqlen is 1 in test mode
    im_ph = Input(batch_shape=(args_dict.bs, args_dict.imsize,
                               args_dict.imsize, 3))
    cf_ph = Input(batch_shape=(args_dict.bs, wh, wh, dim))
    pw_ph = Input(batch_shape=(args_dict.bs, seqlen), name='prev_words')

    cnn = image_model(args_dict, im_ph)
    cnn.load_weights(os.path.join(tmp_dir, 'cnn.h5'))

    lang_model = language_model(args_dict, wh, dim, cf_ph, pw_ph)
    lang_model.load_weights(os.path.join(tmp_dir, 'lang.h5'))

    att_layer = 'att_scores'
    lang_model_att = Model(inputs=lang_model.input,
                           outputs=[
                               lang_model.get_layer('out').output,
                               lang_model.get_layer(att_layer).output
                           ])
    # dummy optimizer again: both models are only used for inference below
    cnn.compile(optimizer='sgd',
                loss='categorical_crossentropy',
                sample_weight_mode="temporal")
    lang_model_att.compile(optimizer='sgd',
                           loss='categorical_crossentropy',
                           sample_weight_mode="temporal")

    vocab_file = os.path.join(args_dict.data_folder, 'data', args_dict.vfile)
    vocab = pickle.load(open(vocab_file, 'rb'))
    inv_vocab = {v: k for k, v in vocab.items()}

    figsize = (30, 30)

    # parameters to manipulate attention weights
    sig = 5
    th = 0.3

    IMPATH = os.path.join(args_dict.coco_path, 'images',
                          'val' + args_dict.year)
    count = 0

    for [batch_im, prevs], cap, _, imids in val_gen:
        conv_feats = cnn.predict_on_batch(batch_im)

        # store all attention maps here
        masks = np.zeros(
            (args_dict.seqlen, args_dict.imsize, args_dict.imsize))
        # first previous word is <start> (idx 1 in vocab)
        prevs = np.zeros((N, 1))

        # store all predicted words in sequence here
        word_idxs = np.zeros((N, args_dict.seqlen))

        imname = imids[0]['file_name']
        img = read_image(os.path.join(IMPATH, imname),
                         (args_dict.imsize, args_dict.imsize))

        # loop to get sequence of predicted words
        for i in range(args_dict.seqlen):

            preds, att = lang_model_att.predict_on_batch(
                [conv_feats, prevs])  # (N,1,vocab_size)
            # store predicted word and set previous word for next step
            preds = preds.squeeze()
            if args_dict.temperature > 0:
                preds = sample(preds, temperature=args_dict.temperature)
            word_idxs[:, i] = np.argmax(preds, axis=-1)
            prevs = np.argmax(preds, axis=-1)
            prevs = np.reshape(prevs, (N, 1))

            # attention map manipulation for display
            s_att = np.shape(att)[-1]
            att = np.reshape(att, (s_att, ))
            if args_dict.sgate:
                s_w = att[-1]  # sentinel weight
                att = att[:-1]  # drop the sentinel weight from the attention weights
                if s_w > 0.5:
                    continue  # sentinel dominates, so leave this step's mask black
            s = int(np.sqrt(s_att))
            att = np.reshape(att, (s, s))
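            # upsample the s x s attention map to the image size, smooth and
            # min-max normalize it, then threshold: attended regions become 1
            # and everything else is dimmed to 0.3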
            att = zoom(att, float(img.shape[0]) / att.shape[-1], order=1)
            att = gaussian_filter(att, sigma=sig)
            att = (att - (np.min(att))) / (np.max(att) - np.min(att))
            att[att > th] = 1
            att[att <= th] = 0.3
            masks[i] = att

        # find words for predicted word idxs
        pred_caps = idx2word(word_idxs, inv_vocab)
        true_caps = idx2word(np.argmax(cap, axis=-1), inv_vocab)

        # display predictions with attention maps
        n_words = len(pred_caps[0])
        f, axarr = plt.subplots(1, n_words, figsize=figsize)
        for i in range(n_words):
            im = copy.deepcopy(img)
            for c in range(3):
                im[:, :, c] = im[:, :, c] * masks[i]
            axarr[i].imshow(im)
            axarr[i].axis('off')
            axarr[i].set_title(pred_caps[0][i])

        plt.show()

        pred_cap = ' '.join(pred_caps[0])
        true_cap = ' '.join(true_caps[0])

        # true captions
        print("ID:", imids[0]['file_name'], imids[0]['id'])
        print("True:", true_cap)
        print("Gen:", pred_cap)

        # clear the recurrent state of the language model before the next image
        lang_model_att.reset_states()
        count += 1
        if count > 10:
            break
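
For reference, the sample() helper called above is not part of the snippet; a common implementation of temperature sampling (a sketch in the style of the classic Keras text-generation example, assuming batch size 1) would be:

import numpy as np

def sample(preds, temperature=1.0):
    # reweight the softmax output by the temperature and renormalize,
    # then draw one class; the caller takes argmax over the one-hot draw
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds + 1e-12) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    return np.random.multinomial(1, preds, 1)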
Code example #2
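This snippet runs a single image through VGG-16 and prints the top prediction followed by the next highest-scoring ImageNet classes, caching the model on disk so the ImageNet weights are only downloaded once.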
import os

import numpy as np
from keras.applications.vgg16 import VGG16
from keras.models import load_model
from keras.optimizers import SGD

# `im` is assumed to be a preprocessed (224, 224, 3) image array and
# `item_dict` an ImageNet index-to-label mapping, both defined earlier
im = np.expand_dims(im, axis=0)  # add the batch dimension: (1, 224, 224, 3)

# Import model
# Weights: https://gist.github.com/baraldilorenzo/07d7802847aaad0a35d3
if os.path.exists('test_modelVG.h5'):
    model = load_model('test_modelVG.h5')
else:
    # `VG` in the original is assumed to be keras.applications' VGG16,
    # whose constructor matches these keyword arguments
    model = VGG16(include_top=True,
                  weights='imagenet',
                  input_tensor=None,
                  input_shape=None,
                  pooling=None,
                  classes=1000)
    model.save('test_modelVG.h5')

# alternative: build the network manually and load the gist weights,
# e.g. VGG_16(weights_path='weights.h5')
optimizer = SGD()
# compile() is not strictly required before predict(), but it is harmless here
model.compile(optimizer=optimizer, loss='categorical_crossentropy')
out = model.predict(im)

index = np.argmax(out)  # index of the single best class

i = np.argsort(out)  # class indices sorted by ascending score

print("Max Prediction: " + item_dict[int(index)])
print("Other predictions in order:")
for ind in range(5):
    # walk the sorted indices from the back, i.e. highest score first
    name = item_dict[int(i[0][-ind - 1])]
    print(str(ind + 1) + ". " + name)
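
As a side note, keras.applications also provides decode_predictions(), which maps the raw softmax output straight to human-readable ImageNet labels and could replace item_dict here:

from keras.applications.vgg16 import decode_predictions

# returns, per input image, (class_id, class_name, score) tuples
# for the top-k classes, highest score first
for class_id, class_name, score in decode_predictions(out, top=5)[0]:
    print(class_name + ": " + str(score))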