コード例 #1
0
def extract_feature(params):
    """Extract a ResNet-152 ``pool5`` feature for every annotation box.

    For each annotation the box is cropped from its image, resized so that
    its longer side is 224 px (aspect ratio kept), converted from RGB to BGR
    channel order, shifted by per-channel constants and centred on a
    zero-filled 224x224 canvas.  The resulting batch is passed through
    ``L.ResNet152Layers`` and the ``pool5`` outputs are collected and written
    with ``np.save`` to ``<save_dir>/prepro/<dataset>_<splitBy>/<ann_feats>``.

    Args:
        params: dict of settings.  Keys read here: ``dataset``, ``splitBy``,
            ``save_dir``, ``coco_image_root`` or ``gta_image_root``, ``old``,
            ``batch_size``, ``gpu_id`` and ``ann_feats``.  When
            ``params['old']`` is truthy, ``data_json``, ``data_h5`` and
            ``ann_feats`` are prefixed with ``'old'`` in place before the
            loader is built.

    Raises:
        ValueError: if ``params['dataset']`` is not one of ``refcoco``,
            ``refcoco+``, ``refcocog`` or ``refgta``.
    """
    dataset = params['dataset']
    if dataset in ['refcoco', 'refcoco+', 'refcocog']:
        image_root = params['coco_image_root']
    elif dataset == 'refgta':
        image_root = params['gta_image_root']
    else:
        # Fail before the loader and the network are built instead of hitting
        # an unbound ``image_root`` at the first image.
        raise ValueError('unsupported dataset: {!r}'.format(dataset))
    target_save_dir = osp.join(params['save_dir'], 'prepro',
                               dataset + '_' + params['splitBy'])

    if params['old']:
        params['data_json'] = 'old' + params['data_json']
        params['data_h5'] = 'old' + params['data_h5']
        params['ann_feats'] = 'old' + params['ann_feats']

    loader = DataLoader(params)

    # model setting
    batch_size = params['batch_size']
    gpu_id = params['gpu_id']
    cuda.get_device(gpu_id).use()
    xp = cuda.cupy

    res = L.ResNet152Layers()
    res.to_gpu(gpu_id)
    # Inference only: these are process-wide Chainer settings.
    chainer.config.train = False
    chainer.config.enable_backprop = False

    anns = loader.anns
    images = loader.Images
    side = 224
    # Subtracted after the RGB -> BGR flip below.
    mean = np.array([103.939, 116.779, 123.68], dtype=np.float32)
    # ``Image.ANTIALIAS`` was an alias of LANCZOS and is gone in Pillow >= 10.
    resample = Image.LANCZOS

    ann_feats = []
    for bs in tqdm(range(0, len(anns), batch_size)):
        batch = []
        for ix in range(bs, min(bs + batch_size, len(anns))):
            ann = anns[ix]
            # Features are stored positionally, so the annotation order must
            # match the h5 index.
            assert ann['h5_id'] == ix, 'h5_id not match'
            img = images[ann['image_id']]
            x1, y1, w, h = ann['box']
            image = Image.open(osp.join(image_root,
                                        img['file_name'])).convert('RGB')
            # Scale the longer side of the box to ``side``.
            if h <= w:
                nh, nw = int(side / w * h), side
            else:
                nh, nw = side, int(side / h * w)
            image = image.crop((x1, y1, x1 + w, y1 + h)).resize(
                (nw, nh), resample)
            image = np.array(image).astype(np.float32)[:, :, ::-1]
            image -= mean
            image = image.transpose((2, 0, 1))  # HWC -> CHW

            # Centre the crop on a zero canvas along its shorter dimension.
            pad_image = np.zeros((3, side, side), dtype=np.float32)
            if nh <= nw:
                top = (side - nh) // 2
                pad_image[:, top:top + nh, :] = image
            else:
                left = (side - nw) // 2
                pad_image[:, :, left:left + nw] = image
            batch.append(pad_image)
        batch = Variable(xp.array(batch, dtype=xp.float32))
        feature = res(batch, layers=['pool5'])
        feature = cuda.to_cpu(feature['pool5'].data)
        ann_feats.extend(feature)
    np.save(osp.join(target_save_dir, params['ann_feats']), ann_feats)
コード例 #2
0
def extract_feature(params):
    """Extract spatial ResNet-152 ``res5`` features for every annotation box.

    Each annotation box is cropped from its image and resized by
    ``keep_asR_resize`` (defined elsewhere), which also returns the resize
    shape that is recorded per annotation.  The crop is converted from RGB to
    BGR channel order, shifted by per-channel constants and passed through
    ``L.ResNet152Layers``.  The ``res5`` maps are moved to channel-last layout
    and reshaped to ``(-1, 36, 2048)``.

    Two files are written with ``np.save`` under
    ``<save_dir>/prepro/<dataset>_<splitBy>/``: the features
    (``params['sp_ann_feats']``) and the resize shapes
    (``params['ann_shapes']``).

    Args:
        params: dict of settings.  Keys read here: ``dataset``, ``splitBy``,
            ``save_dir``, ``coco_image_root`` or ``gta_image_root``, ``old``,
            ``batch_size``, ``gpu_id``, ``sp_ann_feats`` and ``ann_shapes``.
            When ``params['old']`` is truthy, ``data_json``, ``data_h5``,
            ``ann_feats``, ``sp_ann_feats`` and ``ann_shapes`` are prefixed
            with ``'old'`` in place.

    Raises:
        ValueError: if ``params['dataset']`` is not one of ``refcoco``,
            ``refcoco+``, ``refcocog`` or ``refgta``.
    """
    dataset = params['dataset']
    if dataset in ['refcoco', 'refcoco+', 'refcocog']:
        image_root = params['coco_image_root']
    elif dataset == 'refgta':
        image_root = params['gta_image_root']
    else:
        # Fail before the loader and the network are built instead of hitting
        # an unbound ``image_root`` at the first image.
        raise ValueError('unsupported dataset: {!r}'.format(dataset))
    target_save_dir = osp.join(params['save_dir'], 'prepro',
                               dataset + '_' + params['splitBy'])

    if params['old']:
        params['data_json'] = 'old' + params['data_json']
        params['data_h5'] = 'old' + params['data_h5']
        params['ann_feats'] = 'old' + params['ann_feats']
        # The two files this function actually writes must be prefixed as
        # well; otherwise an "old" run overwrites the regular outputs.
        params['sp_ann_feats'] = 'old' + params['sp_ann_feats']
        params['ann_shapes'] = 'old' + params['ann_shapes']

    loader = DataLoader(params)

    # model setting
    batch_size = params['batch_size']
    gpu_id = params['gpu_id']
    cuda.get_device(gpu_id).use()
    xp = cuda.cupy

    res = L.ResNet152Layers()
    res.to_gpu(gpu_id)
    # Inference only: these are process-wide Chainer settings.
    chainer.config.train = False
    chainer.config.enable_backprop = False

    anns = loader.anns
    images = loader.Images
    # Subtracted after the RGB -> BGR flip below.
    mean = np.array([103.939, 116.779, 123.68], dtype=np.float32)

    ann_feats = []
    shapes = []
    for bs in tqdm(range(0, len(anns), batch_size)):
        batch = []
        for ix in range(bs, min(bs + batch_size, len(anns))):
            ann = anns[ix]
            # Features are stored positionally, so the annotation order must
            # match the h5 index.
            assert ann['h5_id'] == ix, 'h5_id not match'
            img = images[ann['image_id']]
            x1, y1, w, h = ann['box']
            crop = Image.open(osp.join(image_root,
                                       img['file_name'])).convert('RGB').crop(
                                           (x1, y1, x1 + w, y1 + h))
            # NOTE(review): presumably keep_asR_resize returns a fixed-size
            # image so the batch stacks and res5 yields 36 cells -- confirm.
            image, resize_shape = keep_asR_resize(crop)
            shapes.append(resize_shape)
            image = np.array(image).astype(np.float32)[:, :, ::-1]
            image -= mean
            batch.append(image.transpose((2, 0, 1)))  # HWC -> CHW
        batch = Variable(xp.array(batch, dtype=xp.float32))
        feature = res(batch, layers=['res5'])
        feature = cuda.to_cpu(feature['res5'].data)
        # (N, C, H, W) -> (N, H, W, C) -> (N, 36, 2048)
        ann_feats.extend(
            np.transpose(feature, (0, 2, 3, 1)).reshape(-1, 36, 2048))
    np.save(osp.join(target_save_dir, params['sp_ann_feats']), ann_feats)
    np.save(osp.join(target_save_dir, params['ann_shapes']), shapes)
コード例 #3
0
def extract_feature(params):
    """Extract spatial ResNet-152 ``res5`` features for every whole image.

    Every image known to the loader is resized to a dataset-specific size
    (224x224 for the RefCOCO family, 480x288 for RefGTA), converted from RGB
    to BGR channel order, shifted by per-channel constants and passed through
    ``L.ResNet152Layers``.  The ``res5`` maps are moved to channel-last layout
    and flattened to ``(-1, 49, 2048)`` or ``(-1, 135, 2048)`` respectively,
    then written with ``np.save`` to
    ``<save_dir>/prepro/<dataset>_<splitBy>/<image_feats>``.

    Args:
        params: dict of settings.  Keys read here: ``dataset``, ``splitBy``,
            ``save_dir``, ``coco_image_root`` or ``gta_image_root``, ``old``,
            ``batch_size``, ``gpu_id`` and ``image_feats``.  When
            ``params['old']`` is truthy, ``data_json``, ``data_h5`` and
            ``image_feats`` are prefixed with ``'old'`` in place.

    Raises:
        ValueError: if ``params['dataset']`` is not one of ``refcoco``,
            ``refcoco+``, ``refcocog`` or ``refgta``.
    """
    dataset = params['dataset']
    # Resolve everything that depends on the dataset once, up front.
    if dataset in ['refcoco', 'refcoco+', 'refcocog']:
        image_root = params['coco_image_root']
        input_size = (224, 224)  # (width, height) as expected by PIL
        num_cells = 49
    elif dataset == 'refgta':
        image_root = params['gta_image_root']
        input_size = (480, 288)
        num_cells = 135
    else:
        # Fail before the loader and the network are built instead of hitting
        # an unbound ``image_root`` at the first image.
        raise ValueError('unsupported dataset: {!r}'.format(dataset))
    target_save_dir = osp.join(params['save_dir'], 'prepro',
                               dataset + '_' + params['splitBy'])

    if params['old']:
        params['data_json'] = 'old' + params['data_json']
        params['data_h5'] = 'old' + params['data_h5']
        params['image_feats'] = 'old' + params['image_feats']

    loader = DataLoader(params)
    # model setting
    batch_size = params['batch_size']
    gpu_id = params['gpu_id']
    cuda.get_device(gpu_id).use()
    xp = cuda.cupy

    res = L.ResNet152Layers()
    res.to_gpu(gpu_id)
    # Inference only: these are process-wide Chainer settings.
    chainer.config.train = False
    chainer.config.enable_backprop = False

    images = loader.images
    # Subtracted after the RGB -> BGR flip below.
    mean = np.array([103.939, 116.779, 123.68], dtype=np.float32)
    # ``Image.ANTIALIAS`` was an alias of LANCZOS and is gone in Pillow >= 10.
    resample = Image.LANCZOS
    image_feats = []

    for bs in tqdm(range(0, len(images), batch_size)):
        batch = []
        for ix in range(bs, min(bs + batch_size, len(images))):
            image = Image.open(
                osp.join(image_root,
                         images[ix]['file_name'])).convert('RGB')
            image = image.resize(input_size, resample)
            image = np.array(image).astype(np.float32)[:, :, ::-1]
            image -= mean
            batch.append(image.transpose((2, 0, 1)))  # HWC -> CHW
        batch = Variable(xp.array(batch, dtype=xp.float32))
        feature = res(batch, layers=['res5'])
        feature = cuda.to_cpu(feature['res5'].data)
        # (N, C, H, W) -> (N, H, W, C) -> (N, cells, 2048)
        image_feats.extend(
            np.transpose(feature, (0, 2, 3, 1)).reshape(-1, num_cells, 2048))

    np.save(osp.join(target_save_dir, params['image_feats']), image_feats)
コード例 #4
0
def test_net(args):
    """Run the restored network over the test set and dump probabilities to CSV.

    Batches are pulled from a ``DataLoader`` built in test mode until the
    loader reports that it wrapped around.  For every image, the image name
    and the values of ``net.probs`` are collected; surplus entries of the
    final batch (those beyond ``loader.train_size``) are discarded.  One CSV
    row per image is written to ``args.output_file``: the image name followed
    by its probabilities.

    Args:
        args: namespace-like object providing ``batch_size``,
            ``input_folder``, ``model`` (checkpoint passed to
            ``optimistic_restore``) and ``output_file``.
    """
    import sys

    tf.set_random_seed(42)
    np.random.seed(42)

    loader = DataLoader(args.batch_size, args.input_folder, info_path=None, val_split=0, test=True)
    vgg16 = VGG16(trainable=False, layer='fc6')
    net = fishTest(vgg16, lr=None)

    # Not used below; kept because it is built in the original graph setup.
    saver = tf.train.Saver(max_to_keep=1)

    test_names = []
    test_probs = []
    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        optimistic_restore(sess, args.model)

        iter_ix = 0
        while True:
            print('{}/{}'.format(iter_ix, loader.train_size))
            val_img, _, wrap, img_names = loader.next_batch()
            probs = sess.run(net.probs, feed_dict={net.input: val_img}).tolist()
            iter_ix += args.batch_size
            if wrap:
                # Number of trailing entries that lie past the dataset end.
                # Slicing with an explicit keep-count avoids the ``[:-0]``
                # pitfall, which dropped the whole last batch whenever the
                # dataset size was an exact multiple of the batch size.
                overshoot = max(iter_ix - loader.train_size, 0)
                test_names += img_names[:max(len(img_names) - overshoot, 0)]
                test_probs += probs[:max(len(probs) - overshoot, 0)]
                break

            test_names += img_names
            test_probs += probs

        print(len(test_names))
        print(len(test_probs))

    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3 ('wb' raises TypeError there).
    if sys.version_info[0] >= 3:
        csvfile = open(args.output_file, 'w', newline='')
    else:
        csvfile = open(args.output_file, 'wb')
    with csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        for img_name, probs in zip(test_names, test_probs):
            writer.writerow([img_name] + probs)
コード例 #5
0
def train_all(params):
    """Jointly train the visual encoder and language model with a listener reward.

    Builds a ``VisualEncoder`` and a ``LanguageModel`` (both optimised with
    Adam) plus a ``ListenerReward`` whose weights are loaded from
    ``<save_dir>/model/<dataset>_<splitBy>/<id>.h5`` and for which no
    optimiser is set up.  Each iteration combines the language-model loss
    from ``lm_crits`` with the reward-based loss from ``rl_crit`` on sampled
    sentences; for ``refgta`` with ``params['ranking']`` set, a ranking loss
    is added after iteration 8000.

    Every ``params['save_checkpoint_every']`` iterations the validation split
    is evaluated; when the validation loss improves, the encoder and language
    model are saved as ``<id><id2>ve.h5`` / ``<id><id2>lm.h5`` in the model
    directory, and accuracy/loss curves are written as PNG files into
    ``log_graph/<dataset>_<splitBy>/``.

    The training loop has no exit condition; the function runs until the
    process is interrupted.

    Args:
        params: dict of settings.  Keys read here: ``save_dir``, ``dataset``,
            ``splitBy``, ``old``, ``gpu_id``, ``sp_ann_feats``, ``ann_feats``,
            ``image_feats``, ``ann_shapes``, ``id``, ``id2``, ``grad_clip``,
            ``rank_lam``, ``lm_margin``, ``vis_rank_weight``,
            ``lang_rank_weight``, ``ranking``, ``losses_log_every``,
            ``save_checkpoint_every``, ``learning_rate_decay_start`` and
            ``learning_rate_decay_every``.  The dict is also handed to
            ``DataLoader`` and its batch getters.  When ``params['old']`` is
            truthy, the file-name style entries are prefixed with ``'old'``
            in place.

    Raises:
        ValueError: if ``params['dataset']`` is not one of ``refcoco``,
            ``refcoco+``, ``refcocog`` or ``refgta``.
    """
    split_name = params['dataset'] + '_' + params['splitBy']
    target_save_dir = osp.join(params['save_dir'], 'prepro', split_name)
    graph_dir = osp.join('log_graph', split_name)
    model_dir = osp.join(params['save_dir'], 'model', split_name)

    if params['old']:
        for key in ('data_json', 'data_h5', 'image_feats', 'sp_ann_feats',
                    'ann_feats', 'ann_shapes', 'id', 'word_emb_path'):
            params[key] = 'old' + params[key]

    if params['dataset'] in ['refcoco', 'refcoco+', 'refcocog']:
        global_shapes = (224, 224)
    elif params['dataset'] == 'refgta':
        global_shapes = (480, 288)
    else:
        # Previously this surfaced later as an unbound ``global_shapes``.
        raise ValueError('unsupported dataset: {!r}'.format(
            params['dataset']))

    # The plots below are saved here; make sure the directory exists so the
    # first checkpoint does not die in ``plt.savefig``.
    if not osp.isdir(graph_dir):
        os.makedirs(graph_dir)

    loader = DataLoader(params)

    # model setting
    gpu_id = params['gpu_id']
    cuda.get_device(gpu_id).use()
    xp = cuda.cupy
    featsOpt = {
        'sp_ann': osp.join(target_save_dir, params['sp_ann_feats']),
        'ann_input': osp.join(target_save_dir, params['ann_feats']),
        'img': osp.join(target_save_dir, params['image_feats']),
        'shapes': osp.join(target_save_dir, params['ann_shapes'])
    }
    loader.loadFeats(featsOpt, mmap_mode=False)
    loader.shuffle('train')

    ve = VisualEncoder(res6=L.ResNet152Layers().fc6,
                       global_shapes=global_shapes).to_gpu(gpu_id)
    rl_crit = ListenerReward(len(loader.ix_to_word),
                             global_shapes=global_shapes).to_gpu(gpu_id)
    lm = LanguageModel(len(loader.ix_to_word),
                       loader.seq_length,
                       global_shapes,
                       res6=L.ResNet152Layers().fc6).to_gpu(gpu_id)

    # The listener reward is loaded from disk and only used as a critic.
    serializers.load_hdf5(osp.join(model_dir, params['id'] + ".h5"), rl_crit)

    # Base learning rates; the decay schedule at the end of the loop is
    # always computed relative to these.
    ve_base_alpha = 4e-5
    lm_base_alpha = 4e-4
    ve_optim = optimizers.Adam(alpha=ve_base_alpha, beta1=0.8)
    lm_optim = optimizers.Adam(alpha=lm_base_alpha, beta1=0.8)

    ve_optim.setup(ve)
    lm_optim.setup(lm)

    ve_optim.add_hook(chainer.optimizer.GradientClipping(params['grad_clip']))
    lm_optim.add_hook(chainer.optimizer.GradientClipping(params['grad_clip']))

    ## non-finetune layer
    # Per-parameter learning-rate overrides for these parameters.
    ve.joint_enc.W.update_rule.hyperparam.alpha = 4e-4
    ve.joint_enc.b.update_rule.hyperparam.alpha = 4e-4
    lm.gaussian_p.x_var.update_rule.hyperparam.alpha = 1e-2
    lm.gaussian_p.y_var.update_rule.hyperparam.alpha = 1e-2
    ve.gaussian_p.x_var.update_rule.hyperparam.alpha = 1e-2
    ve.gaussian_p.y_var.update_rule.hyperparam.alpha = 1e-2

    iteration = 0
    lam = params['rank_lam']
    val_loss_history = []
    val_loss_lm_s_history = []
    val_loss_lm_l_history = []
    val_acc_history = []
    val_rank_acc_history = []
    min_val_loss = 100
    while True:
        chainer.config.train = True
        chainer.config.enable_backprop = True
        ve.zerograds()
        lm.zerograds()
        rl_crit.zerograds()

        data = loader.getBatch('train', params)

        pos_feats = Variable(xp.array(data['feats'], dtype=xp.float32))
        pos_sp_cxt_feats = Variable(
            xp.array(data['sp_cxt_feats'], dtype=xp.float32))
        pos_sp_ann_feats = Variable(
            xp.array(data['sp_ann_feats'], dtype=xp.float32))

        neg_feats = Variable(xp.array(data['neg_feats'], dtype=xp.float32))
        neg_pos_sp_cxt_feats = Variable(
            xp.array(data['neg_sp_cxt_feats'], dtype=xp.float32))
        neg_pos_sp_ann_feats = Variable(
            xp.array(data['neg_sp_ann_feats'], dtype=xp.float32))

        # The batch is laid out as three equally sized thirds along axis 0:
        #   visual:   [positive, negative, positive]
        #   language: [positive, positive, negative]
        # i.e. (paired, visual-unpaired, language-unpaired).
        local_shapes = np.concatenate([
            data['local_shapes'], data['neg_local_shapes'],
            data['local_shapes']
        ], axis=0)

        feats = F.concat([pos_feats, neg_feats, pos_feats], axis=0)
        sp_cxt_feats = F.concat(
            [pos_sp_cxt_feats, neg_pos_sp_cxt_feats, pos_sp_cxt_feats], axis=0)
        sp_ann_feats = F.concat(
            [pos_sp_ann_feats, neg_pos_sp_ann_feats, pos_sp_ann_feats], axis=0)
        seqz = np.concatenate([data['seqz'], data['seqz'], data['neg_seqz']],
                              axis=0)
        lang_last_ind = calc_max_ind(seqz)
        seqz = Variable(xp.array(seqz, dtype=xp.int32))

        # Slice of the feature vector selected via ve.feat_ind, moved to CPU.
        coord = cuda.to_cpu(
            feats[:, sum(ve.feat_ind[:1]):sum(ve.feat_ind[:2])].data)
        local_sp_coord, global_sp_coord = calc_coordinate_feature(
            coord, local_shapes, global_shapes=global_shapes)
        local_sp_coord = xp.array(local_sp_coord, dtype=xp.float32)
        global_sp_coord = xp.array(global_sp_coord, dtype=xp.float32)

        # encode vis feature
        vis_feats = ve(feats, sp_cxt_feats, coord)
        sp_feats, sp_feats_emb = lm.calc_spatial_features(
            sp_cxt_feats, sp_ann_feats, local_sp_coord, global_sp_coord)

        logprobs = lm(vis_feats, sp_feats, sp_feats_emb, coord, seqz,
                      lang_last_ind)

        # lang loss
        pairP, vis_unpairP, lang_unpairP = F.split_axis(logprobs, 3, axis=1)
        pair_num, _, lang_unpair_num = np.split(lang_last_ind, 3)
        num_labels = {'T': xp.array(pair_num), 'F': xp.array(lang_unpair_num)}
        lm_flows = {
            'T': pairP,
            'visF': [pairP, vis_unpairP],
            'langF': [pairP, lang_unpairP]
        }
        lm_loss = lm_crits(lm_flows,
                           num_labels,
                           params['lm_margin'],
                           vlamda=params['vis_rank_weight'],
                           llamda=params['lang_rank_weight'])

        # RL loss (pos,pos): only the first (paired) third is sampled from.
        rl_vis_feats = F.split_axis(vis_feats, 3, axis=0)[0]
        rl_coord = np.split(coord, 3, axis=0)[0]
        rl_sp_vis_feats = F.split_axis(sp_feats, 3, axis=0)[0]
        rl_sp_vis_emb = F.split_axis(sp_feats_emb, 3, axis=0)[0]
        sampled_seq, sample_log_probs = lm.sample(rl_vis_feats,
                                                  rl_sp_vis_feats,
                                                  rl_sp_vis_emb, rl_coord)
        sampled_lang_last_ind = calc_max_ind(sampled_seq)
        rl_loss = rl_crit(pos_feats, pos_sp_cxt_feats, rl_coord, sampled_seq,
                          sample_log_probs, sampled_lang_last_ind)

        loss = lm_loss + rl_loss
        print(lm_loss, rl_loss)

        if params['dataset'] == 'refgta' and params[
                'ranking'] and iteration > 8000:
            # The ranking weight grows by a fixed step every iteration.
            lam += 0.4 / 8000
            score = F.sum(pairP, axis=0) / (xp.array(pair_num + 1))
            rank_loss = calc_rank_loss(score, data['rank'], margin=0.01) * lam
            loss += rank_loss
        loss.backward()

        ve_optim.update()
        lm_optim.update()

        if data['bounds']['wrapped']:
            print('one epoch finished!')
            loader.shuffle('train')

        if iteration % params['losses_log_every'] == 0:
            acc = xp.where(rl_crit.reward > 0.5, 1, 0).mean()
            print('{} iter : train loss {}, acc : {} reward_mean : {}'.format(
                iteration, loss.data, acc, rl_crit.reward.mean()))

        if (iteration % params['save_checkpoint_every'] == 0
                and iteration > 0):
            chainer.config.train = False
            chainer.config.enable_backprop = False
            loader.resetImageIterator('val')
            loss_sum = 0
            loss_generation = 0
            loss_lm_margin = 0
            loss_evals = 0
            accuracy = 0
            rank_acc = 0
            rank_num = 0
            while True:
                data = loader.getImageBatch('val', params)
                img_ann_ids = data['img_ann_ids']
                sent_ids = data['sent_ids']
                gd_ixs = data['gd_ixs']
                feats = Variable(xp.array(data['feats'], dtype=xp.float32))
                sp_cxt_feats = Variable(
                    xp.array(data['sp_cxt_feats'], dtype=xp.float32))
                sp_ann_feats = Variable(
                    xp.array(data['sp_ann_feats'], dtype=xp.float32))
                local_shapes = data['local_shapes']
                seqz = data['seqz']
                lang_last_ind = calc_max_ind(seqz)
                scores = []
                for i, sent_id in enumerate(sent_ids):
                    gd_ix = gd_ixs[i]

                    # Score the same sentence against every annotation of
                    # the image.
                    sent_seqz = np.concatenate(
                        [[seqz[i]] for _ in range(len(img_ann_ids))], axis=0)
                    one_last_ind = np.array([lang_last_ind[i]] *
                                            len(img_ann_ids))
                    sent_seqz = Variable(xp.array(sent_seqz, dtype=xp.int32))

                    # NOTE(review): the visual inputs do not depend on the
                    # sentence index, so this encoding looks hoistable out of
                    # the loop -- confirm the encoders are side-effect free.
                    coord = cuda.to_cpu(
                        feats[:,
                              sum(ve.feat_ind[:1]):sum(ve.feat_ind[:2])].data)
                    local_sp_coord, global_sp_coord = calc_coordinate_feature(
                        coord, local_shapes, global_shapes=global_shapes)
                    local_sp_coord = xp.array(local_sp_coord,
                                              dtype=xp.float32)
                    global_sp_coord = xp.array(global_sp_coord,
                                               dtype=xp.float32)
                    vis_feats = ve(feats, sp_cxt_feats, coord)
                    sp_feats, sp_feats_emb = lm.calc_spatial_features(
                        sp_cxt_feats, sp_ann_feats, local_sp_coord,
                        global_sp_coord)

                    logprobs = lm(vis_feats, sp_feats, sp_feats_emb, coord,
                                  sent_seqz, one_last_ind).data

                    # Generation loss on the ground-truth annotation only
                    # (ranking terms disabled).
                    lm_generation_loss = lm_crits(
                        {
                            'T': logprobs[:, gd_ix, xp.newaxis]
                        }, {
                            'T': one_last_ind[gd_ix, np.newaxis]
                        },
                        params['lm_margin'],
                        vlamda=0,
                        llamda=0).data
                    lm_scores = -computeLosses(logprobs, one_last_ind)
                    lm_margin_loss, pos_sc, max_neg_sc = compute_margin_loss(
                        lm_scores, gd_ix, params['lm_margin'])
                    scores.append(lm_scores[gd_ix])

                    loss_generation += lm_generation_loss
                    loss_lm_margin += lm_margin_loss
                    loss_sum += lm_generation_loss + lm_margin_loss
                    loss_evals += 1
                    if pos_sc > max_neg_sc:
                        accuracy += 1
                if params['dataset'] == 'refgta':
                    rank_a, rank_n = calc_rank_acc(scores, data['rank'])
                    rank_acc += rank_a
                    rank_num += rank_n
                print('{} iter | {}/{} validating acc : {}'.format(
                    iteration, data['bounds']['it_pos_now'],
                    data['bounds']['it_max'], accuracy / loss_evals))

                if data['bounds']['wrapped']:
                    print('validation finished!')
                    fin_val_loss = cuda.to_cpu(loss_sum / loss_evals)
                    loss_generation = cuda.to_cpu(loss_generation / loss_evals)
                    loss_lm_margin = cuda.to_cpu(loss_lm_margin / loss_evals)
                    fin_val_acc = accuracy / loss_evals
                    break
            val_loss_history.append(fin_val_loss)
            val_loss_lm_s_history.append(loss_generation)
            val_loss_lm_l_history.append(loss_lm_margin)
            val_acc_history.append(fin_val_acc)
            if min_val_loss > fin_val_loss:
                print('val loss {} -> {} improved!'.format(
                    min_val_loss, val_loss_history[-1]))
                min_val_loss = fin_val_loss
                serializers.save_hdf5(
                    osp.join(model_dir,
                             params['id'] + params['id2'] + "ve.h5"), ve)
                serializers.save_hdf5(
                    osp.join(model_dir,
                             params['id'] + params['id2'] + "lm.h5"), lm)

            ## graph
            plt.title("accuracy")
            plt.plot(np.arange(len(val_acc_history)),
                     val_acc_history,
                     label="val_accuracy")
            plt.legend()
            plt.savefig(
                os.path.join(graph_dir,
                             params['id'] + params['id2'] + "_joint_acc.png"))
            plt.close()

            plt.title("loss")
            plt.plot(np.arange(len(val_loss_history)),
                     val_loss_history,
                     label="all_loss")
            plt.plot(np.arange(len(val_loss_history)),
                     val_loss_lm_s_history,
                     label="generation_loss")
            plt.legend()
            plt.savefig(
                os.path.join(graph_dir,
                             params['id'] + params['id2'] + "_joint_loss.png"))
            plt.close()

            plt.title("loss")
            plt.plot(np.arange(len(val_loss_history)),
                     val_loss_lm_l_history,
                     label="lm_comp_loss")
            plt.legend()
            plt.savefig(
                os.path.join(
                    graph_dir,
                    params['id'] + params['id2'] + "_joint_comp_loss.png"))
            plt.close()

            if params['dataset'] == 'refgta':
                val_rank_acc_history.append(rank_acc / rank_num)
                plt.title("rank loss")
                plt.plot(np.arange(len(val_rank_acc_history)),
                         val_rank_acc_history,
                         label="rank_acc")
                plt.legend()
                plt.savefig(
                    os.path.join(
                        graph_dir,
                        params['id'] + params['id2'] + "_joint_rank_acc.png"))
                plt.close()

        decay_start = params['learning_rate_decay_start']
        if iteration > decay_start and decay_start >= 0:
            frac = (iteration - decay_start
                    ) / params['learning_rate_decay_every']
            decay_factor = math.pow(0.1, frac)
            # Set the rate from the base value.  Multiplying the current rate
            # by ``decay_factor`` every iteration compounds the decay and
            # drives the learning rate to ~0 within a few hundred steps.
            ve_optim.alpha = ve_base_alpha * decay_factor
            lm_optim.alpha = lm_base_alpha * decay_factor

        iteration += 1
コード例 #6
0
ファイル: train.py プロジェクト: MonsKim999/XAI_Project
def train_all(params):
    target_save_dir = osp.join(params['save_dir'], 'prepro',
                               params['dataset'] + '_' + params['splitBy'])
    graph_dir = osp.join('log_graph',
                         params['dataset'] + '_' + params['splitBy'])
    model_dir = osp.join(params['save_dir'], 'model',
                         params['dataset'] + '_' + params['splitBy'])

    if params['old']:
        params['data_json'] = 'old' + params['data_json']
        params['data_h5'] = 'old' + params['data_h5']
        params['image_feats'] = 'old' + params['image_feats']
        params['ann_feats'] = 'old' + params['ann_feats']
        params['id'] = 'old' + params['id']
        params['word_emb_path'] = 'old' + params['word_emb_path']

    with open('setting.json', 'w') as f:
        json.dump(params, f)
    if not osp.isdir(graph_dir):
        os.mkdir(graph_dir)
    loader = DataLoader(params)

    # model setting
    batch_size = params['batch_size']
    gpu_id = params['gpu_id']
    cuda.get_device(gpu_id).use()
    xp = cuda.cupy

    featsOpt = {
        'ann': osp.join(target_save_dir, params['ann_feats']),
        'img': osp.join(target_save_dir, params['image_feats'])
    }
    loader.loadFeats(featsOpt)
    loader.shuffle('train')

    ve = VisualEncoder(res6=L.ResNet152Layers().fc6).to_gpu(gpu_id)
    if 'attention' in params['id']:
        print('attention language encoder')
        le = LanguageEncoderAttn(len(loader.ix_to_word))
        rl_crit = ListenerReward(len(loader.ix_to_word),
                                 attention=True).to_gpu(gpu_id)
    else:
        le = LanguageEncoder(len(loader.ix_to_word))
        rl_crit = ListenerReward(len(loader.ix_to_word),
                                 attention=False).to_gpu(gpu_id)
    cca = CcaEmbedding().to_gpu(gpu_id)
    lm = LanguageModel(len(loader.ix_to_word), loader.seq_length)
    if params['pretrained_w']:
        print('pretrained word embedding...')
        word_emb = load_vcab_init(
            loader.word_to_ix,
            osp.join(target_save_dir, params['word_emb_path']))
        le.word_emb.W.data = word_emb
        lm.word_emb = le.word_emb

    le.to_gpu(gpu_id)
    lm.to_gpu(gpu_id)
    serializers.load_hdf5(osp.join(model_dir, params['id'] + ".h5"), rl_crit)

    ve_optim = optimizers.Adam(alpha=4e-5, beta1=0.8)
    le_optim = optimizers.Adam(alpha=4e-4, beta1=0.8)
    cca_optim = optimizers.Adam(alpha=4e-4, beta1=0.8)
    lm_optim = optimizers.Adam(alpha=4e-4, beta1=0.8)

    ve_optim.setup(ve)
    le_optim.setup(le)
    cca_optim.setup(cca)
    lm_optim.setup(lm)

    ve_optim.add_hook(chainer.optimizer.GradientClipping(0.1))
    le_optim.add_hook(chainer.optimizer.GradientClipping(0.1))
    cca_optim.add_hook(chainer.optimizer.GradientClipping(0.1))
    lm_optim.add_hook(chainer.optimizer.GradientClipping(0.1))
    ve.joint_enc.W.update_rule.hyperparam.alpha = 4e-4
    ve.joint_enc.b.update_rule.hyperparam.alpha = 4e-4

    iteration = 0
    epoch = 0
    val_loss_history = []
    val_loss_lm_s_history = []
    val_loss_lm_l_history = []
    val_loss_l_history = []
    val_acc_history = []
    val_rank_acc_history = []
    min_val_loss = 100
    while True:
        chainer.config.train = True
        chainer.config.enable_backprop = True
        ve.zerograds()
        le.zerograds()
        cca.zerograds()
        lm.zerograds()
        rl_crit.zerograds()

        data = loader.getBatch('train', params)

        ref_ann_ids = data['ref_ann_ids']
        pos_feats = Variable(xp.array(data['feats'], dtype=xp.float32))
        neg_feats = Variable(xp.array(data['neg_feats'], dtype=xp.float32))

        feats = F.concat([pos_feats, neg_feats, pos_feats], axis=0)
        seqz = np.concatenate([data['seqz'], data['seqz'], data['neg_seqz']],
                              axis=0)
        lang_last_ind = calc_max_ind(seqz)
        seqz = Variable(xp.array(seqz, dtype=xp.int32))

        vis_enc_feats = ve(feats)
        lang_enc_feats = le(seqz, lang_last_ind)
        cossim, vis_emb_feats = cca(vis_enc_feats, lang_enc_feats)
        vis_feats = vis_combine(vis_enc_feats, vis_emb_feats)
        logprobs = lm(vis_feats, seqz, lang_last_ind)

        # emb loss
        pairSim, vis_unpairSim, lang_unpairSim = F.split_axis(cossim,
                                                              3,
                                                              axis=0)
        emb_flows = {
            'vis': [pairSim, vis_unpairSim],
            'lang': [pairSim, lang_unpairSim]
        }
        emb_loss = emb_crits(emb_flows, params['emb_margin'])

        # lang loss
        pairP, vis_unpairP, lang_unpairP = F.split_axis(logprobs, 3, axis=1)
        pair_num, _, lang_unpair_num = np.split(lang_last_ind, 3)
        num_labels = {'T': pair_num, 'F': lang_unpair_num}
        lm_flows = {
            'T': pairP,
            'visF': [pairP, vis_unpairP],
            'langF': [pairP, lang_unpairP]
        }
        lm_loss = lm_crits(lm_flows,
                           num_labels,
                           params['lm_margin'],
                           vlamda=params['vis_rank_weight'],
                           llamda=params['lang_rank_weight'])

        # RL loss (pos,pos)のみ
        rl_vis_feats = F.split_axis(vis_feats, 3, axis=0)[0]
        sampled_seq, sample_log_probs = lm.sample(rl_vis_feats)
        sampled_lang_last_ind = calc_max_ind(sampled_seq)
        rl_loss = rl_crit(pos_feats, sampled_seq, sample_log_probs,
                          sampled_lang_last_ind)  #, lm.baseline)

        loss = emb_loss + lm_loss + rl_loss
        print(emb_loss, lm_loss, rl_loss)

        loss.backward()

        ve_optim.update()
        le_optim.update()
        cca_optim.update()
        lm_optim.update()

        if data['bounds']['wrapped']:
            print('one epoch finished!')
            loader.shuffle('train')

        if params['check_sent']:
            sampled_sents = loader.decode_sequence(cuda.to_cpu(sampled_seq),
                                                   sampled_lang_last_ind)
            for i in range(len(sampled_sents)):
                print('sampled sentence : ', ' '.join(sampled_sents[i]))
                print('reward : ', rl_crit.reward[i])

        if iteration % params['losses_log_every'] == 0:
            acc = xp.where(rl_crit.reward > 0.5, 1, 0).mean()
            print('{} iter : train loss {}, acc : {}, reward_mean : {}'.format(
                iteration, loss.data, acc, rl_crit.reward.mean()))

        if iteration % params[
                'mine_hard_every'] == 0 and iteration > 0 and params[
                    'mine_hard']:
            make_graph(ve, cca, loader, 'train', params, xp)

        if (iteration % params['save_checkpoint_every'] == 0
                and iteration > 0):
            chainer.config.train = False
            chainer.config.enable_backprop = False
            loader.resetImageIterator('val')
            loss_sum = 0
            loss_generation = 0
            loss_lm_margin = 0
            loss_emb_margin = 0
            loss_evals = 0
            accuracy = 0
            rank_acc = 0
            rank_num = 0
            while True:
                data = loader.getImageBatch('val', params)
                image_id = data['image_id']
                img_ann_ids = data['img_ann_ids']
                sent_ids = data['sent_ids']
                gd_ixs = data['gd_ixs']
                feats = Variable(xp.array(data['feats'], dtype=xp.float32))
                seqz = data['seqz']
                lang_last_ind = calc_max_ind(seqz)
                scores = []
                for i, sent_id in enumerate(sent_ids):
                    gd_ix = gd_ixs[i]
                    labels = xp.zeros(len(img_ann_ids), dtype=xp.int32)
                    labels[gd_ix] = 1
                    labels = Variable(labels)

                    sent_seqz = np.concatenate(
                        [[seqz[i]] for _ in range(len(img_ann_ids))], axis=0)
                    one_last_ind = np.array([lang_last_ind[i]] *
                                            len(img_ann_ids))
                    sent_seqz = Variable(xp.array(sent_seqz, dtype=xp.int32))

                    vis_enc_feats = ve(feats)
                    lang_enc_feats = le(sent_seqz, one_last_ind)
                    cossim, vis_emb_feats = cca(vis_enc_feats, lang_enc_feats)
                    vis_feats = vis_combine(vis_enc_feats, vis_emb_feats)
                    logprobs = lm(vis_feats, sent_seqz, one_last_ind).data

                    gd_ix = gd_ixs[i]
                    lm_generation_loss = lm_crits(
                        {
                            'T': logprobs[:, gd_ix, xp.newaxis]
                        }, {
                            'T': one_last_ind[gd_ix, np.newaxis]
                        },
                        params['lm_margin'],
                        vlamda=0,
                        llamda=0).data

                    lm_scores = -computeLosses(logprobs, one_last_ind)
                    lm_margin_loss, _, _ = compute_margin_loss(
                        lm_scores, gd_ix, params['lm_margin'])
                    scores.append(lm_scores[gd_ix])

                    emb_margin_loss, pos_sc, max_neg_sc = compute_margin_loss(
                        cossim.data, gd_ix, params['emb_margin'])
                    loss_generation += lm_generation_loss
                    loss_lm_margin += lm_margin_loss
                    loss_emb_margin += emb_margin_loss
                    loss_sum += lm_generation_loss + lm_margin_loss + emb_margin_loss
                    loss_evals += 1
                    if pos_sc > max_neg_sc:
                        accuracy += 1
                if params['dataset'] == 'refgta':
                    rank_a, rank_n = calc_rank_acc(scores, data['rank'])
                    rank_acc += rank_a
                    rank_num += rank_n
                print('{} iter | {}/{} validating acc : {}'.format(
                    iteration, data['bounds']['it_pos_now'],
                    data['bounds']['it_max'], accuracy / loss_evals))

                if data['bounds']['wrapped']:
                    print('validation finished!')
                    fin_val_loss = cuda.to_cpu(loss_sum / loss_evals)
                    loss_generation = cuda.to_cpu(loss_generation / loss_evals)
                    loss_lm_margin = cuda.to_cpu(loss_lm_margin / loss_evals)
                    loss_emb_margin = cuda.to_cpu(loss_emb_margin / loss_evals)
                    fin_val_acc = accuracy / loss_evals
                    break
            val_loss_history.append(fin_val_loss)
            val_loss_lm_s_history.append(loss_generation)
            val_loss_lm_l_history.append(loss_lm_margin)
            val_loss_l_history.append(loss_emb_margin)
            val_acc_history.append(fin_val_acc)
            if min_val_loss > fin_val_loss:
                print('val loss {} -> {} improved!'.format(
                    min_val_loss, val_loss_history[-1]))
                min_val_loss = fin_val_loss
                serializers.save_hdf5(
                    osp.join(model_dir,
                             params['id'] + params['id2'] + "ve.h5"), ve)
                serializers.save_hdf5(
                    osp.join(model_dir,
                             params['id'] + params['id2'] + "le.h5"), le)
                serializers.save_hdf5(
                    osp.join(model_dir,
                             params['id'] + params['id2'] + "cca.h5"), cca)
                serializers.save_hdf5(
                    osp.join(model_dir,
                             params['id'] + params['id2'] + "lm.h5"), lm)

            ## graph
            plt.title("accuracy")
            plt.plot(np.arange(len(val_acc_history)),
                     val_acc_history,
                     label="val_accuracy")
            plt.legend()
            plt.savefig(
                os.path.join(graph_dir,
                             params['id'] + params['id2'] + "_joint_acc.png"))
            plt.close()

            plt.title("loss")
            plt.plot(np.arange(len(val_loss_history)),
                     val_loss_history,
                     label="all_loss")
            plt.plot(np.arange(len(val_loss_history)),
                     val_loss_lm_s_history,
                     label="generation_loss")
            plt.legend()
            plt.savefig(
                os.path.join(graph_dir,
                             params['id'] + params['id2'] + "_joint_loss.png"))
            plt.close()

            plt.title("loss")
            plt.plot(np.arange(len(val_loss_history)),
                     val_loss_lm_l_history,
                     label="lm_comp_loss")
            plt.plot(np.arange(len(val_loss_history)),
                     val_loss_l_history,
                     label="comp_loss")
            plt.legend()
            plt.savefig(
                os.path.join(
                    graph_dir,
                    params['id'] + params['id2'] + "_joint_comp_loss.png"))
            plt.close()

            if params['dataset'] == 'refgta':
                print(rank_num)
                val_rank_acc_history.append(rank_acc / rank_num)
                plt.title("rank loss")
                plt.plot(np.arange(len(val_rank_acc_history)),
                         val_rank_acc_history,
                         label="rank_acc")
                plt.legend()
                plt.savefig(
                    os.path.join(
                        graph_dir,
                        params['id'] + params['id2'] + "_rank_acc.png"))
                plt.close()

        if iteration > params['learning_rate_decay_start'] and params[
                'learning_rate_decay_start'] >= 0:
            frac = (iteration - params['learning_rate_decay_start']
                    ) / params['learning_rate_decay_every']
            decay_factor = math.pow(0.1, frac)
            ve_optim.alpha *= decay_factor
            le_optim.alpha *= decay_factor
            cca_optim.alpha *= decay_factor
            lm_optim.alpha *= decay_factor

        iteration += 1
コード例 #7
0
def eval_all(params):
    """Generate one referring expression per reference in a split and score them.

    Restores the saved ``VisualEncoder`` and ``LanguageModel`` weights, decodes
    a sentence for every item returned by ``loader.getTestBatch`` (greedy
    ``lm.max_sample`` when ``params['beam_width'] == 1``, ``beam_search``
    otherwise), scores all predictions with ``language_eval`` and writes three
    JSON files into ``result/<dataset>_<splitBy>/``:

    * ``<id><id2><beam_width><split>raw.json``      -- the predictions
    * ``<id><id2><beam_width><split>.json``         -- the ``language_eval`` output
    * ``<id><id2><beam_width><split>all_beam.json`` -- every beam per reference
      (an empty list when ``beam_width`` is not greater than 1)

    Args:
        params: option dict. Keys read here: ``save_dir``, ``dataset``,
            ``splitBy``, ``old``, ``sp_ann_feats``, ``ann_feats``,
            ``image_feats``, ``ann_shapes``, ``gpu_id``, ``id``, ``id2``,
            ``split`` and ``beam_width``. The dict is also handed to
            ``DataLoader``, ``loader.getTestBatch`` and ``language_eval``.
            When ``params['old']`` is truthy several entries are prefixed
            with ``'old'`` in place, so the caller's dict is modified.

    Raises:
        ValueError: if ``params['dataset']`` is not ``refcoco``, ``refcoco+``,
            ``refcocog`` or ``refgta``.
    """
    dataset_tag = params['dataset'] + '_' + params['splitBy']
    target_save_dir = osp.join(params['save_dir'], 'prepro', dataset_tag)
    model_dir = osp.join(params['save_dir'], 'model', dataset_tag)
    result_dir = osp.join('result', dataset_tag)

    if not osp.isdir(result_dir):
        os.makedirs(result_dir)

    if params['old']:
        # NOTE(review): the prefix is applied to 'image_feats_h5' and
        # 'ann_feats_h5', but the feature paths below are read from
        # 'image_feats', 'ann_feats', 'sp_ann_feats' and 'ann_shapes' --
        # confirm that these are the intended keys.
        params['data_json'] = 'old' + params['data_json']
        params['data_h5'] = 'old' + params['data_h5']
        params['image_feats_h5'] = 'old' + params['image_feats_h5']
        params['ann_feats_h5'] = 'old' + params['ann_feats_h5']
        params['id'] = 'old' + params['id']

    if params['dataset'] in ['refcoco', 'refcoco+', 'refcocog']:
        global_shapes = (224, 224)
    elif params['dataset'] == 'refgta':
        global_shapes = (480, 288)
    else:
        # Fail here with a clear message instead of hitting a NameError on
        # `global_shapes` after the data has already been loaded.
        raise ValueError('unsupported dataset: {}'.format(params['dataset']))

    loader = DataLoader(params)

    featsOpt = {
        'sp_ann': osp.join(target_save_dir, params['sp_ann_feats']),
        'ann_input': osp.join(target_save_dir, params['ann_feats']),
        'img': osp.join(target_save_dir, params['image_feats']),
        'shapes': osp.join(target_save_dir, params['ann_shapes'])
    }
    loader.loadFeats(featsOpt)

    # Inference only: switch off train-mode behaviour and graph construction.
    chainer.config.train = False
    chainer.config.enable_backprop = False

    gpu_id = params['gpu_id']
    cuda.get_device(gpu_id).use()
    xp = cuda.cupy

    ve = VisualEncoder(global_shapes=global_shapes).to_gpu(gpu_id)
    lm = LanguageModel(len(loader.ix_to_word), loader.seq_length,
                       global_shapes).to_gpu(gpu_id)

    serializers.load_hdf5(
        osp.join(model_dir, params['id'] + params['id2'] + "ve.h5"), ve)
    serializers.load_hdf5(
        osp.join(model_dir, params['id'] + params['id2'] + "lm.h5"), lm)

    predictions = []
    beam_all_results = []
    while True:
        data = loader.getTestBatch(params['split'], params)
        ref_ids = data['ref_ids']
        feats = Variable(xp.array(data['feats'], dtype=xp.float32))
        sp_cxt_feats = Variable(
            xp.array(data['sp_cxt_feats'], dtype=xp.float32))
        sp_ann_feats = Variable(
            xp.array(data['sp_ann_feats'], dtype=xp.float32))
        local_shapes = data['local_shapes']
        # Columns of the second feature group, as delimited by ve.feat_ind.
        coord = data['feats'][:, sum(ve.feat_ind[:1]):sum(ve.feat_ind[:2])]
        local_sp_coord, global_sp_coord = calc_coordinate_feature(
            coord, local_shapes, global_shapes=global_shapes)
        local_sp_coord = xp.array(local_sp_coord, dtype=xp.float32)
        global_sp_coord = xp.array(global_sp_coord, dtype=xp.float32)

        vis_enc_feats = ve(feats, sp_cxt_feats, coord)
        vis_feats = vis_enc_feats
        sp_feats, sp_feats_emb = lm.calc_spatial_features(
            sp_cxt_feats, sp_ann_feats, local_sp_coord, global_sp_coord)
        if params['beam_width'] == 1:
            results = lm.max_sample(vis_feats)
        else:
            beam_results, _ = beam_search(lm, vis_feats, sp_feats,
                                          sp_feats_emb, coord,
                                          params['beam_width'])

            # Keep only the first beam of each item as the prediction.
            results = [result[0]['sent'] for result in beam_results]
            ppls = [result[0]['ppl'] for result in beam_results]

        for i, result in enumerate(results):
            gen_sentence = ' '.join(
                [loader.ix_to_word[str(w)] for w in result])
            if params['beam_width'] == 1:
                print(gen_sentence)
            else:
                print(gen_sentence, ', ppl : ', ppls[i])
            entry = {'ref_id': ref_ids[i], 'sent': gen_sentence}
            predictions.append(entry)
            if params['beam_width'] > 1:
                beam_all_results.append({
                    'ref_id': ref_ids[i],
                    'beam': beam_results[i]
                })
        print('evaluating validation performance... {}/{}'.format(
            data['bounds']['it_pos_now'], data['bounds']['it_max']))

        if data['bounds']['wrapped']:
            print('validation finished!')
            break
    lang_stats = language_eval(predictions, params['split'], params)
    print(lang_stats)

    print('sentence mean length: ',
          np.mean([len(pred['sent'].split()) for pred in predictions]))

    # All three output files share the same path prefix; build it once.
    out_prefix = (result_dir + '/' + params['id'] + params['id2'] +
                  str(params['beam_width']) + params['split'])
    outputs = (
        ('raw.json', predictions),
        ('.json', lang_stats),
        ('all_beam.json', beam_all_results),
    )
    for suffix, payload in outputs:
        with open(out_prefix + suffix, 'w') as f:
            json.dump(payload, f)
コード例 #8
0
def eval_all(params):
    """Caption the first 36 boxes of every entry and pickle the annotated entries.

    Restores the saved encoder / embedding / language-model weights, then for
    each entry in ``train_entries`` runs ``beam_search`` on each of its first
    36 boxes and stores the three first beam sentences per box under
    ``entry['object_captions']``. The updated ``train_entries`` list is
    pickled to ``VQA_ref_testdataset_v3.pkl`` in the current directory.

    This block relies on names it does not define: ``train_entries``,
    ``train_iid2id``, ``all_image_feats``, ``all_boxes`` and ``fetch_feats``
    (presumably module-level objects -- verify against the rest of the file).

    Args:
        params: option dict. Keys read here: ``save_dir``, ``dataset``,
            ``splitBy``, ``old``, ``ann_feats``, ``image_feats``, ``gpu_id``,
            ``id``, ``id2`` and ``beam_width``. The dict is also handed to
            ``DataLoader`` and ``fetch_feats``. When ``params['old']`` is
            truthy several entries are prefixed with ``'old'`` in place.
    """
    import copy  # function-scope import: only needed for the box copy below

    target_save_dir = osp.join(params['save_dir'], 'prepro',
                               params['dataset'] + '_' + params['splitBy'])
    model_dir = osp.join(params['save_dir'], 'model',
                         params['dataset'] + '_' + params['splitBy'])
    if params['old']:
        params['data_json'] = 'old' + params['data_json']
        params['data_h5'] = 'old' + params['data_h5']
        params['image_feats'] = 'old' + params['image_feats']
        params['ann_feats'] = 'old' + params['ann_feats']
        params['id'] = 'old' + params['id']

    loader = DataLoader(params)

    featsOpt = {
        'ann': osp.join(target_save_dir, params['ann_feats']),
        'img': osp.join(target_save_dir, params['image_feats'])
    }
    loader.loadFeats(featsOpt)

    # Inference only: switch off train-mode behaviour and graph construction.
    chainer.config.train = False
    chainer.config.enable_backprop = False

    gpu_id = params['gpu_id']
    cuda.get_device(gpu_id).use()
    xp = cuda.cupy

    if 'attention' in params['id']:
        print('attn')
        le = LanguageEncoderAttn(len(loader.ix_to_word)).to_gpu(gpu_id)
    else:
        le = LanguageEncoder(len(loader.ix_to_word)).to_gpu(gpu_id)
    ve = VisualEncoder().to_gpu(gpu_id)
    cca = CcaEmbedding().to_gpu(gpu_id)
    lm = LanguageModel(len(loader.ix_to_word),
                       loader.seq_length).to_gpu(gpu_id)

    serializers.load_hdf5(
        osp.join(model_dir, params['id'] + params['id2'] + "ve.h5"), ve)
    serializers.load_hdf5(
        osp.join(model_dir, params['id'] + params['id2'] + "le.h5"), le)
    serializers.load_hdf5(
        osp.join(model_dir, params['id'] + params['id2'] + "cca.h5"), cca)
    serializers.load_hdf5(
        osp.join(model_dir, params['id'] + params['id2'] + "lm.h5"), lm)

    for num, entry in enumerate(train_entries):
        print("{}/{}".format(num, len(train_entries)))

        image_id = entry['image_id']
        idx = train_iid2id[image_id]

        features = all_image_feats[idx]
        # Work on a private copy: the conversion below mutates the boxes in
        # place, so if all_boxes[idx] is shared storage and several entries map
        # to the same idx, the boxes would otherwise be converted repeatedly.
        boxes = copy.deepcopy(all_boxes[idx])
        referring_expression = []

        # Turn elements 2 and 3 from corner values into extents relative to
        # elements 0 and 1 (presumably x2,y2 -> w,h -- confirm box layout).
        for m in range(36):
            bbox = boxes[m]
            bbox[2] -= bbox[0]
            bbox[3] -= bbox[1]
            boxes[m] = bbox

        for k in range(36):

            feats = fetch_feats(entry, features, boxes, loader, k, params)
            feats = Variable(xp.array(feats, dtype=xp.float32))

            vis_enc_feats = ve(feats)
            # No sentence is available at generation time, so the visual
            # encoding is fed in place of the language encoding.
            lang_enc_feats = vis_enc_feats

            _, vis_emb_feats = cca(vis_enc_feats, lang_enc_feats)
            vis_feats = vis_combine(vis_enc_feats, vis_emb_feats)

            beam_results = beam_search(lm, vis_feats, params['beam_width'])
            results = [result['sent'] for result in beam_results[0]]

            # Keep the first three beams of the (single) item.
            results = results[:3]
            gen_sentence = [
                ' '.join([loader.ix_to_word[str(w)] for w in result])
                for result in results
            ]
            referring_expression.append(gen_sentence)
        entry['object_captions'] = referring_expression

    # Use a context manager so the pickle file is flushed and closed.
    with open('VQA_ref_testdataset_v3.pkl', 'wb') as f:
        pickle.dump(train_entries, f)
コード例 #9
0
def eval_all(params):
    """Decode a sentence for every reference in a split and print the scores.

    Restores the saved ``ve`` / ``le`` / ``cca`` / ``lm`` weights, walks
    ``loader.getTestBatch(params['split'], params)`` until the loader reports
    that it wrapped, decodes each batch (``lm.max_sample`` when
    ``params['beam_width'] == 1``, ``beam_search`` otherwise) and finally
    prints the output of ``language_eval``. Nothing is written to disk by
    this block.

    Args:
        params: option dict. Keys read here: ``save_dir``, ``dataset``,
            ``splitBy``, ``old``, ``ann_feats``, ``image_feats``, ``gpu_id``,
            ``id``, ``id2``, ``split`` and ``beam_width``. When
            ``params['old']`` is truthy several entries are prefixed with
            ``'old'`` in place.
    """
    split_tag = params['dataset'] + '_' + params['splitBy']
    target_save_dir = osp.join(params['save_dir'], 'prepro', split_tag)
    model_dir = osp.join(params['save_dir'], 'model', split_tag)

    if params['old']:
        for key in ('data_json', 'data_h5', 'image_feats', 'ann_feats', 'id'):
            params[key] = 'old' + params[key]

    loader = DataLoader(params)
    loader.loadFeats({
        'ann': osp.join(target_save_dir, params['ann_feats']),
        'img': osp.join(target_save_dir, params['image_feats']),
    })

    # Inference only: switch off train-mode behaviour and graph construction.
    chainer.config.train = False
    chainer.config.enable_backprop = False

    gpu_id = params['gpu_id']
    cuda.get_device(gpu_id).use()
    xp = cuda.cupy

    vocab_size = len(loader.ix_to_word)
    with_attention = 'attention' in params['id']
    if with_attention:
        print('attn')
    encoder_cls = LanguageEncoderAttn if with_attention else LanguageEncoder
    le = encoder_cls(vocab_size).to_gpu(gpu_id)
    ve = VisualEncoder().to_gpu(gpu_id)
    cca = CcaEmbedding().to_gpu(gpu_id)
    lm = LanguageModel(vocab_size, loader.seq_length).to_gpu(gpu_id)

    # Restore the four weight files in the order ve, le, cca, lm.
    weight_prefix = params['id'] + params['id2']
    for suffix, link in (("ve.h5", ve), ("le.h5", le), ("cca.h5", cca),
                         ("lm.h5", lm)):
        serializers.load_hdf5(osp.join(model_dir, weight_prefix + suffix),
                              link)

    predictions = []
    beam_all_results = []
    while True:
        data = loader.getTestBatch(params['split'], params)
        bounds = data['bounds']
        ref_ids = data['ref_ids']
        image_id = data['image_id']
        # The result is not used below; the call is retained as in the
        # original flow.
        _last_ind = calc_max_ind(data['seqz'])

        batch_feats = Variable(xp.array(data['feats'], dtype=xp.float32))
        vis_enc_feats = ve(batch_feats)
        # No sentence exists yet, so the visual encoding stands in for the
        # language encoding when querying the embedding.
        _, vis_emb_feats = cca(vis_enc_feats, vis_enc_feats)
        vis_feats = vis_combine(vis_enc_feats, vis_emb_feats)

        beam_width = params['beam_width']
        greedy = beam_width == 1
        if greedy:
            results = lm.max_sample(vis_feats)
        else:
            beam_results = beam_search(lm, vis_feats, beam_width)
            results = [beams[0]['sent'] for beams in beam_results]
            ppls = [beams[0]['ppl'] for beams in beam_results]

        for pos, word_ids in enumerate(results):
            words = [loader.ix_to_word[str(w)] for w in word_ids]
            gen_sentence = ' '.join(words)
            if greedy:
                print(gen_sentence)
            else:
                print(gen_sentence, 'image_id : ', image_id)
            predictions.append({'ref_id': ref_ids[pos], 'sent': gen_sentence})
            if beam_width > 1:
                beam_all_results.append({
                    'ref_id': ref_ids[pos],
                    'beam': beam_results[pos]
                })

        print('evaluating validation performance... {}/{}'.format(
            bounds['it_pos_now'], bounds['it_max']))

        if bounds['wrapped']:
            print('validation finished!')
            break

    lang_stats = language_eval(predictions, params['split'], params)
    print(lang_stats)
コード例 #10
0
def eval_all(params):
    """Measure comprehension accuracy on a split and write it to a text file.

    For every sentence of every image returned by ``loader.getImageBatch`` the
    sentence is scored against all candidate regions of that image, and the
    sentence counts as correct when the ground-truth region's score exceeds
    the best other score reported by ``compute_margin_loss``. The score used
    depends on ``params['mode']``:

    * ``0`` -- language-model score (negated ``computeLosses`` output)
    * ``1`` -- embedding similarity returned by ``cca``
    * ``2`` -- ``cossim + params['lamda'] * lm_score``

    The final accuracy (in percent) is written to
    ``result/<dataset><split><id><mode><lamda>comp.txt``.

    Args:
        params: option dict. Keys read here: ``save_dir``, ``dataset``,
            ``splitBy``, ``old``, ``ann_feats``, ``image_feats``, ``gpu_id``,
            ``id``, ``id2``, ``split``, ``mode`` and ``lamda``. When
            ``params['old']`` is truthy and the dataset is one of the refcoco
            variants, several entries are prefixed with ``'old'`` in place.

    Raises:
        ValueError: if ``params['mode']`` is not 0, 1 or 2.
    """
    mode = params['mode']
    if mode not in (0, 1, 2):
        # Without this check an unknown mode would leave pos_sc / max_neg_sc
        # unbound (or stale from the previous sentence) inside the loop.
        raise ValueError('unsupported mode: {}'.format(mode))

    target_save_dir = osp.join(params['save_dir'], 'prepro',
                               params['dataset'] + '_' + params['splitBy'])
    model_dir = osp.join(params['save_dir'], 'model',
                         params['dataset'] + '_' + params['splitBy'])

    # Make sure the output directory exists before the (long) evaluation so
    # the final write cannot fail on a missing directory.
    if not osp.isdir('result'):
        os.makedirs('result')

    if params['old'] and params['dataset'] in [
            'refcoco', 'refcoco+', 'refcocog'
    ]:
        params['data_json'] = 'old' + params['data_json']
        params['data_h5'] = 'old' + params['data_h5']
        params['image_feats'] = 'old' + params['image_feats']
        params['ann_feats'] = 'old' + params['ann_feats']
        params['id'] = 'old' + params['id']

    loader = DataLoader(params)

    featsOpt = {
        'ann': osp.join(target_save_dir, params['ann_feats']),
        'img': osp.join(target_save_dir, params['image_feats'])
    }
    loader.loadFeats(featsOpt)

    # Inference only: switch off train-mode behaviour and graph construction.
    chainer.config.train = False
    chainer.config.enable_backprop = False

    gpu_id = params['gpu_id']
    cuda.get_device(gpu_id).use()
    xp = cuda.cupy

    ve = VisualEncoder().to_gpu(gpu_id)
    if 'attention' in params['id']:
        print('attn')
        le = LanguageEncoderAttn(len(loader.ix_to_word)).to_gpu(gpu_id)
    else:
        le = LanguageEncoder(len(loader.ix_to_word)).to_gpu(gpu_id)
    cca = CcaEmbedding().to_gpu(gpu_id)
    lm = LanguageModel(len(loader.ix_to_word),
                       loader.seq_length).to_gpu(gpu_id)

    serializers.load_hdf5(
        osp.join(model_dir, params['id'] + params['id2'] + "ve.h5"), ve)
    serializers.load_hdf5(
        osp.join(model_dir, params['id'] + params['id2'] + "le.h5"), le)
    serializers.load_hdf5(
        osp.join(model_dir, params['id'] + params['id2'] + "cca.h5"), cca)
    serializers.load_hdf5(
        osp.join(model_dir, params['id'] + params['id2'] + "lm.h5"), lm)

    accuracy = 0
    loss_evals = 0
    while True:
        data = loader.getImageBatch(params['split'], params)
        img_ann_ids = data['img_ann_ids']
        sent_ids = data['sent_ids']
        gd_ixs = data['gd_ixs']
        feats = Variable(xp.array(data['feats'], dtype=xp.float32))
        seqz = data['seqz']
        lang_last_ind = calc_max_ind(seqz)
        for i, _sent_id in enumerate(sent_ids):
            gd_ix = gd_ixs[i]

            # Repeat sentence i once per candidate region of the image.
            sent_seqz = np.concatenate(
                [[seqz[i]] for _ in range(len(img_ann_ids))], axis=0)
            one_last_ind = np.array([lang_last_ind[i]] * len(img_ann_ids))
            sent_seqz = Variable(xp.array(sent_seqz, dtype=xp.int32))

            vis_enc_feats = ve(feats)
            lang_enc_feats = le(sent_seqz, one_last_ind)
            cossim, vis_emb_feats = cca(vis_enc_feats, lang_enc_feats)
            vis_feats = vis_combine(vis_enc_feats, vis_emb_feats)
            logprobs = lm(vis_feats, sent_seqz, one_last_ind).data

            lm_scores = -computeLosses(logprobs, one_last_ind)

            if mode == 0:
                scores = lm_scores
            elif mode == 1:
                scores = cossim.data
            else:  # mode == 2, guaranteed by the check at the top
                scores = cossim.data + params['lamda'] * lm_scores
            _, pos_sc, max_neg_sc = compute_margin_loss(scores, gd_ix, 0)

            if pos_sc > max_neg_sc:
                accuracy += 1
            loss_evals += 1
            print('{}-th: evaluating [{}]  ... image[{}/{}] sent[{}], acc={}'.
                  format(loss_evals, params['split'],
                         data['bounds']['it_pos_now'],
                         data['bounds']['it_max'], i,
                         accuracy * 100.0 / loss_evals))

        if data['bounds']['wrapped']:
            print('validation finished!')
            break

    result_path = ('result/' + params['dataset'] + params['split'] +
                   params['id'] + str(params['mode']) + str(params['lamda']) +
                   'comp.txt')
    # Context manager guarantees the file is closed even if the write fails.
    with open(result_path, 'w') as f:
        f.write(str(accuracy * 100.0 / loss_evals))
コード例 #11
0
def train_vl(params):
    """Train the visual encoder, language encoder and metric net with validation.

    Each iteration draws a batch from ``loader.getBatch('train', params)``,
    builds three groups of (region, sentence) pairs -- matching pair,
    negative region with the matching sentence, matching region with a
    negative sentence -- labelled 1, 0, 0, and minimises the sigmoid
    cross-entropy of the ``MetricNet`` score with three Adam optimizers.

    Every ``params['save_checkpoint_every']`` iterations (except iteration 0)
    the model is evaluated on the ``'val'`` split, loss / accuracy curves are
    saved as PNG files under ``log_graph/<dataset>_<splitBy>/`` and, when the
    validation accuracy improves, the three networks are saved together to
    ``<save_dir>/model/<dataset>_<splitBy>/<id>.h5``.

    The outer loop has no exit condition; it runs until the process is
    stopped externally.

    Args:
        params: option dict. Keys read here: ``save_dir``, ``dataset``,
            ``splitBy``, ``old``, ``ann_feats``, ``image_feats``,
            ``word_emb_path``, ``pretrained_w``, ``batch_size``, ``gpu_id``,
            ``seq_per_ref``, ``id``, ``losses_log_every``,
            ``save_checkpoint_every``, ``learning_rate_decay_start`` and
            ``learning_rate_decay_every``. The dict is also handed to
            ``DataLoader`` and the loader's batch methods. When
            ``params['old']`` is truthy several entries are prefixed with
            ``'old'`` in place.
    """
    target_save_dir = osp.join(params['save_dir'], 'prepro',
                               params['dataset'] + '_' + params['splitBy'])
    graph_dir = osp.join('log_graph',
                         params['dataset'] + '_' + params['splitBy'])
    model_dir = osp.join(params['save_dir'], 'model',
                         params['dataset'] + '_' + params['splitBy'])
    if not osp.isdir(graph_dir):
        os.makedirs(graph_dir)
    if not osp.isdir(model_dir):
        os.makedirs(model_dir)

    if params['old']:
        params['data_json'] = 'old' + params['data_json']
        params['data_h5'] = 'old' + params['data_h5']
        params['image_feats'] = 'old' + params['image_feats']
        params['ann_feats'] = 'old' + params['ann_feats']
        params['id'] = 'old' + params['id']
        params['word_emb_path'] = 'old' + params['word_emb_path']

    loader = DataLoader(params)

    featsOpt = {
        'ann': osp.join(target_save_dir, params['ann_feats']),
        'img': osp.join(target_save_dir, params['image_feats'])
    }
    loader.loadFeats(featsOpt)
    loader.shuffle('train')

    # model setting
    batch_size = params['batch_size']
    gpu_id = params['gpu_id']
    seq_per_ref = params['seq_per_ref']
    cuda.get_device(gpu_id).use()
    xp = cuda.cupy

    ve = VisualEncoder(res6=L.ResNet152Layers().fc6).to_gpu(gpu_id)
    if 'attention' in params['id']:
        print('attention language encoder')
        le = LanguageEncoderAttn(len(loader.ix_to_word))
        save_model = ListenerReward(len(loader.ix_to_word), attention=True)
    else:
        le = LanguageEncoder(len(loader.ix_to_word))
        save_model = ListenerReward(len(loader.ix_to_word), attention=False)

    if params['pretrained_w']:
        print('pretrained word embedding...')
        word_emb = load_vcab_init(
            loader.word_to_ix,
            osp.join(target_save_dir, params['word_emb_path']))
        # Install the embedding before the encoder is moved to the GPU.
        le.word_emb.W.data = word_emb
    le.to_gpu(gpu_id)
    me = MetricNet().to_gpu(gpu_id)

    ve_optim = optimizers.Adam(alpha=4e-5, beta1=0.8)
    le_optim = optimizers.Adam(alpha=4e-4, beta1=0.8)
    me_optim = optimizers.Adam(alpha=4e-4, beta1=0.8)
    ve_optim.setup(ve)
    le_optim.setup(le)
    me_optim.setup(me)

    # Remember every optimizer's initial step size so the decay schedule
    # below is always applied to the base value instead of compounding.
    decayed_optims = [(optim, optim.alpha)
                      for optim in (ve_optim, le_optim, me_optim)]

    ve_optim.add_hook(chainer.optimizer.GradientClipping(0.1))
    le_optim.add_hook(chainer.optimizer.GradientClipping(0.1))
    me_optim.add_hook(chainer.optimizer.GradientClipping(0.1))
    # Per-parameter override: joint_enc uses a larger step size than the rest
    # of the visual encoder.
    ve.joint_enc.W.update_rule.hyperparam.alpha = 4e-4
    ve.joint_enc.b.update_rule.hyperparam.alpha = 4e-4

    iteration = 0
    epoch = 0
    val_loss_history = []
    val_acc_history = []
    val_rank_acc_history = []
    min_val_loss = 100
    max_acc = 0
    while True:
        chainer.config.train = True
        chainer.config.enable_backprop = True
        ve.zerograds()
        le.zerograds()
        me.zerograds()
        data = loader.getBatch('train', params)

        pos_feats = Variable(xp.array(data['feats'], dtype=xp.float32))
        neg_feats = Variable(xp.array(data['neg_feats'], dtype=xp.float32))
        # Three groups, row-aligned with `seqz` and `labels` below:
        # (pos region, pos sent), (neg region, pos sent), (pos region, neg sent)
        feats = F.concat([pos_feats, neg_feats, pos_feats], axis=0)

        seqz = np.concatenate([data['seqz'], data['seqz'], data['neg_seqz']],
                              axis=0)
        lang_last_ind = calc_max_ind(seqz)
        seqz = Variable(xp.array(seqz, dtype=xp.int32))
        labels = Variable(
            xp.concatenate([
                xp.ones((batch_size * seq_per_ref)),
                xp.zeros((batch_size * seq_per_ref)),
                xp.zeros((batch_size * seq_per_ref))
            ]).astype(xp.int32))

        vis_enc_feats = ve(feats)
        lang_enc_feats = le(seqz, lang_last_ind)
        score = me(vis_enc_feats, lang_enc_feats).reshape(labels.shape)

        loss = F.sigmoid_cross_entropy(score, labels)
        loss.backward()
        ve_optim.update()
        le_optim.update()
        me_optim.update()

        if data['bounds']['wrapped']:
            print('{} epoch finished!'.format(epoch))
            loader.shuffle('train')
            epoch += 1

        if iteration % params['losses_log_every'] == 0:
            print('{} iter ({} epoch): train loss {}'.format(
                iteration, epoch, loss.data))

        ## validation
        if (iteration % params['save_checkpoint_every'] == 0
                and iteration > 0):
            chainer.config.train = False
            chainer.config.enable_backprop = False
            loader.resetImageIterator('val')
            loss_sum = 0
            loss_evals = 0
            accuracy = 0
            rank_acc = 0
            rank_num = 0
            while True:
                data = loader.getImageBatch('val', params)
                img_ann_ids = data['img_ann_ids']
                sent_ids = data['sent_ids']
                gd_ixs = data['gd_ixs']
                feats = Variable(xp.array(data['feats'], dtype=xp.float32))
                seqz = data['seqz']
                scores = []
                for i, sent_id in enumerate(sent_ids):
                    ## score the sentence against every candidate region
                    ## in the image
                    gd_ix = gd_ixs[i]
                    labels = xp.zeros(len(img_ann_ids), dtype=xp.int32)
                    labels[gd_ix] = 1
                    labels = Variable(labels)

                    sent_seqz = np.concatenate(
                        [[seqz[i]] for _ in range(len(img_ann_ids))], axis=0)
                    lang_last_ind = calc_max_ind(sent_seqz)
                    sent_seqz = Variable(xp.array(sent_seqz, dtype=xp.int32))

                    vis_enc_feats = ve(feats)
                    lang_enc_feats = le(sent_seqz, lang_last_ind)
                    score = me(vis_enc_feats,
                               lang_enc_feats).reshape(labels.shape)
                    loss = F.sigmoid_cross_entropy(score, labels)
                    scores.append(score[gd_ix].data)

                    loss_sum += loss.data
                    loss_evals += 1
                    _, pos_sc, max_neg_sc = compute_margin_loss(
                        score.data, gd_ix, 0)
                    if pos_sc > max_neg_sc:
                        accuracy += 1

                if params['dataset'] == 'refgta':
                    rank_a, rank_n = calc_rank_acc(scores, data['rank'])
                    rank_acc += rank_a
                    rank_num += rank_n
                print('{} iter | {}/{} validating acc : {}'.format(
                    iteration, data['bounds']['it_pos_now'],
                    data['bounds']['it_max'], accuracy / loss_evals))

                if data['bounds']['wrapped']:
                    print('validation finished!')
                    fin_val_loss = cuda.to_cpu(loss_sum / loss_evals)
                    fin_val_acc = accuracy / loss_evals
                    break
            val_loss_history.append(fin_val_loss)
            val_acc_history.append(fin_val_acc)
            if min_val_loss > fin_val_loss:
                print('val loss {} -> {} improved!'.format(
                    min_val_loss, val_loss_history[-1]))
                min_val_loss = fin_val_loss

            # Checkpointing is driven by validation accuracy, not by loss.
            if max_acc < fin_val_acc:
                max_acc = fin_val_acc
                save_model.ve = ve
                save_model.le = le
                save_model.me = me
                serializers.save_hdf5(
                    osp.join(model_dir, params['id'] + ".h5"), save_model)

            ## graph
            plt.title("accuracy")
            plt.plot(np.arange(len(val_acc_history)),
                     val_acc_history,
                     label="val_accuracy")
            plt.legend()
            plt.savefig(os.path.join(graph_dir, params['id'] + "_acc.png"))
            plt.close()

            plt.title("loss")
            plt.plot(np.arange(len(val_loss_history)),
                     val_loss_history,
                     label="val_loss")
            plt.legend()
            plt.savefig(os.path.join(graph_dir, params['id'] + "_loss.png"))
            plt.close()

            if params['dataset'] == 'refgta':
                print(rank_num)
                val_rank_acc_history.append(rank_acc / rank_num)
                plt.title("rank loss")
                plt.plot(np.arange(len(val_rank_acc_history)),
                         val_rank_acc_history,
                         label="rank_acc")
                plt.legend()
                plt.savefig(
                    os.path.join(graph_dir, params['id'] + "_rank_acc.png"))
                plt.close()

        # learning rate decay
        if iteration > params['learning_rate_decay_start'] and params[
                'learning_rate_decay_start'] >= 0:
            frac = (iteration - params['learning_rate_decay_start']
                    ) / params['learning_rate_decay_every']
            decay_factor = math.pow(0.1, frac)
            # Set alpha from the recorded base value. Multiplying the current
            # alpha by decay_factor on every iteration would compound the
            # decay, because decay_factor already encodes the total decay
            # since learning_rate_decay_start.
            for optim, base_alpha in decayed_optims:
                optim.alpha = base_alpha * decay_factor

        iteration += 1
コード例 #12
0
def eval_all(params):
    """Encode 36 boxes per entry and dump the embeddings to a new HDF5 file.

    The function restores the visual encoder, language encoder, CCA embedding
    and language model from ``<save_dir>/model/<dataset>_<splitBy>/``, runs
    every box of every entry in ``train_entries`` through ``ve`` and ``cca``,
    and stores the combined feature in an in-memory array.  That array is
    then written, together with the pre-existing image, spatial and box
    arrays, to ``new_hdf5/add_test36.hdf5``.

    NOTE: ``train_entries``, ``train_iid2id``, ``all_image_feats``,
    ``all_sp_feats``, ``all_boxes``, ``fetch_feats`` and ``vis_combine`` are
    not defined inside this function; they have to be provided by the
    enclosing module.

    Args:
        params: option dict.  Keys read directly here: ``save_dir``,
            ``dataset``, ``splitBy``, ``batch_size``, ``old``, ``id``,
            ``id2``, ``gpu_id``, ``ann_feats``, ``image_feats`` (plus
            ``data_json`` / ``data_h5`` when ``old`` is truthy).  When
            ``old`` is truthy the file-name keys and ``id`` are prefixed
            with ``'old'`` in place.  The dict is also forwarded to
            ``DataLoader`` and ``fetch_feats``.
    """
    save_root = params['save_dir']
    split_tag = params['dataset'] + '_' + params['splitBy']
    target_save_dir = osp.join(save_root, 'prepro', split_tag)
    model_dir = osp.join(save_root, 'model', split_tag)
    # Looked up but not used further down; the lookup is kept so a missing
    # key still fails at this point.
    batch_size = params['batch_size']

    if params['old']:
        for key in ('data_json', 'data_h5', 'image_feats', 'ann_feats', 'id'):
            params[key] = 'old' + params[key]

    loader = DataLoader(params)
    loader.loadFeats({
        'ann': osp.join(target_save_dir, params['ann_feats']),
        'img': osp.join(target_save_dir, params['image_feats'])
    })

    # Inference only: switch off train mode and graph construction.
    chainer.config.train = False
    chainer.config.enable_backprop = False

    gpu_id = params['gpu_id']
    cuda.get_device(gpu_id).use()
    xp = cuda.cupy

    # The language encoder flavour is selected by the run id.
    if 'attention' not in params['id']:
        le = LanguageEncoder(len(loader.ix_to_word)).to_gpu(gpu_id)
    else:
        print('attn')
        le = LanguageEncoderAttn(len(loader.ix_to_word)).to_gpu(gpu_id)
    ve = VisualEncoder().to_gpu(gpu_id)
    cca = CcaEmbedding().to_gpu(gpu_id)
    lm = LanguageModel(len(loader.ix_to_word),
                       loader.seq_length).to_gpu(gpu_id)

    # Weight files are named <id><id2><component>.h5 inside model_dir.
    weight_prefix = params['id'] + params['id2']
    for suffix, model in (("ve.h5", ve), ("le.h5", le), ("cca.h5", cca),
                          ("lm.h5", lm)):
        serializers.load_hdf5(osp.join(model_dir, weight_prefix + suffix),
                              model)

    # Original author's note -- train: 82783 , val: 40504, test: 81434
    add_feats = np.zeros([81434, 36, 1024])

    for num, entry in enumerate(train_entries):
        print("{}/{}".format(num, len(train_entries)))

        idx = train_iid2id[entry['image_id']]
        features = all_image_feats[idx]
        boxes = all_boxes[idx]

        # Subtract element 0 from element 2 and element 1 from element 3 of
        # every box (presumably corner format -> width/height -- TODO confirm).
        for m in range(36):
            row = boxes[m]
            row[2] -= row[0]
            row[3] -= row[1]
            boxes[m] = row

        for k in range(36):
            # Author's shape notes: raw feats [1, 6249], encoded [1, 512],
            # combined [1, 1024].
            raw = fetch_feats(entry, features, boxes, loader, k, params)
            feats = Variable(xp.array(raw, dtype=xp.float32))

            vis_enc_feats = ve(feats)

            # The visual encoding is fed to both inputs of the CCA module;
            # only its second output is used.
            lang_enc_feats = vis_enc_feats
            _, vis_emb_feats = cca(vis_enc_feats, lang_enc_feats)

            vis_feats = vis_combine(vis_enc_feats, vis_emb_feats)

            # Bring the result back to host memory and store it.
            host_feats = chainer.cuda.to_cpu(vis_feats.data)
            add_feats[idx][k] = np.array(host_feats)

    with h5py.File('new_hdf5/add_test36.hdf5', 'w') as hf:
        # (dataset name, payload, maxshape) in the order they are written.
        outputs = (
            ('image_features', all_image_feats, (82783, 36, 2048)),
            ('spatial_features', all_sp_feats, (82783, 36, 6)),
            ('image_bb', all_boxes, (82783, 36, 4)),
            ('additional_feats', add_feats, (82783, 36, 1024)),
        )
        for name, payload, max_shape in outputs:
            hf.create_dataset(name, data=payload, maxshape=max_shape)
コード例 #13
0
ファイル: eval_rerank.py プロジェクト: MonsKim999/XAI_Project
def main(params):
    """Rerank beam-search sentences per image and evaluate the selection.

    Steps performed:
      1. Load the beam JSON for ``params['split']`` and index beams by
         ``ref_id`` (each beam also gets its ``ref_id`` attached).
      2. Load features and the ``ve`` / ``le`` / ``cca`` weights, then build
         per-image confusion data with ``calc_confusion``.
      3. For every image, compute unary and pairwise potentials and pick one
         sentence per reference with ``bilp``.
      4. Score the picked sentences with ``RefEvaluation`` (printed, and
         written to JSON when ``params['write_result'] > 0``) and run
         ``CrossEvaluation``.

    Args:
        params: option dict.  Keys read directly here include ``save_dir``,
            ``dataset``, ``splitBy``, ``split``, ``id``, ``beam_width``,
            ``old``, ``gpu_id``, ``model_root``, ``data_root``,
            ``ann_feats_h5``, ``image_feats_h5``, ``write_result`` and (only
            when writing results) ``model_id``.  When ``old`` is truthy the
            file-name keys and ``id`` are prefixed with ``'old'`` in place.

    Raises:
        ValueError: if ``params['dataset']`` is not ``'refcoco'``,
            ``'refcoco+'`` or ``'refcocog'``.  The check runs before any
            file or GPU work.  (The previous code called an undefined
            ``error(...)`` helper, which surfaced as a ``NameError`` only
            after all models had been loaded.)
    """
    # (lambda1, lambda2) handed to compute_unary; kept per dataset so each
    # one can be tuned independently.
    rerank_lambdas = {
        'refcoco': (5, 5),
        'refcoco+': (5, 5),
        'refcocog': (5, 5),
    }
    if params['dataset'] not in rerank_lambdas:
        raise ValueError(
            'No such dataset option for {}'.format(params['dataset']))
    lambda1, lambda2 = rerank_lambdas[params['dataset']]

    target_save_dir = osp.join(params['save_dir'], 'prepro',
                               params['dataset'] + '_' + params['splitBy'])

    if params['old']:
        params['data_json'] = 'old' + params['data_json']
        params['data_h5'] = 'old' + params['data_h5']
        params['image_feats_h5'] = 'old' + params['image_feats_h5']
        params['ann_feats_h5'] = 'old' + params['ann_feats_h5']
        params['id'] = 'old' + params['id']

    # NOTE: plain string concatenation -- there is no path separator between
    # target_save_dir and the split name.
    beam_path = (target_save_dir + params["split"] + '_' + params['id'] +
                 str(params['beam_width']) + '.json')
    with open(beam_path) as f:
        data = json.load(f)
    ref_to_beams = {item['ref_id']: item['beam'] for item in data}

    # add ref_id to each beam
    for ref_id, beams in ref_to_beams.items():
        for beam in beams:
            beam['ref_id'] = ref_id  # make up ref_id in beam

    loader = DataLoader(params)
    featsOpt = {
        'ann': osp.join(target_save_dir, params['ann_feats_h5']),
        'img': osp.join(target_save_dir, params['image_feats_h5'])
    }
    loader.loadFeats(featsOpt)
    # Inference only: switch off train mode and graph construction.
    chainer.config.train = False
    chainer.config.enable_backprop = False

    gpu_id = params['gpu_id']
    cuda.get_device(gpu_id).use()
    xp = cuda.cupy

    ve = VisualEncoder().to_gpu(gpu_id)
    # The language encoder flavour is selected by the run id.
    if 'attention' in params['id']:
        le = LanguageEncoderAttn(len(loader.ix_to_word)).to_gpu(gpu_id)
    else:
        le = LanguageEncoder(len(loader.ix_to_word)).to_gpu(gpu_id)
    cca = CcaEmbedding().to_gpu(gpu_id)

    # Weights live at <model_root><dataset>_<splitBy>/<id><component>.h5
    weight_prefix = (params['model_root'] + params['dataset'] + '_' +
                     params['splitBy'] + '/' + params['id'])
    serializers.load_hdf5(weight_prefix + "ve.h5", ve)
    serializers.load_hdf5(weight_prefix + "le.h5", le)
    serializers.load_hdf5(weight_prefix + "cca.h5", cca)

    img_to_ref_ids, img_to_ref_confusion = calc_confusion(
        loader, data, ref_to_beams, ve, le, cca, params, xp)

    # The evaluation helpers are imported from pyutils/refer2 at call time.
    sys.path.insert(0, osp.join('pyutils', 'refer2'))
    sys.path.insert(0, osp.join('pyutils', 'refer2', 'evaluation'))
    from refer import REFER
    from refEvaluation import RefEvaluation
    from crossEvaluation import CrossEvaluation
    refer = REFER(params['data_root'], params['dataset'], params['splitBy'],
                  old_version=params['old'])

    # compute unary potential, img_to_ref_unary
    Res = []
    for image_id in img_to_ref_confusion:
        # ref_ids and confusion matrices for this image
        img_ref_ids = img_to_ref_ids[image_id]
        ref_to_confusion = img_to_ref_confusion[image_id]
        # compute unary potential for each ref_id
        for ref_id in img_ref_ids:
            confusion = ref_to_confusion[ref_id]  # (beam_size, #img_ref_ids)
            beams = ref_to_beams[ref_id]  # [{ppl, sent, logp}] of beam_size
            compute_unary(ref_id, beams, confusion, img_ref_ids, lambda1,
                          lambda2)

        # here's more preparation (the ix -> (ref, beam) map is not needed)
        ref_beam_to_ix, _, all_beams = make_index(img_ref_ids, ref_to_beams)

        # compute pairwise potentials
        pairwise_ref_beam_ids = compute_pairwise(img_ref_ids, ref_to_beams)

        # call cplex
        res = bilp(img_ref_ids, ref_to_beams, all_beams,
                   pairwise_ref_beam_ids, ref_beam_to_ix, loader)
        Res += res

    # evaluate
    refEval = RefEvaluation(refer, Res)
    refEval.evaluate()
    overall = dict(refEval.eval.items())
    print(overall)

    if params['write_result'] > 0:
        file_name = params['model_id'] + '_' + params['split'] + '_beamrerank.json'
        result_path = osp.join('cache', 'lang',
                               params['dataset'] + '_' + params['splitBy'],
                               file_name)
        refToEval = refEval.refToEval
        for res in Res:
            ref_id, sent = res['ref_id'], res['sent']
            refToEval[ref_id]['sent'] = sent
        # Drop the trailing '.json' and write '<...>_beamrerank_out.json'.
        with open(result_path[:-5] + '_out.json', 'w') as outfile:
            json.dump({'overall': overall, 'refToEval': refToEval}, outfile)

    # CrossEvaluation takes as input [{ref_id, sent}]
    ceval = CrossEvaluation(refer, Res)
    ceval.cross_evaluate()
    ceval.make_ref_to_evals()
    ref_to_evals = ceval.ref_to_evals  # ref_to_evals = {ref_id: {ref_id: {method: score}}}

    # compute cross score (computed here but not returned)
    xcider = ceval.Xscore('CIDEr')
コード例 #14
0
def eval_all(params):
    """Measure comprehension accuracy on ``params['split']`` and save it.

    For each image batch returned by ``loader.getImageBatch`` and each of its
    sentences, every annotation of the image is scored and the sentence
    counts as correct when the ground-truth annotation's score is strictly
    greater than the best other score (``compute_margin_loss`` supplies both
    values).  The score source depends on ``params['mode']``:

      * ``0`` -- negated ``computeLosses`` output of the language model,
      * ``1`` -- sigmoid of ``rl_crit.calc_score``,
      * ``2`` -- mode-1 score plus ``params['lamda']`` times the mode-0 score.

    When the loader reports it has wrapped around, the accuracy (in percent)
    is written to ``<result_dir>/<id><id2><mode><lamda>comp.txt`` and the
    function returns.

    Args:
        params: option dict.  Keys read directly here include ``save_dir``,
            ``dataset``, ``splitBy``, ``split``, ``old``, ``id``, ``id2``,
            ``mode``, ``lamda``, ``gpu_id``, ``sp_ann_feats``, ``ann_feats``,
            ``image_feats`` and ``ann_shapes``.  When ``old`` is truthy
            several keys are prefixed with ``'old'`` in place.

    Raises:
        ValueError: if ``params['dataset']`` is not a supported dataset or
            ``params['mode']`` is not 0, 1 or 2.  Both checks run before any
            directory is created or model is loaded; previously these cases
            surfaced later as ``NameError`` on unbound locals.
    """
    # Validate configuration first so a typo fails fast and cleanly.
    if params['dataset'] in ['refcoco', 'refcoco+', 'refcocog']:
        global_shapes = (224, 224)
    elif params['dataset'] == 'refgta':
        global_shapes = (480, 288)
    else:
        raise ValueError('No such dataset option for {}'.format(
            params['dataset']))
    if params['mode'] not in (0, 1, 2):
        raise ValueError('Unsupported mode {!r}; expected 0, 1 or 2'.format(
            params['mode']))

    target_save_dir = osp.join(params['save_dir'], 'prepro',
                               params['dataset'] + '_' + params['splitBy'])
    model_dir = osp.join(params['save_dir'], 'model',
                         params['dataset'] + '_' + params['splitBy'])
    result_dir = osp.join('result',
                          params['dataset'] + '_' + params['splitBy'])

    if not osp.isdir(result_dir):
        os.makedirs(result_dir)

    if params['old']:
        params['data_json'] = 'old' + params['data_json']
        params['data_h5'] = 'old' + params['data_h5']
        # NOTE(review): the next two lines write 'image_feats_h5' and
        # 'ann_feats_h5', but featsOpt below reads 'image_feats' and
        # 'ann_feats' -- confirm which keys are intended.
        params['image_feats_h5'] = 'old' + params['image_feats']
        params['ann_feats_h5'] = 'old' + params['ann_feats']
        params['ann_feats_input'] = 'old' + params['ann_feats_input']
        params['shapes'] = 'old' + params['shapes']
        params['id'] = 'old' + params['id']

    loader = DataLoader(params)

    featsOpt = {
        'sp_ann': osp.join(target_save_dir, params['sp_ann_feats']),
        'ann_input': osp.join(target_save_dir, params['ann_feats']),
        'img': osp.join(target_save_dir, params['image_feats']),
        'shapes': osp.join(target_save_dir, params['ann_shapes'])
    }
    loader.loadFeats(featsOpt)
    # Inference only: switch off train mode and graph construction.
    chainer.config.train = False
    chainer.config.enable_backprop = False

    gpu_id = params['gpu_id']
    cuda.get_device(gpu_id).use()
    xp = cuda.cupy

    ve = VisualEncoder(global_shapes=global_shapes).to_gpu(gpu_id)
    rl_crit = ListenerReward(len(loader.ix_to_word),
                             global_shapes=global_shapes).to_gpu(gpu_id)
    lm = LanguageModel(len(loader.ix_to_word), loader.seq_length,
                       global_shapes).to_gpu(gpu_id)

    serializers.load_hdf5(osp.join(model_dir, params['id'] + ".h5"), rl_crit)

    serializers.load_hdf5(
        osp.join(model_dir, params['id'] + params['id2'] + "ve.h5"), ve)
    serializers.load_hdf5(
        osp.join(model_dir, params['id'] + params['id2'] + "lm.h5"), lm)

    accuracy = 0  # number of sentences ranked correctly
    loss_evals = 0  # number of sentences evaluated
    while True:
        data = loader.getImageBatch(params['split'], params)
        img_ann_ids = data['img_ann_ids']
        sent_ids = data['sent_ids']
        gd_ixs = data['gd_ixs']
        feats = Variable(xp.array(data['feats'], dtype=xp.float32))
        sp_cxt_feats = Variable(
            xp.array(data['sp_cxt_feats'], dtype=xp.float32))
        sp_ann_feats = Variable(
            xp.array(data['sp_ann_feats'], dtype=xp.float32))
        local_shapes = data['local_shapes']
        seqz = data['seqz']
        lang_last_ind = calc_max_ind(seqz)
        for i, _sent_id in enumerate(sent_ids):
            gd_ix = gd_ixs[i]

            # Repeat sentence i once per annotation so every candidate is
            # scored against the same sentence.
            sent_seqz = np.concatenate([[seqz[i]]
                                        for _ in range(len(img_ann_ids))],
                                       axis=0)
            one_last_ind = np.array([lang_last_ind[i]] * len(img_ann_ids))
            sent_seqz = Variable(xp.array(sent_seqz, dtype=xp.int32))

            # NOTE(review): the visual/spatial features below do not use the
            # sentence index; they could be computed once per image if the
            # encoders are stateless -- confirm before hoisting.
            coord = cuda.to_cpu(
                feats[:, sum(ve.feat_ind[:1]):sum(ve.feat_ind[:2])].data)
            local_sp_coord, global_sp_coord = calc_coordinate_feature(
                coord, local_shapes, global_shapes=global_shapes)
            local_sp_coord, global_sp_coord = xp.array(
                local_sp_coord, dtype=xp.float32), xp.array(global_sp_coord,
                                                            dtype=xp.float32)
            vis_enc_feats = ve(feats, sp_cxt_feats, coord)
            sp_feats, sp_feats_emb = lm.calc_spatial_features(
                sp_cxt_feats, sp_ann_feats, local_sp_coord, global_sp_coord)
            vis_feats = vis_enc_feats
            logprobs = lm(vis_feats, sp_feats, sp_feats_emb, coord, sent_seqz,
                          one_last_ind).data

            # Negate the losses so that a larger value means a better match.
            lm_scores = -cuda.to_cpu(computeLosses(logprobs, one_last_ind))

            score = cuda.to_cpu(
                F.sigmoid(
                    rl_crit.calc_score(feats, sp_cxt_feats, coord, sent_seqz,
                                       one_last_ind)).data)[:, 0]

            # mode was validated above, so exactly one branch applies.
            if params['mode'] == 0:
                ranking_scores = lm_scores
            elif params['mode'] == 1:
                ranking_scores = score
            else:
                ranking_scores = score + params['lamda'] * lm_scores
            _, pos_sc, max_neg_sc = compute_margin_loss(
                ranking_scores, gd_ix, 0)

            if pos_sc > max_neg_sc:
                accuracy += 1
            loss_evals += 1
            print('{}-th: evaluating [{}]  ... image[{}/{}] sent[{}], acc={}'.
                  format(loss_evals, params['split'],
                         data['bounds']['it_pos_now'],
                         data['bounds']['it_max'], i,
                         accuracy * 100.0 / loss_evals))

        if data['bounds']['wrapped']:
            print('validation finished!')
            result_path = (result_dir + '/' + params['id'] + params['id2'] +
                           str(params['mode']) + str(params['lamda']) +
                           'comp.txt')
            # Context manager guarantees the file is closed even on error.
            with open(result_path, 'w') as f:
                f.write(str(accuracy * 100.0 / loss_evals))
            break
コード例 #15
0
def main(params):
    """Rerank beam-search sentences with the listener reward and evaluate.

    Steps performed:
      1. Load the beam JSON for ``params['split']`` and index beams by
         ``ref_id`` (each beam also gets its ``ref_id`` attached).
      2. Load features and the ``ListenerReward`` weights, then build
         per-image confusion data with ``calc_confusion``.
      3. For every image, compute unary and pairwise potentials and pick one
         sentence per reference with ``bilp``.
      4. Score the picked sentences with ``RefEvaluation`` (printed, and
         written to ``<id><id2>_out.json`` when
         ``params['write_result'] > 0``) and run ``CrossEvaluation``.

    Args:
        params: option dict.  Keys read directly here include ``save_dir``,
            ``dataset``, ``splitBy``, ``split``, ``id``, ``id2``,
            ``beam_width``, ``old``, ``gpu_id``, ``data_root``,
            ``sp_ann_feats``, ``ann_feats``, ``image_feats``, ``ann_shapes``,
            ``write_result`` and either ``coco_image_root`` or
            ``gta_image_root`` depending on the dataset.  When ``old`` is
            truthy several keys are prefixed with ``'old'`` in place.

    Raises:
        ValueError: if ``params['dataset']`` is not ``'refcoco'``,
            ``'refcoco+'``, ``'refcocog'`` or ``'refgta'``.  The check runs
            before any file or GPU work.  (The previous code relied on an
            undefined ``error(...)`` helper and on unbound locals, which
            produced a ``NameError``.)
    """
    # Resolve every dataset-dependent setting in one place, failing fast on
    # an unsupported dataset.
    dataset = params['dataset']
    if dataset in ['refcoco', 'refcoco+', 'refcocog']:
        global_shapes = (224, 224)
        image_root = params['coco_image_root']
    elif dataset == 'refgta':
        global_shapes = (480, 288)
        image_root = params['gta_image_root']
    else:
        raise ValueError('No such dataset option for {}'.format(dataset))

    # (lambda1, lambda2) handed to compute_unary; kept per dataset so each
    # one can be tuned independently.
    rerank_lambdas = {
        'refcoco': (5, 5),
        'refcoco+': (5, 5),
        'refcocog': (5, 5),
        'refgta': (5, 5),
    }
    lambda1, lambda2 = rerank_lambdas[dataset]

    target_save_dir = osp.join(params['save_dir'], 'prepro',
                               params['dataset'] + '_' + params['splitBy'])
    model_dir = osp.join(params['save_dir'], 'model',
                         params['dataset'] + '_' + params['splitBy'])

    if params['old']:
        params['data_json'] = 'old' + params['data_json']
        params['data_h5'] = 'old' + params['data_h5']
        params['image_feats'] = 'old' + params['image_feats']
        params['ann_feats'] = 'old' + params['ann_feats']
        params['id'] = 'old' + params['id']

    # NOTE: plain string concatenation -- there is no path separator between
    # target_save_dir and the split name.
    with open(target_save_dir + params["split"] + '_' + params['id'] +
              params['id2'] + str(params['beam_width']) + '.json') as f:
        data = json.load(f)
    ref_to_beams = {item['ref_id']: item['beam'] for item in data}

    # add ref_id to each beam
    for ref_id, beams in ref_to_beams.items():
        for beam in beams:
            beam['ref_id'] = ref_id  # make up ref_id in beam

    loader = DataLoader(params)
    featsOpt = {
        'sp_ann': osp.join(target_save_dir, params['sp_ann_feats']),
        'ann_input': osp.join(target_save_dir, params['ann_feats']),
        'img': osp.join(target_save_dir, params['image_feats']),
        'shapes': osp.join(target_save_dir, params['ann_shapes'])
    }
    loader.loadFeats(featsOpt)
    loader.shuffle('train')
    # NOTE(review): features are loaded a second time after the shuffle; this
    # looks redundant but is kept as-is -- confirm against DataLoader.
    loader.loadFeats(featsOpt)
    # Inference only: switch off train mode and graph construction.
    chainer.config.train = False
    chainer.config.enable_backprop = False

    gpu_id = params['gpu_id']
    cuda.get_device(gpu_id).use()
    xp = cuda.cupy

    rl_crit = ListenerReward(len(loader.ix_to_word),
                             global_shapes=global_shapes).to_gpu(gpu_id)
    serializers.load_hdf5(osp.join(model_dir, params['id'] + ".h5"), rl_crit)
    img_to_ref_ids, img_to_ref_confusion = calc_confusion(
        loader, data, ref_to_beams, rl_crit, params, xp)

    # The evaluation helpers are imported from pyutils/refer2 at call time.
    sys.path.insert(0, osp.join('pyutils', 'refer2'))
    sys.path.insert(0, osp.join('pyutils', 'refer2', 'evaluation'))
    from refer import REFER
    from refEvaluation import RefEvaluation
    from crossEvaluation import CrossEvaluation
    refer = REFER(params['data_root'],
                  image_root,
                  params['dataset'],
                  params['splitBy'],
                  old_version=params['old'])

    # compute unary potential, img_to_ref_unary
    Res = []
    for image_id in img_to_ref_confusion:
        # ref_ids and confusion matrices for this image
        img_ref_ids = img_to_ref_ids[image_id]
        ref_to_confusion = img_to_ref_confusion[image_id]
        # compute unary potential for each ref_id
        for ref_id in img_ref_ids:
            confusion = ref_to_confusion[ref_id]  # (beam_size, #img_ref_ids)
            beams = ref_to_beams[ref_id]  # [{ppl, sent, logp}] of beam_size
            compute_unary(ref_id, beams, confusion, img_ref_ids, lambda1,
                          lambda2)

        # here's more preparation
        ref_beam_to_ix, ix_to_ref_beam, all_beams = make_index(
            img_ref_ids, ref_to_beams)

        # compute pairwise potentials
        pairwise_ref_beam_ids = compute_pairwise(img_ref_ids, ref_to_beams)

        # call cplex
        res = bilp(img_ref_ids, ref_to_beams, all_beams, pairwise_ref_beam_ids,
                   ref_beam_to_ix, loader)
        Res += res

    # evaluate; the extra eval_cider_r flag is switched on for refgta only
    eval_cider_r = params['dataset'] == 'refgta'
    refEval = RefEvaluation(refer, Res, eval_cider_r=eval_cider_r)
    refEval.evaluate()
    overall = dict(refEval.eval.items())
    print(overall)

    if params['write_result'] > 0:
        refToEval = refEval.refToEval
        for res in Res:
            ref_id, sent = res['ref_id'], res['sent']
            refToEval[ref_id]['sent'] = sent
        # Written relative to the current working directory.
        with open(params['id'] + params['id2'] + '_out.json',
                  'w') as outfile:
            json.dump({'overall': overall, 'refToEval': refToEval}, outfile)

    # CrossEvaluation takes as input [{ref_id, sent}]
    ceval = CrossEvaluation(refer, Res)
    ceval.cross_evaluate()
    ceval.make_ref_to_evals()
    ref_to_evals = ceval.ref_to_evals  # ref_to_evals = {ref_id: {ref_id: {method: score}}}

    # compute cross score
    xcider = ceval.Xscore('CIDEr')