Example 1
def forward_samples(model, image, samples, out_layer="conv3"):
    """Forward samples through the network."""
    model.eval()
    extractor = tracking.data_prov.RegionExtractor(image, samples, opts)
    for i, regions in enumerate(extractor):
        if opts["use_gpu"]:
            regions = regions.cuda()
        with torch.no_grad():
            feat = model(regions, out_layer=out_layer)
        if i == 0:
            feats = feat.detach().clone()
        else:
            feats = torch.cat((feats, feat.detach().clone()), 0)
    return feats
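A minimal usage sketch (hypothetical: cur_image and samples come from the surrounding tracking loop; opts is the module-level options dict this function reads):

# Hypothetical usage: extract conv3 features for candidate boxes, then
# score them with the fully connected layers, as in the tracker loop below.
feats = forward_samples(model, cur_image, samples, out_layer="conv3")
scores = model(feats, in_layer="fc4")  # (N, 2) background/target scores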
Example 2
def G_pretrain_v3(model,
                  model_G,
                  g_criterion,
                  g_optimizer,
                  maxiter,
                  g_lr,
                  in_layer='fc4'):
    '''
    Pretrain model_G using features extracted by the RoIAlign module
    instead of those from the original MDNet.
    '''
    # If a pretrained model already exists, load it; otherwise pretrain model_G
    model_path = './models/g_model' + str(g_lr) + '.pth'
    if os.path.exists(model_path):
        model_G.load_state_dict(torch.load(model_path))
        print('Loading model: {}'.format(model_path))
        return

    batch_pos = opts['g_pretrain_pos']
    # Init image crop model
    img_crop_model = imgCropper(1.)
    if opts['use_gpu']:
        img_crop_model.gpuEnable()

    if os.path.exists('./models/feats_before.npy'):
        feats_before = np.load('./models/feats_before.npy')
        feats_after = np.load('./models/feats_after.npy')
    else:

        path = "./models/G_sample_list_2.mat"
        res = loadmat(path)

        res = res['G_sample_list']
        res = res[0, 0]

        G_samplelist = {}
        G_samplegt = {}
        attr_name = [
            'illum', 'illum_after', 'motion', 'motion_after', 'size',
            'size_after', 'occlusion', 'visible', 'illum_gt', 'illum_after_gt',
            'motion_gt', 'motion_after_gt', 'size_gt', 'size_after_gt',
            'occlusion_gt', 'visible_gt'
        ]

        for i in range(len(res)):
            if i < 8:
                temp = res[i]
                # loadmat already yields unicode strings in Python 3, so the
                # old Python 2 escape round-trip is unnecessary here
                G_samplelist[attr_name[i]] = [item[0][0] for item in temp]
            else:
                G_samplegt[attr_name[i]] = res[i]

        feats = {}

        for (key, val) in G_samplelist.items():
            feats[key] = []
            for j, val_item in enumerate(val):
                cur_img = Image.open(val_item).convert('RGB')
                cur_img = np.asarray(cur_img)

                # index with this sample's own position; the original reused
                # the stale loop variable `i` from the attribute loop above
                cur_initbbox = G_samplegt[key + '_gt'][j]
                cur_bbox = np.asarray(cur_initbbox).copy().reshape(1, 4)

                target_bbox = np.array(cur_initbbox)

                # compute padded sample
                padded_x1 = (cur_bbox[:, 0] - cur_bbox[:, 2] *
                             (opts['padding'] - 1.) / 2.).min()
                padded_y1 = (cur_bbox[:, 1] - cur_bbox[:, 3] *
                             (opts['padding'] - 1.) / 2.).min()
                padded_x2 = (cur_bbox[:, 0] + cur_bbox[:, 2] *
                             (opts['padding'] + 1.) / 2.).max()
                padded_y2 = (cur_bbox[:, 1] + cur_bbox[:, 3] *
                             (opts['padding'] + 1.) / 2.).max()
                padded_scene_box = np.reshape(
                    np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1,
                                padded_y2 - padded_y1)), (1, 4))

                scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
                jitter_scale = [1.]

                model.eval()
                crop_img_size = (scene_boxes[0, 2:4] * (
                    (opts['img_size'], opts['img_size']) / target_bbox[2:4])
                                 ).astype('int64') * jitter_scale[0]
                cropped_image, cur_image_var = img_crop_model.crop_image(
                    cur_img, np.reshape(scene_boxes[0], (1, 4)), crop_img_size)
                cropped_image = cropped_image - 128.

                feat_map = model(cropped_image, out_layer='conv3')

                rel_target_bbox = np.copy(target_bbox)
                rel_target_bbox[0:2] -= scene_boxes[0, 0:2]

                batch_num = np.zeros((cur_bbox.shape[0], 1))
                cur_rois = np.copy(cur_bbox)
                cur_rois[:,
                         0:2] -= np.repeat(np.reshape(scene_boxes[0, 0:2],
                                                      (1, 2)),
                                           cur_rois.shape[0],
                                           axis=0)
                scaled_obj_size = float(opts['img_size']) * jitter_scale[0]
                cur_rois = samples2maskroi(cur_rois, model.receptive_field,
                                           (scaled_obj_size, scaled_obj_size),
                                           target_bbox[2:4], opts['padding'])
                cur_rois = np.concatenate((batch_num, cur_rois), axis=1)
                cur_rois = Variable(
                    torch.from_numpy(cur_rois.astype('float32'))).cuda()
                cur_feats = model.roi_align_model(feat_map, cur_rois)
                cur_feats = cur_feats.reshape(cur_feats.shape[0],
                                              -1).data.clone()
                cur_feats = cur_feats.cpu().numpy()
                feats[key].append(np.squeeze(cur_feats, axis=0))

        feats_before = np.concatenate(
            (feats['illum'], feats['size'], feats['motion'], feats['visible']),
            axis=0)
        feats_after = np.concatenate(
            (feats['illum_after'], feats['size_after'], feats['motion_after'],
             feats['occlusion']),
            axis=0)
        np.save('./models/feats_before.npy', feats_before)
        np.save('./models/feats_after.npy', feats_after)

    if opts['use_gpu']:
        feats_before = Variable(
            torch.from_numpy(feats_before.astype('float32'))).cuda()
        feats_after = Variable(torch.from_numpy(
            feats_after.astype('float32'))).cuda()

    # boolean tensor indexing instead of np.where, which breaks on CUDA
    # tensors; mask is 0 where the feature vanishes after the change and
    # reset to 1 wherever it was already zero before
    mask = torch.ones_like(feats_after)
    mask[feats_after == 0] = 0
    mask[feats_before == 0] = 1

    # pretrain G
    pos_idx = np.random.permutation(feats_before.size(0))
    while (len(pos_idx) < batch_pos * maxiter):
        pos_idx = np.concatenate(
            [pos_idx, np.random.permutation(feats_before.size(0))])
    pos_pointer = 0

    for iter in range(maxiter):

        # select pos idx
        pos_next = pos_pointer + batch_pos
        pos_cur_idx = pos_idx[pos_pointer:pos_next]
        pos_cur_idx = feats_before.new(pos_cur_idx).long()
        pos_pointer = pos_next

        # create batch
        batch_data_feats = Variable(feats_before.index_select(0, pos_cur_idx))
        labels = Variable(mask.index_select(0, pos_cur_idx))
        if opts['use_gpu']:
            labels = labels.cuda()

        model_G.train()  ## model transfer into train mode

        # forward
        score = model_G(batch_data_feats)

        # optimize
        loss = g_criterion(score, labels)
        model_G.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model_G.parameters(), opts['grad_clip'])
        g_optimizer.step()

        if opts['visual_log']:
            print("Iter %d, G_Loss %.4f" % (iter, loss.data[0]))

    torch.save(model_G.state_dict(), model_path)
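A minimal call sketch, mirroring how run_mdnet below wires this up (nn, optim, and opts as in that function):

g_criterion = nn.MSELoss()
g_optimizer = optim.SGD(model_G.parameters(), lr=g_lr,
                        weight_decay=opts['w_decay'],
                        momentum=opts['momentum'])
G_pretrain_v3(model, model_G, g_criterion, g_optimizer,
              opts['maxiter_g_pretrain'], g_lr)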
Example 3
def run_mdnet(img_list,
              init_bbox,
              num2drop,
              g_lr,
              g_lr_update,
              gt=None,
              seq='seq_name ex)Basketball',
              savefig_dir='',
              display=False):

    ############################################
    ############################################
    ############################################
    # Init bbox
    target_bbox = np.array(init_bbox)
    secPerFrame = np.zeros((len(img_list), 1))
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = np.copy(target_bbox)
    result_bb[0] = np.copy(target_bbox)

    iou_result = np.zeros((len(img_list), 1))

    # execution time array
    exec_time_result = np.zeros((len(img_list), 1))

    # Init model
    #model_G = net_G()
    model_G = net_G_v2()
    model = MDNet(opts['model_path'])

    if opts['adaptive_align']:
        align_h = model.roi_align_model.aligned_height
        align_w = model.roi_align_model.aligned_width
        spatial_s = model.roi_align_model.spatial_scale
        model.roi_align_model = RoIAlignAdaMax(align_h, align_w, spatial_s)
    if opts['use_gpu']:
        model = model.cuda()
        model_G = model_G.cuda()

    model.set_learnable_params(opts['ft_layers'])
    model_G.set_learnable_params()

    # Init image crop model
    img_crop_model = imgCropper(1.)
    if opts['use_gpu']:
        img_crop_model.gpuEnable()

    # Init criterion and optimizer
    #criterion = FocalBinaryLoss()
    criterion = BinaryLoss()
    init_optimizer = set_optimizer(model, opts['lr_init'])
    update_optimizer = set_optimizer(model, opts['lr_update'])

    g_criterion = nn.MSELoss()
    g_optimizer = optim.SGD(model_G.parameters(),
                            lr=g_lr,
                            weight_decay=opts['w_decay'],
                            momentum=opts['momentum'])
    g_update_optimizer = optim.SGD(model_G.parameters(),
                                   lr=g_lr_update,
                                   weight_decay=opts['w_decay'],
                                   momentum=opts['momentum'])

    tic = time.time()
    # Load first image
    cur_image = Image.open(img_list[0]).convert('RGB')
    cur_image = np.asarray(cur_image)

    # Draw pos/neg samples
    ishape = cur_image.shape
    pos_examples = gen_samples(
        SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2),
        target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])
    neg_examples = gen_samples(
        SampleGenerator('uniform', (ishape[1], ishape[0]), 1, 2, 1.1),
        target_bbox, opts['n_neg_init'], opts['overlap_neg_init'])
    neg_examples = np.random.permutation(neg_examples)

    cur_bbreg_examples = gen_samples(
        SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 1.5,
                        1.1), target_bbox, opts['n_bbreg'],
        opts['overlap_bbreg'], opts['scale_bbreg'])

    # compute padded sample
    padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] *
                 (opts['padding'] - 1.) / 2.).min()
    padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] *
                 (opts['padding'] - 1.) / 2.).min()
    padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] *
                 (opts['padding'] + 1.) / 2.).max()
    padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] *
                 (opts['padding'] + 1.) / 2.).max()
    padded_scene_box = np.reshape(
        np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1,
                    padded_y2 - padded_y1)), (1, 4))

    scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
    if opts['jitter']:
        ## horizontal shift
        jittered_scene_box_horizon = np.copy(padded_scene_box)
        jittered_scene_box_horizon[0, 0] -= 4.
        jitter_scale_horizon = 1.

        ## vertical shift
        jittered_scene_box_vertical = np.copy(padded_scene_box)
        jittered_scene_box_vertical[0, 1] -= 4.
        jitter_scale_vertical = 1.

        ## scale reduction
        jittered_scene_box_reduce1 = np.copy(padded_scene_box)
        jitter_scale_reduce1 = 1.1**(-1)

        ## scale enlargement
        jittered_scene_box_enlarge1 = np.copy(padded_scene_box)
        jitter_scale_enlarge1 = 1.1**(1)

        ## scale reduction
        jittered_scene_box_reduce2 = np.copy(padded_scene_box)
        jitter_scale_reduce2 = 1.1**(-2)
        ## scale enlarge
        jittered_scene_box_enlarge2 = np.copy(padded_scene_box)
        jitter_scale_enlarge2 = 1.1**(2)

        scene_boxes = np.concatenate([
            scene_boxes, jittered_scene_box_horizon,
            jittered_scene_box_vertical, jittered_scene_box_reduce1,
            jittered_scene_box_enlarge1, jittered_scene_box_reduce2,
            jittered_scene_box_enlarge2
        ],
                                     axis=0)
        jitter_scale = [
            1., jitter_scale_horizon, jitter_scale_vertical,
            jitter_scale_reduce1, jitter_scale_enlarge1, jitter_scale_reduce2,
            jitter_scale_enlarge2
        ]
    else:
        jitter_scale = [1.]

    model.eval()
    for bidx in range(0, scene_boxes.shape[0]):
        crop_img_size = (scene_boxes[bidx, 2:4] * (
            (opts['img_size'], opts['img_size']) / target_bbox[2:4])
                         ).astype('int64') * jitter_scale[bidx]
        cropped_image, cur_image_var = img_crop_model.crop_image(
            cur_image, np.reshape(scene_boxes[bidx], (1, 4)), crop_img_size)
        cropped_image = cropped_image - 128.

        feat_map = model(cropped_image, out_layer='conv3')

        rel_target_bbox = np.copy(target_bbox)
        rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]

        batch_num = np.zeros((pos_examples.shape[0], 1))
        cur_pos_rois = np.copy(pos_examples)
        cur_pos_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2],
                                                     (1, 2)),
                                          cur_pos_rois.shape[0],
                                          axis=0)
        scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
        cur_pos_rois = samples2maskroi(cur_pos_rois, model.receptive_field,
                                       (scaled_obj_size, scaled_obj_size),
                                       target_bbox[2:4], opts['padding'])
        cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1)
        cur_pos_rois = Variable(
            torch.from_numpy(cur_pos_rois.astype('float32'))).cuda()
        cur_pos_feats = model.roi_align_model(feat_map, cur_pos_rois)
        cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0),
                                           -1).data.clone()

        batch_num = np.zeros((neg_examples.shape[0], 1))
        cur_neg_rois = np.copy(neg_examples)
        cur_neg_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2],
                                                     (1, 2)),
                                          cur_neg_rois.shape[0],
                                          axis=0)
        cur_neg_rois = samples2maskroi(cur_neg_rois, model.receptive_field,
                                       (scaled_obj_size, scaled_obj_size),
                                       target_bbox[2:4], opts['padding'])
        cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1)
        cur_neg_rois = Variable(
            torch.from_numpy(cur_neg_rois.astype('float32'))).cuda()
        cur_neg_feats = model.roi_align_model(feat_map, cur_neg_rois)
        cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0),
                                           -1).data.clone()

        ## bbreg rois
        batch_num = np.zeros((cur_bbreg_examples.shape[0], 1))
        cur_bbreg_rois = np.copy(cur_bbreg_examples)
        cur_bbreg_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2],
                                                       (1, 2)),
                                            cur_bbreg_rois.shape[0],
                                            axis=0)
        scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
        cur_bbreg_rois = samples2maskroi(cur_bbreg_rois, model.receptive_field,
                                         (scaled_obj_size, scaled_obj_size),
                                         target_bbox[2:4], opts['padding'])
        cur_bbreg_rois = np.concatenate((batch_num, cur_bbreg_rois), axis=1)
        cur_bbreg_rois = Variable(
            torch.from_numpy(cur_bbreg_rois.astype('float32'))).cuda()
        cur_bbreg_feats = model.roi_align_model(feat_map, cur_bbreg_rois)
        cur_bbreg_feats = cur_bbreg_feats.view(cur_bbreg_feats.size(0),
                                               -1).data.clone()

        feat_dim = cur_pos_feats.size(-1)

        if bidx == 0:
            pos_feats = cur_pos_feats
            neg_feats = cur_neg_feats
            ##bbreg feature
            bbreg_feats = cur_bbreg_feats
            bbreg_examples = cur_bbreg_examples
        else:
            pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
            neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)
            ##bbreg feature
            bbreg_feats = torch.cat((bbreg_feats, cur_bbreg_feats), dim=0)
            bbreg_examples = np.concatenate(
                (bbreg_examples, cur_bbreg_examples), axis=0)

    if pos_feats.size(0) > opts['n_pos_init']:
        pos_idx = np.asarray(range(pos_feats.size(0)))
        np.random.shuffle(pos_idx)
        pos_feats = pos_feats[pos_idx[0:opts['n_pos_init']], :]
    if neg_feats.size(0) > opts['n_neg_init']:
        neg_idx = np.asarray(range(neg_feats.size(0)))
        np.random.shuffle(neg_idx)
        neg_feats = neg_feats[neg_idx[0:opts['n_neg_init']], :]

    ##bbreg
    if bbreg_feats.size(0) > opts['n_bbreg']:
        bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
        np.random.shuffle(bbreg_idx)
        bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :]
        bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :]

    ## open images and crop patch from obj
    extra_obj_size = np.array((opts['img_size'], opts['img_size']))
    extra_crop_img_size = extra_obj_size * (opts['padding'] + 0.6)
    replicateNum = 100
    for iidx in range(replicateNum):
        extra_target_bbox = np.copy(target_bbox)

        extra_scene_box = np.copy(extra_target_bbox)
        extra_scene_box_center = extra_scene_box[
            0:2] + extra_scene_box[2:4] / 2.
        extra_scene_box_size = extra_scene_box[2:4] * (opts['padding'] + 0.6)
        extra_scene_box[
            0:2] = extra_scene_box_center - extra_scene_box_size / 2.
        extra_scene_box[2:4] = extra_scene_box_size

        extra_shift_offset = np.clip(2. * np.random.randn(2), -4, 4)
        cur_extra_scale = 1.1**np.clip(np.random.randn(1), -2, 2)

        extra_scene_box[0] += extra_shift_offset[0]
        extra_scene_box[1] += extra_shift_offset[1]
        extra_scene_box[2:4] *= cur_extra_scale[0]

        scaled_obj_size = float(opts['img_size']) / cur_extra_scale[0]

        cur_extra_cropped_image, _ = img_crop_model.crop_image(
            cur_image, np.reshape(extra_scene_box, (1, 4)),
            extra_crop_img_size)
        cur_extra_cropped_image = cur_extra_cropped_image.detach()

        # integer division keeps the sample counts ints under Python 3
        cur_extra_pos_examples = gen_samples(
            SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1,
                            1.2), extra_target_bbox,
            opts['n_pos_init'] // replicateNum, opts['overlap_pos_init'])
        cur_extra_neg_examples = gen_samples(
            SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 2,
                            1.1), extra_target_bbox,
            opts['n_neg_init'] // replicateNum // 4, opts['overlap_neg_init'])

        ##bbreg sample
        cur_extra_bbreg_examples = gen_samples(
            SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 1.5, 1.1),
            extra_target_bbox, opts['n_bbreg'] // replicateNum // 4,
            opts['overlap_bbreg'], opts['scale_bbreg'])

        batch_num = iidx * np.ones((cur_extra_pos_examples.shape[0], 1))
        cur_extra_pos_rois = np.copy(cur_extra_pos_examples)
        cur_extra_pos_rois[:, 0:2] -= np.repeat(np.reshape(
            extra_scene_box[0:2], (1, 2)),
                                                cur_extra_pos_rois.shape[0],
                                                axis=0)
        cur_extra_pos_rois = samples2maskroi(
            cur_extra_pos_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_pos_rois = np.concatenate((batch_num, cur_extra_pos_rois),
                                            axis=1)

        batch_num = iidx * np.ones((cur_extra_neg_examples.shape[0], 1))
        cur_extra_neg_rois = np.copy(cur_extra_neg_examples)
        cur_extra_neg_rois[:, 0:2] -= np.repeat(np.reshape(
            extra_scene_box[0:2], (1, 2)),
                                                cur_extra_neg_rois.shape[0],
                                                axis=0)
        cur_extra_neg_rois = samples2maskroi(
            cur_extra_neg_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_neg_rois = np.concatenate((batch_num, cur_extra_neg_rois),
                                            axis=1)

        ## bbreg rois
        batch_num = iidx * np.ones((cur_extra_bbreg_examples.shape[0], 1))
        cur_extra_bbreg_rois = np.copy(cur_extra_bbreg_examples)
        cur_extra_bbreg_rois[:,
                             0:2] -= np.repeat(np.reshape(
                                 extra_scene_box[0:2], (1, 2)),
                                               cur_extra_bbreg_rois.shape[0],
                                               axis=0)
        cur_extra_bbreg_rois = samples2maskroi(
            cur_extra_bbreg_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_bbreg_rois = np.concatenate(
            (batch_num, cur_extra_bbreg_rois), axis=1)

        if iidx == 0:
            extra_cropped_image = cur_extra_cropped_image

            extra_pos_rois = np.copy(cur_extra_pos_rois)
            extra_neg_rois = np.copy(cur_extra_neg_rois)
            ##bbreg rois
            extra_bbreg_rois = np.copy(cur_extra_bbreg_rois)
            extra_bbreg_examples = np.copy(cur_extra_bbreg_examples)
        else:
            extra_cropped_image = torch.cat(
                (extra_cropped_image, cur_extra_cropped_image), dim=0)

            extra_pos_rois = np.concatenate(
                (extra_pos_rois, np.copy(cur_extra_pos_rois)), axis=0)
            extra_neg_rois = np.concatenate(
                (extra_neg_rois, np.copy(cur_extra_neg_rois)), axis=0)
            ##bbreg rois
            extra_bbreg_rois = np.concatenate(
                (extra_bbreg_rois, np.copy(cur_extra_bbreg_rois)), axis=0)
            extra_bbreg_examples = np.concatenate(
                (extra_bbreg_examples, np.copy(cur_extra_bbreg_examples)),
                axis=0)

    extra_pos_rois = Variable(
        torch.from_numpy(extra_pos_rois.astype('float32'))).cuda()
    extra_neg_rois = Variable(
        torch.from_numpy(extra_neg_rois.astype('float32'))).cuda()
    ##bbreg rois
    extra_bbreg_rois = Variable(
        torch.from_numpy(extra_bbreg_rois.astype('float32'))).cuda()

    extra_cropped_image -= 128.

    extra_feat_maps = model(extra_cropped_image, out_layer='conv3')
    # Draw pos/neg samples
    ishape = cur_image.shape

    extra_pos_feats = model.roi_align_model(extra_feat_maps, extra_pos_rois)
    extra_pos_feats = extra_pos_feats.view(extra_pos_feats.size(0),
                                           -1).data.clone()

    extra_neg_feats = model.roi_align_model(extra_feat_maps, extra_neg_rois)
    extra_neg_feats = extra_neg_feats.view(extra_neg_feats.size(0),
                                           -1).data.clone()
    ##bbreg feat
    extra_bbreg_feats = model.roi_align_model(extra_feat_maps,
                                              extra_bbreg_rois)
    extra_bbreg_feats = extra_bbreg_feats.view(extra_bbreg_feats.size(0),
                                               -1).data.clone()

    ## concatenate extra features to original_features
    pos_feats = torch.cat((pos_feats, extra_pos_feats), dim=0)
    neg_feats = torch.cat((neg_feats, extra_neg_feats), dim=0)
    ## concatenate extra bbreg feats to original_bbreg_feats
    bbreg_feats = torch.cat((bbreg_feats, extra_bbreg_feats), dim=0)
    bbreg_examples = np.concatenate((bbreg_examples, extra_bbreg_examples),
                                    axis=0)

    torch.cuda.empty_cache()
    model.zero_grad()
    model_G.zero_grad()

    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])
    G_pretrain_v3(model, model_G, g_criterion, g_optimizer,
                  opts['maxiter_g_pretrain'], g_lr)

    ##bbreg train
    if bbreg_feats.size(0) > opts['n_bbreg']:
        bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
        np.random.shuffle(bbreg_idx)
        bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :]
        bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :]
    bbreg = BBRegressor((ishape[1], ishape[0]))
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)

    if pos_feats.size(0) > opts['n_pos_update']:
        pos_idx = np.asarray(range(pos_feats.size(0)))
        np.random.shuffle(pos_idx)
        pos_feats_all = [
            pos_feats.index_select(
                0,
                torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())
        ]
    if neg_feats.size(0) > opts['n_neg_update']:
        neg_idx = np.asarray(range(neg_feats.size(0)))
        np.random.shuffle(neg_idx)
        neg_feats_all = [
            neg_feats.index_select(
                0,
                torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())
        ]

    spf_total = time.time() - tic
    #spf_total = 0. # no first frame

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (cur_image.shape[1] / dpi, cur_image.shape[0] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(cur_image, aspect='auto')

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Main loop
    trans_f = opts['trans_f']
    for i in range(1, len(img_list)):

        tic = time.time()
        # Load image
        cur_image = Image.open(img_list[i]).convert('RGB')
        cur_image = np.asarray(cur_image)

        # Estimate target bbox
        ishape = cur_image.shape
        samples = gen_samples(
            SampleGenerator('gaussian', (ishape[1], ishape[0]),
                            trans_f,
                            opts['scale_f'],
                            valid=True), target_bbox, opts['n_samples'])

        padded_x1 = (samples[:, 0] - samples[:, 2] *
                     (opts['padding'] - 1.) / 2.).min()
        padded_y1 = (samples[:, 1] - samples[:, 3] *
                     (opts['padding'] - 1.) / 2.).min()
        padded_x2 = (samples[:, 0] + samples[:, 2] *
                     (opts['padding'] + 1.) / 2.).max()
        padded_y2 = (samples[:, 1] + samples[:, 3] *
                     (opts['padding'] + 1.) / 2.).max()
        padded_scene_box = np.asarray(
            (padded_x1, padded_y1, padded_x2 - padded_x1,
             padded_y2 - padded_y1))

        if padded_scene_box[0] > cur_image.shape[1]:
            padded_scene_box[0] = cur_image.shape[1] - 1
        if padded_scene_box[1] > cur_image.shape[0]:
            padded_scene_box[1] = cur_image.shape[0] - 1
        if padded_scene_box[0] + padded_scene_box[2] < 0:
            padded_scene_box[2] = -padded_scene_box[0] + 1
        if padded_scene_box[1] + padded_scene_box[3] < 0:
            padded_scene_box[3] = -padded_scene_box[1] + 1

        crop_img_size = (padded_scene_box[2:4] *
                         ((opts['img_size'], opts['img_size']) /
                          target_bbox[2:4])).astype('int64')
        cropped_image, cur_image_var = img_crop_model.crop_image(
            cur_image, np.reshape(padded_scene_box, (1, 4)), crop_img_size)
        cropped_image = cropped_image - 128.

        model.eval()
        feat_map = model(cropped_image, out_layer='conv3')

        # relative target bbox with padded_scene_box
        rel_target_bbox = np.copy(target_bbox)
        rel_target_bbox[0:2] -= padded_scene_box[0:2]

        # Extract sample features and get target location
        batch_num = np.zeros((samples.shape[0], 1))
        sample_rois = np.copy(samples)
        sample_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2],
                                                    (1, 2)),
                                         sample_rois.shape[0],
                                         axis=0)
        sample_rois = samples2maskroi(sample_rois, model.receptive_field,
                                      (opts['img_size'], opts['img_size']),
                                      target_bbox[2:4], opts['padding'])
        sample_rois = np.concatenate((batch_num, sample_rois), axis=1)
        sample_rois = Variable(torch.from_numpy(
            sample_rois.astype('float32'))).cuda()
        sample_feats = model.roi_align_model(feat_map, sample_rois)
        sample_feats = sample_feats.view(sample_feats.size(0), -1).clone()
        sample_scores = model(sample_feats, in_layer='fc4')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.data.cpu().numpy()
        target_score = top_scores.data.mean()
        target_bbox = samples[top_idx].mean(axis=0)

        success = target_score > opts['success_thr']

        # # Expand search area at failure
        if success:
            trans_f = opts['trans_f']
        else:
            trans_f = opts['trans_f_expand']

        ## Bbox regression
        if success:
            bbreg_feats = sample_feats[top_idx, :]
            bbreg_samples = samples[top_idx]
            bbreg_samples = bbreg.predict(bbreg_feats.data, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox
        iou_result[i] = 1.

        # Data collect
        if success:

            # Draw pos/neg samples
            pos_examples = gen_samples(
                SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2),
                target_bbox, opts['n_pos_update'], opts['overlap_pos_update'])
            neg_examples = gen_samples(
                SampleGenerator('uniform', (ishape[1], ishape[0]), 1.5, 1.2),
                target_bbox, opts['n_neg_update'], opts['overlap_neg_update'])

            padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] *
                         (opts['padding'] - 1.) / 2.).min()
            padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] *
                         (opts['padding'] - 1.) / 2.).min()
            padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] *
                         (opts['padding'] + 1.) / 2.).max()
            padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] *
                         (opts['padding'] + 1.) / 2.).max()
            padded_scene_box = np.reshape(
                np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1,
                            padded_y2 - padded_y1)), (1, 4))

            scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
            jitter_scale = [1.]

            for bidx in range(0, scene_boxes.shape[0]):
                crop_img_size = (scene_boxes[bidx, 2:4] * (
                    (opts['img_size'], opts['img_size']) / target_bbox[2:4])
                                 ).astype('int64') * jitter_scale[bidx]
                cropped_image, cur_image_var = img_crop_model.crop_image(
                    cur_image, np.reshape(scene_boxes[bidx], (1, 4)),
                    crop_img_size)
                cropped_image = cropped_image - 128.

                feat_map = model(cropped_image, out_layer='conv3')

                rel_target_bbox = np.copy(target_bbox)
                rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]

                batch_num = np.zeros((pos_examples.shape[0], 1))
                cur_pos_rois = np.copy(pos_examples)
                cur_pos_rois[:, 0:2] -= np.repeat(np.reshape(
                    scene_boxes[bidx, 0:2], (1, 2)),
                                                  cur_pos_rois.shape[0],
                                                  axis=0)
                scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
                cur_pos_rois = samples2maskroi(
                    cur_pos_rois, model.receptive_field,
                    (scaled_obj_size, scaled_obj_size), target_bbox[2:4],
                    opts['padding'])
                cur_pos_rois = np.concatenate((batch_num, cur_pos_rois),
                                              axis=1)
                cur_pos_rois = Variable(
                    torch.from_numpy(cur_pos_rois.astype('float32'))).cuda()
                cur_pos_feats = model.roi_align_model(feat_map, cur_pos_rois)
                cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0),
                                                   -1).data.clone()

                batch_num = np.zeros((neg_examples.shape[0], 1))
                cur_neg_rois = np.copy(neg_examples)
                cur_neg_rois[:, 0:2] -= np.repeat(np.reshape(
                    scene_boxes[bidx, 0:2], (1, 2)),
                                                  cur_neg_rois.shape[0],
                                                  axis=0)
                cur_neg_rois = samples2maskroi(
                    cur_neg_rois, model.receptive_field,
                    (scaled_obj_size, scaled_obj_size), target_bbox[2:4],
                    opts['padding'])
                cur_neg_rois = np.concatenate((batch_num, cur_neg_rois),
                                              axis=1)
                cur_neg_rois = Variable(
                    torch.from_numpy(cur_neg_rois.astype('float32'))).cuda()
                cur_neg_feats = model.roi_align_model(feat_map, cur_neg_rois)
                cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0),
                                                   -1).data.clone()

                feat_dim = cur_pos_feats.size(-1)

                if bidx == 0:
                    pos_feats = cur_pos_feats  ##index select
                    neg_feats = cur_neg_feats
                else:
                    pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
                    neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)

            if pos_feats.size(0) > opts['n_pos_update']:
                pos_idx = np.asarray(range(pos_feats.size(0)))
                np.random.shuffle(pos_idx)
                pos_feats = pos_feats.index_select(
                    0,
                    torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())
            if neg_feats.size(0) > opts['n_neg_update']:
                neg_idx = np.asarray(range(neg_feats.size(0)))
                np.random.shuffle(neg_idx)
                neg_feats = neg_feats.index_select(
                    0,
                    torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())

            pos_feats_all.append(pos_feats)
            neg_feats_all.append(neg_feats)

            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.stack(pos_feats_all[-nframes:],
                                   0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.stack(pos_feats_all, 0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)
            # train(model, criterion, update_optimizer, pos_data, neg_data, opts['maxiter_update'])
            update_vital_v2_new(model, model_G, criterion, update_optimizer,
                                g_update_optimizer, g_criterion, True,
                                num2drop, pos_data, neg_data,
                                opts['maxiter_update'])
        spf = time.time() - tic
        spf_total += spf
        secPerFrame[i] = spf

        # Display
        if display or savefig:
            im.set_data(cur_image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '%04d.jpg' % (i)),
                            dpi=dpi)

        if opts['visual_log']:
            if gt is None:
                print("Frame %d/%d, Score %.3f, Time %.3f" %
                      (i, len(img_list), target_score, spf))
            else:
                print("Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" %
                      (i, len(img_list), overlap_ratio(gt[i], result_bb[i])[0],
                       target_score, spf))
        # iou_result[i]= overlap_ratio(gt[i],result_bb[i])[0]

    fps = len(img_list) / spf_total
    #fps = (len(img_list)-1) / spf_total #no first frame
    # return iou_result, result_bb, fps, result, secPerFrame
    return result_bb, fps, result, secPerFrame
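A minimal driver sketch (hypothetical sequence path, bbox, and learning rates; assumes glob is imported and an OTB-style directory layout; num2drop=512 follows the commented default in update_vital_v2_new below):

# Hypothetical driver: track one sequence and report speed.
img_list = sorted(glob.glob('./dataset/Basketball/img/*.jpg'))
init_bbox = [198, 214, 34, 81]  # first-frame target as [x, y, w, h]
result_bb, fps, result, spf = run_mdnet(img_list, init_bbox, num2drop=512,
                                        g_lr=1e-4, g_lr_update=1e-5,
                                        seq='Basketball')
print('FPS: %.2f' % fps)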
Example 4
def update_vital_v2_new(model,
                        model_G,
                        criterion,
                        optimizer,
                        g_optimizer,
                        g_criterion,
                        adversarial,
                        num2drop,
                        pos_feats,
                        neg_feats,
                        maxiter,
                        in_layer='fc4'):
    model.train()

    batch_pos = opts['batch_pos']
    batch_neg = opts['batch_neg']
    batch_test = opts['batch_test']
    batch_neg_cand = max(opts['batch_neg_cand'], batch_neg)

    pos_idx = np.random.permutation(pos_feats.size(0))
    neg_idx = np.random.permutation(neg_feats.size(0))
    while (len(pos_idx) < batch_pos * maxiter):
        pos_idx = np.concatenate(
            [pos_idx, np.random.permutation(pos_feats.size(0))])
    while (len(neg_idx) < batch_neg_cand * maxiter):
        neg_idx = np.concatenate(
            [neg_idx, np.random.permutation(neg_feats.size(0))])
    pos_pointer = 0
    neg_pointer = 0

    for iter in range(maxiter):

        # select pos idx
        pos_next = pos_pointer + batch_pos
        pos_cur_idx = pos_idx[pos_pointer:pos_next]
        pos_cur_idx = pos_feats.new(pos_cur_idx).long()
        pos_pointer = pos_next

        # select neg idx
        neg_next = neg_pointer + batch_neg_cand
        neg_cur_idx = neg_idx[neg_pointer:neg_next]
        neg_cur_idx = neg_feats.new(neg_cur_idx).long()
        neg_pointer = neg_next

        # create batch
        batch_pos_feats = Variable(pos_feats.index_select(0, pos_cur_idx))
        batch_neg_feats = Variable(neg_feats.index_select(0, neg_cur_idx))

        # hard negative mining
        # tic=time.time()
        if batch_neg_cand > batch_neg:
            model.eval()  ## model transfer into evaluation mode
            for start in range(0, batch_neg_cand, batch_test):
                end = min(start + batch_test, batch_neg_cand)
                score = model(batch_neg_feats[start:end], in_layer=in_layer)
                if start == 0:
                    neg_cand_score = score.data[:, 1].clone()
                else:
                    neg_cand_score = torch.cat(
                        (neg_cand_score, score.data[:, 1].clone()), 0)

            _, top_idx = neg_cand_score.topk(batch_neg)
            batch_neg_feats = batch_neg_feats.index_select(
                0, Variable(top_idx))
            # model.train() ## model transfer into train mode
        # t1=time.time()-tic

        #######################################################################################
        ######################           ADDED: generate mask              ####################
        #######################################################################################
        #batch_pos_data = batch_pos_feats.clone().reshape(batch_pos_feats.size(0),-1,3,3)

        ######################            Update Net D first               ####################
        model_G.eval()
        res = model_G(batch_pos_feats)
        model_G.train()
        # boolean tensor indexing instead of np.where, which fails on CUDA
        # tensors; zero the feature positions G marks for dropping
        mask = torch.ones_like(batch_pos_feats)
        mask[res == 0] = 0
        batch_pos_feats_mask = batch_pos_feats * mask

        # forward

        model.train()
        pos_score = model(batch_pos_feats_mask, in_layer=in_layer)
        neg_score = model(batch_neg_feats, in_layer=in_layer)

        # optimize
        loss = criterion(pos_score, neg_score)
        model.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), opts['grad_clip'])
        optimizer.step()
        model.eval()

        ######################              Update Net G now               ####################
        if adversarial:
            # num2drop = 512
            idxlist = np.zeros([9, num2drop], dtype=np.int64)
            prob_k = np.zeros(9)
            for i in range(9):
                idxlist[i, :] = np.random.permutation(4608)[0:num2drop]
                cur_idx = pos_feats.new(idxlist[i]).long()
                cur_batch = Variable(pos_feats.index_select(0, pos_cur_idx))
                new_mask = mask.clone()
                new_mask[:, cur_idx] = 0
                new_cur_batch = cur_batch * new_mask

                res_ = model(new_cur_batch, in_layer=in_layer)
                X = res_
                X_maxchannel = torch.max(X, 1)
                X_maxchannel = X_maxchannel[0]
                X_maxchannel = X_maxchannel.view(X.size(0), 1)
                E = torch.exp(X - X_maxchannel)
                L = torch.sum(E, 1)
                Y = E / (L.view(X.size(0), 1))
                Y = Y.cpu().detach().numpy()
                prob_k[i] = np.sum(Y[:, 0])

            # pick the trial whose dropped features hurt the fg score most
            best = int(np.argmin(prob_k))
            drop_idx = pos_feats.new(idxlist[best]).long()

            batch_pos_data = Variable(pos_feats.index_select(0, pos_cur_idx))
            labels = mask.clone()
            # zero the dropped feature positions, not the trial index itself
            labels[:, drop_idx] = 0
            if opts['use_gpu']:
                labels = labels.cuda()
            # t5=time.time()-tic
            model_G.train()  ## model transfer into train mode

            # forward
            score = model_G(batch_pos_data)

            # optimize
            loss_g = g_criterion(score, labels)
            model_G.zero_grad()
            loss_g.backward()
            torch.nn.utils.clip_grad_norm_(model_G.parameters(),
                                           opts['grad_clip'])
            g_optimizer.step()

            if opts['visual_log']:
                print("Iter %d, Loss %.4f, G_Loss %.4f" %
                      (iter, loss.item(), loss_g.item()))
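Aside: the X_maxchannel / E / L / Y block above is just a numerically stable softmax over the two output channels; a sketch of the equivalent call (assuming torch.nn.functional is available as F):

import torch.nn.functional as F

# Subtracting the per-row max before exp is exactly what softmax does
# internally for numerical stability, so this reproduces Y directly.
Y = F.softmax(res_, dim=1).cpu().detach().numpy()
prob_k[i] = np.sum(Y[:, 0])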
Example 5
def train(model,
          criterion,
          optimizer,
          pos_feats,
          neg_feats,
          maxiter,
          in_layer='fc4'):
    model.train()

    batch_pos = opts['batch_pos']
    batch_neg = opts['batch_neg']
    batch_test = opts['batch_test']
    batch_neg_cand = max(opts['batch_neg_cand'], batch_neg)

    pos_idx = np.random.permutation(pos_feats.size(0))
    neg_idx = np.random.permutation(neg_feats.size(0))
    while (len(pos_idx) < batch_pos * maxiter):
        pos_idx = np.concatenate(
            [pos_idx, np.random.permutation(pos_feats.size(0))])
    while (len(neg_idx) < batch_neg_cand * maxiter):
        neg_idx = np.concatenate(
            [neg_idx, np.random.permutation(neg_feats.size(0))])
    pos_pointer = 0
    neg_pointer = 0

    for iter in range(maxiter):

        # select pos idx
        pos_next = pos_pointer + batch_pos
        pos_cur_idx = pos_idx[pos_pointer:pos_next]
        pos_cur_idx = pos_feats.new(pos_cur_idx).long()
        pos_pointer = pos_next

        # select neg idx
        neg_next = neg_pointer + batch_neg_cand
        neg_cur_idx = neg_idx[neg_pointer:neg_next]
        neg_cur_idx = neg_feats.new(neg_cur_idx).long()
        neg_pointer = neg_next

        # create batch
        batch_pos_feats = Variable(pos_feats.index_select(0, pos_cur_idx))
        batch_neg_feats = Variable(neg_feats.index_select(0, neg_cur_idx))

        # hard negative mining
        if batch_neg_cand > batch_neg:
            model.eval()  ## model transfer into evaluation mode
            for start in range(0, batch_neg_cand, batch_test):
                end = min(start + batch_test, batch_neg_cand)
                score = model(batch_neg_feats[start:end], in_layer=in_layer)
                if start == 0:
                    neg_cand_score = score.data[:, 1].clone()
                else:
                    neg_cand_score = torch.cat(
                        (neg_cand_score, score.data[:, 1].clone()), 0)

            _, top_idx = neg_cand_score.topk(batch_neg)
            batch_neg_feats = batch_neg_feats.index_select(
                0, Variable(top_idx))
            model.train()  ## model transfer into train mode

        # forward
        pos_score = model(batch_pos_feats, in_layer=in_layer)
        neg_score = model(batch_neg_feats, in_layer=in_layer)

        # optimize
        loss = criterion(pos_score, neg_score)
        model.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), opts['grad_clip'])
        optimizer.step()

        if opts['visual_log']:
            print "Iter %d, Loss %.4f" % (iter, loss.data[0])
Example 6
def train(model,
          model_g,
          criterion,
          optimizer,
          pos_feats,
          neg_feats,
          maxiter,
          in_layer="fc4"):
    """Train the models."""
    model.train()

    batch_pos = opts["batch_pos"]
    batch_neg = opts["batch_neg"]
    batch_test = opts["batch_test"]
    batch_neg_cand = max(opts["batch_neg_cand"], batch_neg)

    pos_idx = numpy.random.permutation(pos_feats.size(0))
    neg_idx = numpy.random.permutation(neg_feats.size(0))
    while len(pos_idx) < batch_pos * maxiter:
        pos_idx = numpy.concatenate(
            [pos_idx, numpy.random.permutation(pos_feats.size(0))])
    while len(neg_idx) < batch_neg_cand * maxiter:
        neg_idx = numpy.concatenate(
            [neg_idx, numpy.random.permutation(neg_feats.size(0))])
    pos_pointer = 0
    neg_pointer = 0

    for _ in range(maxiter):

        # select pos idx
        pos_next = pos_pointer + batch_pos
        pos_cur_idx = pos_idx[pos_pointer:pos_next]
        pos_cur_idx = pos_feats.new(pos_cur_idx).long()
        pos_pointer = pos_next

        # select neg idx
        neg_next = neg_pointer + batch_neg_cand
        neg_cur_idx = neg_idx[neg_pointer:neg_next]
        neg_cur_idx = neg_feats.new(neg_cur_idx).long()
        neg_pointer = neg_next

        # create batch
        batch_pos_feats = pos_feats[pos_cur_idx]
        if model_g is not None:
            batch_asdn_feats = pos_feats.index_select(0, pos_cur_idx)
        batch_neg_feats = neg_feats[neg_cur_idx]

        # hard negative mining
        if batch_neg_cand > batch_neg:
            model.eval()
            for start in range(0, batch_neg_cand, batch_test):
                end = min(start + batch_test, batch_neg_cand)
                with torch.no_grad():
                    score = model(batch_neg_feats[start:end],
                                  in_layer=in_layer)
                if start == 0:
                    neg_cand_score = score.detach()[:, 1].clone()
                else:
                    neg_cand_score = torch.cat(
                        (neg_cand_score, score.detach()[:, 1].clone()), 0)

            _, top_idx = neg_cand_score.topk(batch_neg)
            batch_neg_feats = batch_neg_feats[top_idx]
            model.train()

        if model_g is not None:
            model_g.eval()
            res_asdn = model_g(batch_asdn_feats)
            model_g.train()
            num = res_asdn.size(0)
            mask_asdn = torch.ones(num, 512, 3, 3)
            res_asdn = res_asdn.view(num, 3, 3)
            for j in range(num):
                feat_ = res_asdn[j, :, :]
                featlist = feat_.view(1, 9).squeeze()
                feat_list = featlist.detach().cpu().numpy()
                idlist = feat_list.argsort()
                idxlist = idlist[:3]

                for idx in idxlist:
                    row = idx // 3
                    col = idx % 3
                    # mask only sample j, not the entire batch
                    mask_asdn[j, :, col, row] = 0
            mask_asdn = mask_asdn.view(mask_asdn.size(0), -1)
            if opts["use_gpu"]:
                batch_asdn_feats = batch_asdn_feats.cuda()
                mask_asdn = mask_asdn.cuda()
            batch_asdn_feats = batch_asdn_feats * mask_asdn

        # forward
        if model_g is None:
            pos_score = model(batch_pos_feats, in_layer=in_layer)
        else:
            pos_score = model(batch_asdn_feats, in_layer=in_layer)
        neg_score = model(batch_neg_feats, in_layer=in_layer)

        # optimize
        loss = criterion(pos_score, neg_score)
        model.zero_grad()
        loss.backward()
        if "grad_clip" in opts:
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           opts["grad_clip"])
        optimizer.step()

        if model_g is not None:
            start = time.time()
            prob_k = torch.zeros(9)
            for k in range(9):
                row = k // 3
                col = k % 3

                model.eval()
                # clone first: view() shares storage, so zeroing one cell
                # would otherwise corrupt batch_pos_feats across iterations
                batch = batch_pos_feats.clone().view(batch_pos, 512, 3, 3)
                batch[:, :, col, row] = 0
                batch = batch.view(batch.size(0), -1)

                if opts["use_gpu"]:
                    batch = batch.cuda()

                prob = model(batch, in_layer="fc4",
                             out_layer="fc6_softmax")[:, 1]
                model.train()

                prob_k[k] = prob.sum()

            _, idx = torch.min(prob_k, 0)
            idx = idx.item()
            row = idx // 3
            col = idx % 3

            optimizer_g = gnet.g_init.set_optimizer_g(model_g)
            labels = torch.ones(batch_pos, 1, 3, 3)
            labels[:, :, col, row] = 0

            batch_pos_feats = batch_pos_feats.view(batch_pos_feats.size(0), -1)
            res = model_g(batch_pos_feats)
            labels = labels.view(batch_pos, -1)
            criterion_g = torch.nn.MSELoss(reduction="mean")
            loss_g_2 = criterion_g(res.float(), labels.cuda().float())
            model_g.zero_grad()
            loss_g_2.backward()
            optimizer_g.step()

            end = time.time()
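A minimal call sketch (this variant takes the generator as its second argument; passing model_g=None skips the adversarial masking entirely):

# With the generator: adversarial feature dropping on positive samples.
train(model, model_g, criterion, optimizer, pos_feats, neg_feats,
      opts["maxiter_update"])
# Without it: reduces to the plain hard-negative training of Example 5.
train(model, None, criterion, optimizer, pos_feats, neg_feats,
      opts["maxiter_update"])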
Example 7
        "recall75",
        "precision",
        "conf_obj",
        "conf_noobj",
    ]

    for epoch in range(opt.epochs):
        model.train()
        start_time = time.time()
        for batch_i, (_, imgs, targets) in enumerate(dataloader):
            batches_done = len(dataloader) * epoch + batch_i

            imgs = Variable(imgs.to(device))
            targets = Variable(targets.to(device), requires_grad=False)

            loss, outputs = model(imgs, targets)
            loss.backward()

            if batches_done % opt.gradient_accumulations == 0:
                # Accumulates gradient before each step
                optimizer.step()
                optimizer.zero_grad()

            # ----------------
            #   Log progress
            # ----------------

            log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (
                epoch, opt.epochs, batch_i, len(dataloader))

            metric_table = [[
Example 8
def main(args):

    if args.seed:
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)

    # Q, X = load_data(args.dataset, args.data_path)
    ppi_embs = np.loadtxt('STRING_PPI_struc2vec_embs.txt', skiprows=1)
    ppi_id = ppi_embs[:, 0].astype(int)
    ppi_embs = ppi_embs[:, 1:]
    X = ppi_embs.T  # (d, num_nodes)
    Q = X[:, 0:3]

    if args.graph_mode == 'ransac':
        q_RANSAC_graph, x_RANSAC_graph = load_ransac_graph(
            args.dataset, args.data_path)
    elif args.graph_mode == 'approx_ransac':
        _, x_RANSAC_graph = load_ransac_graph(args.dataset, args.data_path)
        q_RANSAC_graph = None
    else:
        q_RANSAC_graph, x_RANSAC_graph = None, None

    q_adj, q_features, x_adj, x_features = gen_graph(
        Q, X, args.kq, args.k, q_RANSAC_graph, x_RANSAC_graph)

    all_features = np.concatenate([q_features, x_features])
    all_adj = combine_graph(q_adj, x_adj)

    all_adj_normed = preprocess_graph(all_adj)
    x_adj_normed = preprocess_graph(x_adj)
    x_adj_normed_sparse_tensor = convert_sparse_matrix_to_sparse_tensor(
        x_adj_normed)
    all_adj_normed_sparse_tensor = convert_sparse_matrix_to_sparse_tensor(
        all_adj_normed)

    # features_placeholder = tf.placeholder(dtype=tf.float32, shape=[None, args.hidden_units])
    # adj_placeholder = tf.sparse_placeholder(dtype=tf.float32, shape=[None, None])

    # FIXME: regularizer = tf.contrib.layers.l2_regularizer(scale=args.regularizer_scale)

    model = ResidualGraphConvolutionalNetwork(train_batch_size=args.batch_size if args.batch_size > 0 else x_adj_normed.shape[0],
                                              val_batch_size=all_adj_normed.shape[0],
                                              num_layers=args.num_layers,
                                              hidden_units=args.hidden_units,
                                              init_weights=args.init_weights,
                                              layer_decay=args.layer_decay)

    if args.gpu_id is not None:
        model.cuda()
        x_adj_normed_sparse_tensor = x_adj_normed_sparse_tensor.cuda()
        all_adj_normed_sparse_tensor = all_adj_normed_sparse_tensor.cuda()
        print('using gpu')

    # FIXME: add flexible iterator
    training_dataset = DiffusionDataSet(features=x_features,
                                        adj=x_adj_normed_sparse_tensor)
    training_loader = DiffusionDataLoader(training_dataset,
                                          batch_size=args.batch_size if args.batch_size > 0 else len(
                                              training_dataset),
                                          num_workers=6,
                                          shuffle=True)
    validation_dataset = DiffusionDataSet(features=all_features,
                                          adj=all_adj_normed_sparse_tensor)
    validation_loader = DiffusionDataLoader(validation_dataset,
                                            batch_size=len(validation_dataset),
                                            num_workers=6,
                                            shuffle=False)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=0)

    # RMSprop reaches 76.48 with batch size 2048; it is slightly better than
    # Adam and does not drop the mAP later in training.
    # optimizer = torch.optim.RMSprop(model.parameters(),
    #                              lr=args.lr,
    #                              centered=True)
    # DenseSparseAdam reaches 76.53 with batch size 2048, although it's a bit better than Adam
    # from allennlp.training.optimizers import DenseSparseAdam
    # optimizer = DenseSparseAdam(model.parameters(),
    #                              lr=args.lr)

    best_map = 0.0
    itr = 0
    model.train()
    if args.loss == 'gss':
        loss_fcn = GSS_loss(args.alpha).gss_loss
    elif args.loss == 'triplet':
        loss_fcn = GSS_loss(args.alpha).tri_loss
    else:
        raise ValueError('unsupported loss: {}'.format(args.loss))

    while itr < args.epochs:
        # training step
        start_time = time.time()
        # forward
        for batch_id, batch_data in enumerate(training_loader):
            if args.gpu_id is not None:
                batch_data = batch_data.cuda()
            # forward the full training graph; the batch only indexes into the logits
            features = training_dataset.features
            if args.gpu_id is not None:
                features = features.cuda()
            logits, hidden_emb = model(x=features, adj=training_dataset.adj)

            if itr == 0:
                # fix the similarity threshold beta once, from the initial embeddings
                init_emb = hidden_emb.cpu().data.numpy()
                if args.beta_percentile is not None:
                    beta_score = np.percentile(
                        np.dot(init_emb, init_emb.T).flatten(),
                        args.beta_percentile)
                elif args.beta is not None:
                    beta_score = args.beta
                else:
                    raise ValueError(
                        'At least one of beta and beta_percentile must be set!')

            # TODO: change the loss here to support tri_loss, and load and use the precomputed graph
            loss = loss_fcn(logits=logits, beta=beta_score,
                            index=batch_data.detach())
            # if args.regularizer_scale:
            #     l2 = 0
            #     for p in model.parameters():
            #         l2 += (p**2).sum()
            #     loss += args.regularizer_scale * l2
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        train_time = time.time() - start_time
        itr += 1
        # ============
        # eval step
        # ============
        print(f"iter {itr}")
        # print(hidden_emb[0, :10])
    # save the final node embeddings
    np.savetxt('graph_embs.txt', hidden_emb.cpu().data)
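A note on the snippet above: beta_score is fixed once, at the first iteration, as a percentile of all pairwise inner products of the initial hidden embeddings, and every later loss evaluation reuses that threshold. A minimal self-contained sketch of just that thresholding step (the embedding shape and the 95th percentile are illustrative assumptions, not values from the snippet):

import numpy as np

# toy stand-in for the hidden embeddings produced at the first iteration
emb = np.random.randn(100, 32).astype(np.float32)

# beta is the q-th percentile of the flattened pairwise-similarity matrix;
# pairs whose inner product exceeds beta are the ones the loss treats as similar
beta_percentile = 95  # hypothetical value for args.beta_percentile
beta_score = np.percentile(np.dot(emb, emb.T).flatten(), beta_percentile)
print(beta_score)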
Esempio n. 9
0
def train(args):
    # random seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # gpu
    if not args.no_cuda:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
        print('Using GPU %s' % args.gpu)

    # word vector
    corpus = Corpus()
    vocab, embed = corpus.load_embed(args.fn_embed)
    print("finish loading external word embeding, the shape is:")
    print(embed.shape)

    # model
    model_dict = {'lstm_comparing': BiLSTM_Encoding_Comparing,
                  'char_lstm_comparing': Char_BiLSTM_Encoding_Comparing}
    print("current model is", args.model)
    model_name = model_dict[args.model]
    if not args.no_cuda:
        embed = embed.cuda()
    model = model_name(args, embed)
    if not args.no_cuda:
        model.cuda()
    print(model)

    train_questions_raw, train_golds_raw, train_negs_raw = corpus.load_data(args.fn_train, 'train')
    valid_questions_raw, valid_golds_raw, valid_negs_raw = corpus.load_data(args.fn_valid, 'valid')

    train_questions = corpus.numericalize(train_questions_raw, args.input_mode)
    
    train_golds = corpus.numericalize(train_golds_raw, args.input_mode)
    train_negs = []
    for line in train_negs_raw:
        train_negs.append(corpus.numericalize(line, args.input_mode))
    
    # from pdb import set_trace
    # set_trace()
    if isinstance(train_questions, tuple):
        print("train data loaded! %d questions in total" % len(train_questions[0]))
    else:
        print("train data loaded! %d questions in total" % len(train_questions))

    valid_questions = corpus.numericalize(valid_questions_raw, args.input_mode)
    valid_golds = corpus.numericalize(valid_golds_raw, args.input_mode)
    valid_negs = []
    for index, line in enumerate(valid_negs_raw):
        valid_negs.append(corpus.numericalize(line, args.input_mode))
    
    if isinstance(valid_questions, tuple):
        print("valid data loaded! %d questions in total" % len(valid_questions[0]))
    else:
        print("valid data loaded! %d questions in total" % len(valid_questions))
    
    valid_dataset = (valid_questions, valid_golds, valid_negs)

    print("字符字典长度", corpus.len_char_dict())
    
    # dump vocab
    corpus.dump_vocab(args.vocab_word, mode='word')
    corpus.dump_vocab(args.vocab_char, mode='char')

    # training settings
    optimizer_dict = {"adam": Adam}
    optimizer_name = optimizer_dict[args.optimizer]
    print("chosen optimizer: %s" % args.optimizer)
    optimizer = optimizer_name(model.parameters(), lr=args.learning_rate)
    
    criterion = MarginRankingLoss(margin=args.margin)
    
    patience = args.patience
    num_train_epochs = args.num_train_epochs
    iters_left = patience
    best_precision = 0
    num_not_improved = 0
    global_step = 0

    logger.info('\nstart training:%s'%datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    print("start training!")

    for epoch in range(args.num_train_epochs):
        # batchlize
        sample_train_negs = train_neg_sample(train_negs, args.neg_size, mode=args.input_mode)
        sample_train = (train_questions, train_golds, sample_train_negs)
        train_batches = train_batchlize(sample_train, args.batch_size, mode=args.input_mode)
        print("train data batchlized............")
        
        # per-epoch training accuracy counters
        train_right = 0
        train_total = 0
        # logging
        print('start time')
        start_time = datetime.now()
        logger.info('\nstart training: %s' % datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        print(start_time)

        model.train()
        optimizer.zero_grad()
        loss_epoch = 0  # total loss for this epoch
        for step, batch in enumerate(train_batches):
            # if not args.no_cuda:
            #     batch = (t.cuda() for t in batch)
            question_batch, gold_batch, negs_batch = batch
            pos_score, neg_scores = model(question_batch, gold_batch, negs_batch)
            
            pos_score = pos_score.expand_as(neg_scores).reshape(-1)
            neg_scores = neg_scores.reshape(-1)
            assert pos_score.shape == neg_scores.shape
            ones = torch.ones(pos_score.shape)
            if not args.no_cuda:
                ones = ones.cuda()
            loss = criterion(pos_score, neg_scores, ones)
            
            # evaluate train
            # a question counts as correct only when the gold scores higher than all neg_size negatives
            result = (torch.sum(pos_score.view(-1, args.neg_size) >
                                neg_scores.view(-1, args.neg_size), -1) == args.neg_size)

            train_right += torch.sum(result).item()
            train_total += len(result)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_epoch += loss.item()

        # logging
        end_time = datetime.now()
        logger.info('\ntrain epoch %d time span:%s' % (epoch, end_time - start_time))
        print('train loss', loss_epoch)
        logger.info('train loss:%f' % loss_epoch)
        print('train result', train_right, train_total, 1.0 * train_right / train_total)
        logger.info(('train result', train_right, train_total, 1.0 * train_right / train_total))

        # eval
        right, total, precision = evaluate_char(args, model, valid_dataset)

        # print
        print('valid result', right, total, precision)
        print('epoch time')
        print(datetime.now())
        print('*'*20)
        logger.info("epoch:%d\t"%epoch+"dev_Accuracy-----------------------%d/%d=%f\n"%(right, total, precision))
        end_time = datetime.now()
        logger.info('dev epoch %d time span:%s'%(epoch,end_time-start_time))
        
        if precision > best_precision:
            best_precision = precision
            iters_left = patience
            print("epoch %d saved\n"%epoch)
            logger.info("epoch %d saved\n"%epoch)
            # Save a trained model
            model_to_save = model.module if hasattr(model, 'module') else model  # only save the model itself
            output_model_file = os.path.join(args.output_dir, "best_model.bin")
            torch.save(model_to_save.state_dict(), output_model_file)
        else:
            iters_left -= 1
            if iters_left == 0:
                break
    logger.info('finish training!')
    print('finish training!')
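A note on the criterion above: torch.nn.MarginRankingLoss(margin=m) computes mean(max(0, -y * (x1 - x2) + m)), so with y = 1 a (gold, negative) pair contributes zero loss only when the gold score beats the negative score by at least the margin. A small worked example (the margin and scores are illustrative, not values from the snippet):

import torch
from torch.nn import MarginRankingLoss

criterion = MarginRankingLoss(margin=0.5)
pos_score = torch.tensor([0.9, 0.2])   # scores of the gold candidates
neg_scores = torch.tensor([0.1, 0.8])  # scores of the sampled negatives
ones = torch.ones(2)                   # y = 1: gold should rank above negative

# per-pair losses: max(0, -(0.9 - 0.1) + 0.5) = 0.0 and max(0, -(0.2 - 0.8) + 0.5) = 1.1
print(criterion(pos_score, neg_scores, ones))  # tensor(0.5500), the mean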