def generate_samples(self, curr_bbox, positive, init=False):
        """Sample boxes around ``curr_bbox`` and build their training labels.

        The sample count and the overlap threshold are read from
        ``self.opts``; which pair of keys is used depends on ``init``
        (initial vs. online phase) and ``positive`` (positive vs. negative
        samples).  Twice the requested number of candidates is generated,
        the candidates are filtered by their overlap with ``curr_bbox`` and
        at most ``n`` of the survivors are picked with
        ``np.random.randint`` (i.e. with replacement).

        Args:
            curr_bbox: reference box the samples are drawn around.
            positive: truthy for positive samples (overlap above the
                threshold, labels from ``gen_action_labels``); falsy for
                negative samples (overlap below the threshold, every action
                label set to -1).
            init: truthy to use the ``*_init`` options, otherwise the
                ``*_online`` options.

        Returns:
            ``(bboxes, labels, score_labels)`` where ``bboxes`` is the array
            of selected boxes, ``labels`` is a list with one action-label
            row per box and ``score_labels`` is a list of 1s (positive) or
            0s (negative).
        """
        # (init, positive) -> (sample-count key, overlap-threshold key)
        option_keys = {
            (True, True): ('nPos_init', 'posThre_init'),
            (True, False): ('nNeg_init', 'negThre_init'),
            (False, True): ('nPos_online', 'posThre_online'),
            (False, False): ('nNeg_online', 'negThre_online'),
        }
        count_key, thre_key = option_keys[(bool(init), bool(positive))]
        n = self.opts[count_key]
        thre = self.opts[thre_key]

        assert n > 0, "if n = 0, don't initialize this class"

        # Over-generate (2 * n) so enough candidates survive the filter.
        if positive:
            candidates = gen_samples('gaussian', curr_bbox, n * 2, self.opts,
                                     self.opts['finetune_trans'],
                                     self.opts['finetune_scale_factor'])
        else:
            candidates = gen_samples('uniform', curr_bbox, n * 2, self.opts,
                                     2, 5)

        overlaps = np.array(
            overlap_ratio(candidates,
                          np.matlib.repmat(curr_bbox, len(candidates), 1)))
        keep = overlaps > thre if positive else overlaps < thre
        candidates = candidates[keep]

        # Random pick with replacement among the surviving candidates.
        chosen = np.random.randint(low=0,
                                   high=len(candidates),
                                   size=min(len(candidates), n))
        bboxes = candidates[chosen, :]

        # score labels: 1 is positive. 0 is negative
        if positive:
            action_labels = gen_action_labels(self.opts['num_actions'],
                                              self.opts, np.array(bboxes),
                                              curr_bbox)
            score_labels = list(np.ones(len(bboxes), dtype=int))
        else:
            action_labels = np.full((self.opts['num_actions'], len(bboxes)),
                                    fill_value=-1)
            score_labels = list(np.zeros(len(bboxes), dtype=int))

        # One row of action labels per box.
        labels = np.transpose(action_labels).tolist()

        return bboxes, labels, score_labels
def gen_samples(generator, bbox, n, overlap_range=None, scale_range=None):
    """Draw ``n`` samples from ``generator``, optionally filtered by range.

    Args:
        generator: callable invoked as ``generator(bbox, count)`` that
            returns ``count`` candidate boxes as a 2-D array.
        bbox: reference box; ``bbox[2:]`` is used as its size.
        n: number of samples requested.
        overlap_range: optional ``(low, high)`` bounds (inclusive) on the
            value returned by ``overlap_ratio(candidates, bbox)``.
        scale_range: optional ``(low, high)`` bounds (inclusive) on the
            ratio between a candidate's area and the area of ``bbox``.

    Returns:
        With no range given, whatever ``generator(bbox, n)`` returns.
        Otherwise the accepted candidates, collected over up to three
        rounds that over-generate by a factor of 2, 4 and 8; fewer than
        ``n`` rows are returned when not enough candidates pass, and
        ``None`` when no round runs at all (``n <= 0``).
    """
    if overlap_range is None and scale_range is None:
        return generator(bbox, n)

    collected = None
    missing = n
    oversample = 2
    while missing > 0 and oversample < 16:
        candidates = generator(bbox, missing * oversample)

        keep = np.ones(len(candidates), dtype=bool)
        if overlap_range is not None:
            overlap = overlap_ratio(candidates, bbox)
            keep &= (overlap >= overlap_range[0]) & (overlap <=
                                                     overlap_range[1])
        if scale_range is not None:
            area_ratio = np.prod(candidates[:, 2:], axis=1) / np.prod(bbox[2:])
            keep &= (area_ratio >= scale_range[0]) & (area_ratio <=
                                                      scale_range[1])

        # Take only as many accepted candidates as are still missing.
        accepted = candidates[keep, :][:missing]
        if collected is None:
            collected = accepted
        else:
            collected = np.concatenate([collected, accepted])

        missing = n - len(collected)
        oversample *= 2

    return collected
Esempio n. 3
0
def iou_precision_plot(bboxes, ground_truth, title, show=True, save_plot=None):
    """Compute the precision-vs-IoU-threshold curve and optionally plot it.

    For every threshold ``p / 100`` with ``p`` in ``0..99`` the precision is
    the fraction of frames whose IoU (from ``overlap_ratio``) is at least
    that threshold.

    Args:
        bboxes: predicted boxes, one per frame.
        ground_truth: ground-truth boxes, one per frame.
        title: title forwarded to ``plot_result``.
        show: forwarded to ``plot_result``; also enables plotting.
        save_plot: optional output name; ``'-iou'`` is appended before it is
            forwarded to ``plot_result``.

    Returns:
        List of 100 precision values, one per threshold.
    """
    max_threshold = 100  # used for graphs in the paper

    if len(bboxes) != len(ground_truth):
        print("WARNING: the size of iou and ground_truth are not same")
        # just ignore any extra frames, in either results or ground truth
        n = min(len(bboxes), len(ground_truth))
        # The truncated predictions must replace ``bboxes`` itself; storing
        # them under another name left the lengths mismatched.
        bboxes = bboxes[:n]
        ground_truth = ground_truth[:n]

    iou = np.array(overlap_ratio(bboxes, ground_truth))

    # compute precision
    precisions = [
        len(iou[iou >= p / 100.0]) / len(iou) for p in range(max_threshold)
    ]

    # plot
    if show or save_plot:
        if save_plot is not None:
            save_plot += '-iou'
        plot_result(precisions,
                    title,
                    show=show,
                    save_plot=save_plot,
                    xlabel='iou threshold (x0.01)',
                    ylabel='precision')

    return precisions
Esempio n. 4
0
def reward_original(gt, box):
    """Return +1 when ``overlap_ratio(gt, box)`` exceeds 0.7, otherwise -1."""
    return 1 if overlap_ratio(gt, box) > 0.7 else -1
def gen_action_labels(num_actions, opts, bb_samples, gt_bbox):
    """Build one-hot action labels for a set of sample boxes.

    For every sample, each of the ``num_actions`` actions described by
    ``opts['action_move']`` is applied to the box and the overlap of the
    resulting boxes with ``gt_bbox`` is computed; the label marks the
    selected action.

    Args:
        num_actions: number of actions (rows of the label matrix).
        opts: options; reads ``action_move`` (keys ``x``, ``y``, ``w``,
            ``h``, ``deltas``), ``stop_action`` and ``stopIou``.
        bb_samples: 2-D array of sample boxes.  Each row is temporarily
            rewritten in place (corner -> centre) and restored afterwards.
            # assumes rows are (x, y, w, h) with (x, y) the top-left corner
        gt_bbox: ground-truth box the moved boxes are compared against.

    Returns:
        Array of shape ``(num_actions, len(bb_samples))`` with a single 1
        per column.
    """
    sample_count = len(bb_samples)
    labels = np.zeros([num_actions, sample_count])
    move = opts['action_move']

    for col in range(sample_count):
        # Row indexing: the edits below write through to ``bb_samples``.
        box = bb_samples[col, :]

        # Corner coordinates -> centre coordinates.
        box[0] = box[0] + 0.5 * box[2]
        box[1] = box[1] + 0.5 * box[3]

        # Per-coordinate step sizes, proportional to the box size.
        step = [
            move['x'] * box[2], move['y'] * box[3], move['w'] * box[2],
            move['h'] * box[3]
        ]
        # Tie the width/height steps together so scaling keeps the aspect
        # ratio of the box.
        aspect = box[2] / box[3]
        if box[2] > box[3]:
            step[3] = step[2] / aspect
        else:
            step[2] = step[3] * aspect

        step_grid = np.matlib.repmat(step, num_actions, 1)
        moved = np.matlib.repmat(box, num_actions, 1) + np.multiply(
            move['deltas'], step_grid)

        # Centre coordinates -> corner coordinates for the overlap test.
        moved[:, 0] = moved[:, 0] - 0.5 * moved[:, 2]
        moved[:, 1] = moved[:, 1] - 0.5 * moved[:, 3]

        overs = overlap_ratio(moved, np.matlib.repmat(gt_bbox, num_actions,
                                                      1))

        # Best action among all but the last two entries
        # (translation overlap).
        best = np.argmax(overs[:-2])
        best_overlap = overs[best]
        stop = opts['stop_action']

        if overs[stop] > opts['stopIou']:
            best = stop

        # No candidate beat the stop action: consider every action
        # (trans + scale).
        if best_overlap == overs[stop]:
            best = np.argmax(overs[:])

        labels[best, col] = 1

        # Restore the corner representation of the sample row.
        box[0] = box[0] - 0.5 * box[2]
        box[1] = box[1] - 0.5 * box[3]

    return labels  # in real matlab code, they also return overs


# test the module
# from utils.gen_samples import gen_samples
# gt_bbox = [50,50,20,20]
# pos_examples = gen_samples('gaussian', gt_bbox, opts['nPos_train']*5, opts, 0.1, 5)
# gen_action_labels(opts['num_actions'], opts, pos_examples, gt_bbox)
Esempio n. 6
0
    def train(self, X, bbox, gt):
        """Fit ``self.model`` on the samples that pass the range filters.

        Samples are kept when their overlap with ``gt`` lies inside
        ``self.overlap_range`` and their area relative to ``gt`` lies inside
        ``self.scale_range`` (both bounds inclusive).  The regression
        targets come from ``self.get_examples``.

        Args:
            X: per-sample features; converted with ``X.cpu().numpy()``.
            bbox: sample boxes, one row per sample (copied, not modified).
            gt: ground-truth box; a 1-D box is promoted to a single row.
        """
        features = X.cpu().numpy()
        boxes = np.copy(bbox)
        target = np.copy(gt)

        if target.ndim == 1:
            target = target[None, :]

        overlap = overlap_ratio(boxes, target)
        # Area of every sample relative to the ground-truth area.
        scale = np.prod(boxes[:, 2:], axis=1) / np.prod(target[0, 2:])

        overlap_ok = (overlap >= self.overlap_range[0]) * \
                     (overlap <= self.overlap_range[1])
        scale_ok = (scale >= self.scale_range[0]) * \
                   (scale <= self.scale_range[1])
        keep = overlap_ok * scale_ok

        targets = self.get_examples(boxes[keep], target)

        self.model.fit(features[keep], targets)
Esempio n. 7
0
    def predict(self, X, bbox):
        """Refine ``bbox`` with the offsets predicted by ``self.model``.

        The predicted offsets shift the box centre (scaled by the box size)
        and rescale the size through ``exp``.  A refined box whose overlap
        with, or area ratio to, its original box falls outside
        ``self.overlap_range`` / ``self.scale_range`` is replaced by the
        original box.  Finally positions are clamped to be non-negative and
        sizes are capped using ``self.img_size``.

        Args:
            X: per-box features; converted with ``X.cpu().numpy()``.
            bbox: boxes to refine, one row per box (not modified).
                # assumes rows are (x, y, w, h) with (x, y) the top-left

        Returns:
            Array of refined boxes with the same shape as ``bbox``.
        """
        features = X.cpu().numpy()
        refined = np.copy(bbox)

        offsets = self.model.predict(features)

        # corner -> centre, apply the regression, centre -> corner
        refined[:, :2] = refined[:, :2] + refined[:, 2:] / 2
        refined[:, :2] = offsets[:, :2] * refined[:, 2:] + refined[:, :2]
        refined[:, 2:] = np.exp(offsets[:, 2:]) * refined[:, 2:]
        refined[:, :2] = refined[:, :2] - refined[:, 2:] / 2

        # Reject refinements that drift too far from the input box.
        overlap = overlap_ratio(bbox, refined)
        scale = np.prod(bbox[:, 2:], axis=1) / np.prod(refined[:, 2:], axis=1)
        overlap_ok = (overlap >= self.overlap_range[0]) * \
                     (overlap <= self.overlap_range[1])
        scale_ok = (scale >= self.scale_range[0]) * \
                   (scale <= self.scale_range[1])
        rejected = np.logical_not(overlap_ok * scale_ok)
        refined[rejected] = bbox[rejected]

        refined[:, :2] = np.maximum(refined[:, :2], 0)
        # NOTE(review): the size cap subtracts the ORIGINAL top-left corner
        # (bbox), not the refined one -- confirm this is intended.
        refined[:, 2:] = np.minimum(refined[:, 2:],
                                    self.img_size - bbox[:, :2])

        return refined
def do_iou_precise(path_exam, path_gt, thre=0.7):
    '''
    Compute the IoU between two saved box arrays, save it to output/iou.txt
    and print the average IoU and the precision.

    The precision is the fraction of IoU values strictly greater than
    ``thre``.  The ``output/`` directory is created when it does not exist.

    :param path_exam: path of the ``.npy`` file holding the boxes to evaluate
    :param path_gt: path of the ``.npy`` file holding the ground-truth boxes
    :param thre: IoU threshold above which a box counts as correct
    :return: None
    '''
    import os

    from utils.overlap_ratio import overlap_ratio

    path_home = "output/"
    x1 = np.load(path_exam)
    x2 = np.load(path_gt)
    iou = np.array(overlap_ratio(x1, x2))

    # np.savetxt does not create missing directories.
    os.makedirs(path_home, exist_ok=True)
    np.savetxt(path_home + 'iou.txt', iou,
               fmt='%.06f')  # fmt: keep 6 numbers after dot

    average_iou = iou.mean()
    precise = np.count_nonzero(iou > thre) / iou.size
    print("average_iou: " + str(average_iou) + "  ;\t  precise: " +
          str(precise))
def process_data_mul_step_2(img_paths, opt, train_db_pos_neg_all, lock):
    """Build multi-step action samples with a random-rollout search.

    For every sequence in ``img_paths``, every 5th frame ``i`` and every
    track in that frame, the box of the same track in frame ``i + 1`` is the
    target.  A 14-step action path is built: at each step 50 random
    rollouts of 5 random actions (``random.randint(0, 10)``) are simulated
    with ``do_action`` and the first action of the rollout ending closest
    (by ``cal_iou``) to the target is committed.  The stop action is then
    appended.  When the final box overlaps the target by more than
    ``opts['stopIou']`` each step is stored as a positive sample (one-hot
    action label, score 1) and, for every third step, one negative box
    sampled around the target is stored (labels all -1, score 0).

    Only samples holding exactly 20 boxes are kept.  Tracks with no match
    in the next frame are skipped, and a negative is stored only when one
    was found within 20 draws (previously a stale value from an earlier
    iteration was reused in both cases).

    Args:
        img_paths: iterable of sequence dicts with keys ``gt``,
            ``img_files`` and ``trackid`` (per-frame lists).
        opt: options dict; a shallow copy is modified locally
            (``imgSize`` is set per sequence).
        train_db_pos_neg_all: shared list extended with the samples.
        lock: lock guarding ``train_db_pos_neg_all``; must support the
            ``with`` statement.
    """
    opts = opt.copy()
    train_db_pos_neg_gpu = []
    for train_i in img_paths:
        n_frames = len(train_i['gt'])
        # NOTE(review): the [39:-5] slice presumably strips a fixed-length
        # data-root prefix and the image extension -- verify against the
        # layout of 'img_files'.
        gt_file_path = '../datasets/data/ILSVRC/Annotations/VID/train/' + train_i[
            'img_files'][0][39:-5] + '.xml'
        imginfo = get_xml_img_info(gt_file_path)
        opts['imgSize'] = imginfo['imgsize']

        for i in range(0, n_frames - 2, 5):
            for l in range(len(train_i['trackid'][i])):
                # Box of the same track in the next frame (last match wins).
                gt_end = None
                for k in range(len(train_i['trackid'][i + 1])):
                    if train_i['trackid'][i][l] == train_i['trackid'][i +
                                                                      1][k]:
                        gt_end = train_i['gt'][i + 1][k]
                if gt_end is None:
                    # The track is absent from the next frame.
                    continue

                train_db_pos_neg = {
                    'img_path': train_i['img_files'][i + 1],
                    'bboxes': [],
                    'labels': [],
                    'score_labels': []
                }

                step_list = []
                box_list = [train_i['gt'][i][l]]
                for _step in range(14):
                    iou_max = -1
                    step_max = []
                    box_max = []
                    for _rollout in range(50):
                        curr_bbox = box_list[-1]
                        step = []
                        box = []
                        for _move in range(5):  # step numbers
                            action = random.randint(0, 10)
                            step.append(action)
                            box.append(curr_bbox)
                            curr_bbox = do_action(curr_bbox, opts, action,
                                                  opts['imgSize'])
                        box.append(curr_bbox)
                        step.append(opts['stop_action'])  # stop action
                        c_iou = cal_iou(curr_bbox, gt_end)
                        if c_iou > iou_max:
                            iou_max = c_iou
                            step_max = step
                            box_max = box
                    # Commit only the first action of the best rollout and
                    # the box it leads to.
                    step_list.append(step_max[0])
                    box_list.append(box_max[1])
                step_list.append(opts['stop_action'])

                iou_max = cal_iou(box_list[-1], gt_end)
                if iou_max > opts['stopIou']:  # save data to train_db
                    for datai in range(len(step_list)):
                        train_db_pos_neg['bboxes'].append(box_list[datai])
                        action_t = np.zeros(opts['num_actions'])
                        action_t[step_list[datai]] = 1
                        train_db_pos_neg['labels'].append(action_t.tolist())
                        train_db_pos_neg['score_labels'].extend(
                            list(np.ones(1, dtype=int)))

                        if datai % 3 != 0:
                            continue

                        # Up to 20 attempts to draw one negative box.
                        neg_box = None
                        for _attempt in range(20):
                            neg = gen_samples('gaussian', gt_end, 5, opts, 2,
                                              10)
                            r = overlap_ratio(
                                neg, np.matlib.repmat(gt_end, len(neg), 1))
                            neg = neg[np.array(r) <
                                      opts['consecutive_negThre_train']]
                            if len(neg) > 0:
                                neg_box = neg[0]
                                break

                        if neg_box is not None:
                            train_db_pos_neg['bboxes'].append(neg_box)
                            action_label_neg = np.full(
                                (opts['num_actions'], 1), fill_value=-1)
                            action_label_neg = np.transpose(
                                action_label_neg).tolist()
                            train_db_pos_neg['labels'].extend(action_label_neg)
                            train_db_pos_neg['score_labels'].extend(
                                list(np.zeros(1, dtype=int)))

                if len(train_db_pos_neg['bboxes']) == 20:
                    train_db_pos_neg_gpu.append(train_db_pos_neg)

    # The context manager releases the lock only if it was acquired.
    with lock:
        train_db_pos_neg_all.extend(train_db_pos_neg_gpu)
def process_data_mul_step_3(img_paths, opt, train_db_pos_neg_all, lock):
    """Build multi-step action samples with a greedy per-step search.

    For every sequence in ``img_paths``, every 5th frame ``i`` and every
    track in that frame, the box of the same track in frame ``i + 1`` is the
    target.  Starting from the track's box in frame ``i``, up to 15 steps
    are taken; each step tries actions ``0..10`` with ``do_action`` and
    keeps the one with the highest ``cal_iou`` against the target.  The
    stop action is preferred when its IoU is within 0.005 of the best one,
    and the path ends as soon as the stop action is chosen.

    When the IoU reached by the path exceeds ``opts['stopIou']`` each step
    is stored as a positive sample (one-hot action label, score 1) and, for
    every third step, one negative box sampled around the target is stored
    (labels all -1, score 0).  All samples go into a single dict of
    parallel lists that is appended to ``train_db_pos_neg_all``.

    Tracks with no match in the next frame are skipped, and a negative is
    stored only when one was found within 20 draws (previously a stale
    value from an earlier iteration was reused in both cases).

    Args:
        img_paths: iterable of sequence dicts with keys ``gt``,
            ``img_files`` and ``trackid`` (per-frame lists).
        opt: options dict; a shallow copy is modified locally
            (``imgSize`` is set per sequence).
        train_db_pos_neg_all: shared list receiving the sample dict.
        lock: lock guarding ``train_db_pos_neg_all``; must support the
            ``with`` statement.
    """
    opts = opt.copy()
    train_db_pos_neg = {
        'img_path': [],
        'bboxes': [],
        'labels': [],
        'score_labels': []
    }
    distan = 1  # frame distance between the start box and the target box
    for train_i in img_paths:
        n_frames = len(train_i['gt'])
        # NOTE(review): the [39:-5] slice presumably strips a fixed-length
        # data-root prefix and the image extension -- verify against the
        # layout of 'img_files'.
        gt_file_path = '../datasets/data/ILSVRC/Annotations/VID/train/' + train_i[
            'img_files'][0][39:-5] + '.xml'
        imginfo = get_xml_img_info(gt_file_path)
        opts['imgSize'] = imginfo['imgsize']

        for i in range(0, n_frames - distan - 1, 5):
            for l in range(len(train_i['trackid'][i])):
                # Box of the same track in the target frame
                # (last match wins).
                gt_end = None
                for k in range(len(train_i['trackid'][i + distan])):
                    if train_i['trackid'][i][l] == train_i['trackid'][
                            i + distan][k]:
                        gt_end = train_i['gt'][i + distan][k]
                if gt_end is None:
                    # The track is absent from the target frame.
                    continue

                iou_max = 0
                step_max = []
                box_max = []
                curr_bbox = train_i['gt'][i][l]
                for st in range(15):
                    box_max.append(curr_bbox)
                    t_iou_max = 0
                    t_box_max = []
                    t_act_max = -1
                    for action in range(11):
                        curr_bbox_t = do_action(curr_bbox, opts, action,
                                                opts['imgSize'])
                        t_iou = cal_iou(curr_bbox_t, gt_end)
                        if action == opts['stop_action']:
                            t_iou_act_stop = t_iou
                            t_box_act_stop = curr_bbox_t
                        if t_iou > t_iou_max:
                            t_iou_max = t_iou
                            t_act_max = action
                            t_box_max = curr_bbox_t
                    # Prefer stopping when moving gains (almost) nothing.
                    if abs(t_iou_act_stop - t_iou_max
                           ) < 0.005 and t_act_max != opts['stop_action']:
                        t_iou_max = t_iou_act_stop
                        t_act_max = opts['stop_action']
                        t_box_max = t_box_act_stop
                    if t_act_max == -1:
                        break
                    iou_max = t_iou_max
                    if t_act_max == opts['stop_action']:
                        step_max.append(opts['stop_action'])
                        break
                    else:
                        step_max.append(t_act_max)
                        curr_bbox = t_box_max

                if iou_max > opts['stopIou']:  # save data to train_db
                    for datai in range(len(step_max)):
                        train_db_pos_neg['img_path'].append(
                            train_i['img_files'][i + distan])
                        train_db_pos_neg['bboxes'].append(box_max[datai])
                        action_t = np.zeros(opts['num_actions'])
                        action_t[step_max[datai]] = 1
                        train_db_pos_neg['labels'].append(action_t.tolist())
                        train_db_pos_neg['score_labels'].extend(
                            list(np.ones(1, dtype=int)))

                        if datai % 3 != 0:
                            continue

                        # Up to 20 attempts to draw one negative box.
                        neg_box = None
                        for _attempt in range(20):
                            neg = gen_samples('gaussian', gt_end, 5, opts, 2,
                                              10)
                            r = overlap_ratio(
                                neg, np.matlib.repmat(gt_end, len(neg), 1))
                            neg = neg[np.array(r) <
                                      opts['consecutive_negThre_train']]
                            if len(neg) > 0:
                                neg_box = neg[0]
                                break

                        if neg_box is not None:
                            train_db_pos_neg['img_path'].append(
                                train_i['img_files'][i + distan])
                            train_db_pos_neg['bboxes'].append(neg_box)
                            action_label_neg = np.full(
                                (opts['num_actions'], 1), fill_value=-1)
                            action_label_neg = np.transpose(
                                action_label_neg).tolist()
                            train_db_pos_neg['labels'].extend(action_label_neg)
                            train_db_pos_neg['score_labels'].extend(
                                list(np.zeros(1, dtype=int)))

    # The context manager releases the lock only if it was acquired.
    with lock:
        train_db_pos_neg_all.append(train_db_pos_neg)
def process_data_ILSVR_consecutive_frame(img_paths, opt, train_db_pos_neg_all,
                                         lock):
    """Build training samples from the boxes of a track in earlier frames.

    Frames of every sequence are visited from the last one down to frame 1.
    For each track in frame ``i`` its box is the target; the boxes of the
    same track in up to ``max_dis`` (15) preceding frames are used as
    positive samples while their ``cal_iou`` with the target stays above
    0.7, with action labels from ``gen_action_pos_neg_labels``.  The
    look-back for a track stops at the first earlier box that falls to 0.7
    or below.  For every third frame of distance one negative box sampled
    around the target is stored (labels all -1, score 0).

    A negative is stored only when one was found within 20 draws;
    previously the positive box that had just been stored was appended
    again as a negative when every draw failed.

    All samples go into a single dict of parallel lists that is appended to
    ``train_db_pos_neg_all``.

    Args:
        img_paths: iterable of sequence dicts with keys ``gt``,
            ``img_files`` and ``trackid`` (per-frame lists).
        opt: options dict; a shallow copy is modified locally
            (``imgSize`` is set per sequence).
        train_db_pos_neg_all: shared list receiving the sample dict.
        lock: lock guarding ``train_db_pos_neg_all``; must support the
            ``with`` statement.
    """
    opts = opt.copy()
    train_db_pos_neg = {
        'img_path': [],
        'bboxes': [],
        'labels': [],
        'score_labels': []
    }
    max_dis = 15  # how many earlier frames are inspected per track
    for train_i in img_paths:
        n_frames = len(train_i['gt'])
        # NOTE(review): the [39:-5] slice presumably strips a fixed-length
        # data-root prefix and the image extension -- verify against the
        # layout of 'img_files'.
        gt_file_path = '../datasets/data/ILSVRC/Annotations/VID/train/' + train_i[
            'img_files'][0][39:-5] + '.xml'
        imginfo = get_xml_img_info(gt_file_path)
        opts['imgSize'] = imginfo['imgsize']

        for i in range(n_frames - 1, 0, -1):
            for l in range(len(train_i['trackid'][i])):
                gt_bbox = train_i['gt'][i][l]
                bk_sign = False
                for j in range(i - 1, i - max_dis - 1, -1):
                    if j < 0:
                        break
                    for k in range(len(train_i['trackid'][j])):
                        if train_i['trackid'][j][k] != train_i['trackid'][i][
                                l]:
                            continue

                        prev_box = train_i['gt'][j][k]
                        c_iou = cal_iou(prev_box, gt_bbox)
                        if c_iou > 0.7:
                            action_label_pos, _ = gen_action_pos_neg_labels(
                                opts['num_actions'], opts,
                                np.array(prev_box), gt_bbox)

                            train_db_pos_neg['img_path'].append(
                                train_i['img_files'][i])
                            train_db_pos_neg['bboxes'].append(prev_box)
                            action_label_pos = np.transpose(
                                action_label_pos).tolist()
                            train_db_pos_neg['labels'].extend(
                                action_label_pos)
                            train_db_pos_neg['score_labels'].extend(
                                list(np.ones(1, dtype=int)))
                        else:
                            # The track drifted too far: stop looking back.
                            bk_sign = True
                            break

                        if (i - j) % 3 != 0:
                            continue

                        # Up to 20 attempts to draw one negative box.
                        neg_box = None
                        for _attempt in range(20):
                            neg = gen_samples('gaussian', gt_bbox, 5, opts, 2,
                                              10)
                            r = overlap_ratio(
                                neg, np.matlib.repmat(gt_bbox, len(neg), 1))
                            neg = neg[np.array(r) <
                                      opts['consecutive_negThre_train']]
                            if len(neg) > 0:
                                neg_box = neg[0]
                                break

                        if neg_box is not None:
                            train_db_pos_neg['img_path'].append(
                                train_i['img_files'][i])
                            train_db_pos_neg['bboxes'].append(neg_box)
                            action_label_neg = np.full(
                                (opts['num_actions'], 1), fill_value=-1)
                            action_label_neg = np.transpose(
                                action_label_neg).tolist()
                            train_db_pos_neg['labels'].extend(
                                action_label_neg)
                            train_db_pos_neg['score_labels'].extend(
                                list(np.zeros(1, dtype=int)))
                    if bk_sign:
                        break

    # The context manager releases the lock only if it was acquired.
    with lock:
        train_db_pos_neg_all.append(train_db_pos_neg)
def process_data_vot(train_sequences, vid_info, opt, train_db_pos,
                     train_db_neg, lock):
    """Generate positive and negative training samples for selected frames.

    For every frame index in ``train_sequences`` with a non-empty
    ground-truth box, ``opts['nPos_train']`` positive boxes (overlap above
    ``opts['posThre_train']``) and ``opts['nNeg_train']`` negative boxes
    (overlap below ``opts['negThre_train']``) are sampled around the ground
    truth.  Positives get action labels from ``gen_action_labels`` and
    score 1; negatives get labels of all -1 and score 0.  One dict per frame
    is added to each of the two shared lists.

    NOTE: each sampling loop repeats until enough boxes were collected and
    therefore does not terminate if no candidate ever passes the threshold.

    Args:
        train_sequences: sequence of frame indices into ``vid_info``.
        vid_info: dict with per-frame lists ``gt`` and ``img_files``.
        opt: options dict (a shallow copy is used).
        train_db_pos: shared list extended with the positive-sample dicts.
        train_db_neg: shared list extended with the negative-sample dicts.
        lock: lock guarding both shared lists; must support the ``with``
            statement.
    """
    opts = opt.copy()
    train_db_pos_gpu = []
    train_db_neg_gpu = []

    for img_idx in train_sequences:
        gt_bbox = vid_info['gt'][img_idx]

        if len(gt_bbox) == 0:
            continue

        pos_examples = []
        while len(pos_examples) < opts['nPos_train']:
            pos = gen_samples('gaussian', gt_bbox, opts['nPos_train'] * 5,
                              opts, 0.1, 5)
            r = overlap_ratio(pos, np.matlib.repmat(gt_bbox, len(pos), 1))
            pos = pos[np.array(r) > opts['posThre_train']]
            if len(pos) == 0:
                continue
            # Random pick (with replacement) of what is still missing.
            pos = pos[np.random.randint(
                low=0,
                high=len(pos),
                size=min(len(pos),
                         opts['nPos_train'] - len(pos_examples))), :]
            pos_examples.extend(pos)

        neg_examples = []
        while len(neg_examples) < opts['nNeg_train']:
            # in original code, this 1 line below use opts['nPos_train'] instead of opts['nNeg_train']
            neg = gen_samples('gaussian', gt_bbox, opts['nNeg_train'] * 5,
                              opts, 2, 10)
            r = overlap_ratio(neg, np.matlib.repmat(gt_bbox, len(neg), 1))
            neg = neg[np.array(r) < opts['negThre_train']]
            if len(neg) == 0:
                continue
            neg = neg[np.random.randint(
                low=0,
                high=len(neg),
                size=min(len(neg),
                         opts['nNeg_train'] - len(neg_examples))), :]
            neg_examples.extend(neg)

        action_labels_pos = gen_action_labels(opts['num_actions'], opts,
                                              np.array(pos_examples), gt_bbox)
        action_labels_neg = np.full((opts['num_actions'], len(neg_examples)),
                                    fill_value=-1)

        # One row of action labels per box.
        action_labels_pos = np.transpose(action_labels_pos).tolist()
        action_labels_neg = np.transpose(action_labels_neg).tolist()

        img_file = vid_info['img_files'][img_idx]

        # score labels: 1 is positive. 0 is negative
        train_db_pos_gpu.append({
            'img_path': np.full(len(pos_examples), img_file),
            'bboxes': pos_examples,
            'labels': action_labels_pos,
            'score_labels': list(np.ones(len(pos_examples), dtype=int)),
        })
        train_db_neg_gpu.append({
            'img_path': np.full(len(neg_examples), img_file),
            'bboxes': neg_examples,
            'labels': action_labels_neg,
            'score_labels': list(np.zeros(len(neg_examples), dtype=int)),
        })

    # The context manager releases the lock only if it was acquired.
    with lock:
        train_db_pos.extend(train_db_pos_gpu)
        train_db_neg.extend(train_db_neg_gpu)
def process_data_ILSVR(img_paths, opt, train_db_pos_neg_all, lock):
    """Build mixed positive/negative training records for ILSVRC VID frames.

    For every ground-truth box of every frame in ``img_paths`` this draws
    ``opts['nPos_train']`` positive and ``opts['nNeg_train']`` negative boxes
    around the ground truth, generates action labels for them and stores
    them together in one record. Records that could not be filled completely
    are dropped. The collected records are appended to the shared output
    list while holding ``lock``.

    Args:
        img_paths: iterable of frame identifiers (strings). Each one is
            concatenated between the hard-coded ILSVRC VID ``train``
            annotation/image directory and the ``.xml`` / ``.JPEG`` suffix.
        opt: options mapping. It is shallow-copied, so the ``imgSize`` entry
            written per frame does not leak back to the caller.
        train_db_pos_neg_all: shared list, extended in place with dicts that
            have the keys ``img_path``, ``bboxes``, ``labels`` and
            ``score_labels`` (1 for positive samples, 0 for negative ones).
        lock: lock protecting ``train_db_pos_neg_all``; used as a context
            manager.
    """
    opts = opt.copy()
    train_db_pos_neg_gpu = []
    for train_i in img_paths:
        gt_file_path = '../datasets/data/ILSVRC/Annotations/VID/train/' + train_i + '.xml'
        imginfo = get_xml_img_info(gt_file_path)
        gt_bboxs = imginfo['gts']
        opts['imgSize'] = imginfo['imgsize']
        img_path = '../datasets/data/ILSVRC/Data/VID/train/' + train_i + '.JPEG'
        for gt_bbox in gt_bboxs:
            pos_examples = []
            while len(pos_examples) < opts['nPos_train']:
                pos = gen_samples('gaussian', gt_bbox, opts['nPos_train'] * 5,
                                  opts, 0.1, 5)
                r = overlap_ratio(pos, np.matlib.repmat(gt_bbox, len(pos), 1))
                pos = pos[np.array(r) > opts['posThre_train']]
                if len(pos) == 0:
                    # Some boxes (very small / unclear images) never yield a
                    # sample above the overlap threshold; give up instead of
                    # looping forever. The short record is discarded below.
                    break
                # Indices are drawn with replacement, so duplicates can occur.
                pos = pos[np.random.randint(low=0,
                                            high=len(pos),
                                            size=min(
                                                len(pos), opts['nPos_train'] -
                                                len(pos_examples))), :]
                pos_examples.extend(pos)

            neg_examples = []
            while len(neg_examples) < opts['nNeg_train']:
                # in original code, this 1 line below use opts['nPos_train'] instead of opts['nNeg_train']
                neg = gen_samples('gaussian', gt_bbox, opts['nNeg_train'] * 5,
                                  opts, 2, 10)
                r = overlap_ratio(neg, np.matlib.repmat(gt_bbox, len(neg), 1))
                neg = neg[np.array(r) < opts['negThre_train']]
                if len(neg) == 0:
                    # Same escape hatch as for the positive samples.
                    break
                neg = neg[np.random.randint(low=0,
                                            high=len(neg),
                                            size=min(
                                                len(neg), opts['nNeg_train'] -
                                                len(neg_examples))), :]
                neg_examples.extend(neg)

            action_labels_pos = gen_action_labels(opts['num_actions'], opts,
                                                  np.array(pos_examples),
                                                  gt_bbox)
            # Negative samples carry no valid action: every slot is -1.
            action_labels_neg = np.full(
                (opts['num_actions'], len(neg_examples)), fill_value=-1)

            # Transpose so that there is one label row per sample.
            action_labels_pos = np.transpose(action_labels_pos).tolist()
            action_labels_neg = np.transpose(action_labels_neg).tolist()

            # Positives first, negatives second, in all three parallel lists.
            train_db_pos_neg = {
                'img_path': img_path,
                'bboxes': [],
                'labels': [],
                'score_labels': []
            }
            train_db_pos_neg['bboxes'].extend(pos_examples)
            train_db_pos_neg['labels'].extend(action_labels_pos)
            # score labels: 1 is positive. 0 is negative
            train_db_pos_neg['score_labels'].extend(
                list(np.ones(len(pos_examples), dtype=int)))

            train_db_pos_neg['bboxes'].extend(neg_examples)
            train_db_pos_neg['labels'].extend(action_labels_neg)
            train_db_pos_neg['score_labels'].extend(
                list(np.zeros(len(neg_examples), dtype=int)))

            # Keep only records for which both sample sets were filled.
            if len(train_db_pos_neg['bboxes']) == (opts['nPos_train'] +
                                                   opts['nNeg_train']):
                train_db_pos_neg_gpu.append(train_db_pos_neg)

    # ``with`` releases the lock only if it was actually acquired; the old
    # try/finally called release() even when acquire() itself had failed.
    with lock:
        train_db_pos_neg_all.extend(train_db_pos_neg_gpu)
# Example no. 14
# 0
def run_tracking(
        img_list,
        init_bbox,
        gt=None,
        savefig_dir='',
        display=False,
        siamfc_path="../models/siamfc_pretrained.pth",
        policy_path="../models/template_policy/11200_template_policy.pth",
        gpu_id=0):
    """Track one target through an image sequence.

    Each frame is processed in three steps: a template policy scores the
    ``T_N`` stored templates, SiamFC proposes a box, and the actor network
    refines that box. Every 10th frame a new template is taken from the
    refined box and replaces the oldest non-initial template.

    Args:
        img_list: sequence of image file paths; ``img_list[0]`` is the
            initialisation frame.
        init_bbox: initial box, used as ``(x, y, width, height)``.
        gt: optional per-frame ground-truth boxes indexed as ``gt[i, :]``;
            only used for display and the overlap printout.
        savefig_dir: directory for saved figures (only used when the local
            ``savefig`` switch is enabled).
        display: if true, show the boxes with matplotlib and print per-frame
            progress.
        siamfc_path: weights file handed to ``SiamFCTracker``.
        policy_path: accepted for interface compatibility.
            NOTE(review): currently never read -- the policy weights are
            loaded from a hard-coded path below; confirm which is intended.
        gpu_id: GPU index handed to ``SiamFCTracker``.

    Returns:
        ``(result, fps)`` where ``result`` is a ``(len(img_list), 4)`` array
        of boxes and ``fps`` is ``len(img_list)`` divided by the summed
        per-frame processing time. Rows of frames whose sharpness check
        fails are left as zeros.

    Reads the module-level ``opts`` mapping (``T_N``, ``use_gpu``,
    ``show_train``).
    """
    rate = init_bbox[2] / init_bbox[3]
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result[0] = target_bbox

    # Load the pretrained actor, keeping only the keys the model defines.
    actor = Actor()
    pretrained_act_dict = torch.load(
        "../models/Double_agent/95600_DA_actor.pth")
    actor_dict = actor.state_dict()
    pretrained_act_dict = {
        k: v
        for k, v in pretrained_act_dict.items() if k in actor_dict
    }
    actor_dict.update(pretrained_act_dict)
    actor.load_state_dict(actor_dict)

    siamfc = SiamFCTracker(model_path=siamfc_path, gpu_id=gpu_id)
    siamEmbed = SiameseNet(BaselineEmbeddingNet())

    # Template-selection policy, initialised and then overwritten with the
    # matching pretrained weights.
    T_N = opts['T_N']
    pi = T_Policy(T_N)
    weights_init(pi)
    pretrained_pi_dict = torch.load(
        '../models/template_policy/95600_template_policy.pth')
    pi_dict = pi.state_dict()
    pretrained_pi_dict = {
        k: v
        for k, v in pretrained_pi_dict.items() if k in pi_dict
    }
    pi_dict.update(pretrained_pi_dict)
    pi.load_state_dict(pi_dict)

    if opts['use_gpu']:
        actor = actor.cuda()
        siamEmbed = siamEmbed.cuda()
        pi = pi.cuda()

    image = cv2.cvtColor(cv2.imread(img_list[0]), cv2.COLOR_BGR2RGB)

    # Initialise actor and tracker on the first frame; the template buffer
    # starts with T_N copies of the initial template.
    deta_flag, out_flag_first = init_actor(actor, image, target_bbox)
    template = siamfc.init(image, target_bbox)
    templates = [template for _ in range(T_N)]
    spf_total = 0

    # Local switch: set to a truthy value to dump every frame to savefig_dir.
    savefig = 0

    if display or savefig:
        dpi = 80.0
        figsize = (image.shape[1] / dpi, image.shape[0] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image)

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(tuple(result[0, :2]),
                             result[0, 2],
                             result[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Laplacian variance of the initial target crop, used as a sharpness
    # reference for the per-frame check below.
    imageVar_first = cv2.Laplacian(
        crop_image_blur(np.array(image), target_bbox), cv2.CV_64F).var()

    for i in range(1, len(img_list)):

        tic = time.time()
        # Load image
        image = cv2.cvtColor(cv2.imread(img_list[i]), cv2.COLOR_BGR2RGB)
        np_img = np.array(
            cv2.resize(image, (255, 255),
                       interpolation=cv2.INTER_AREA)).transpose(2, 0, 1)
        # One copy of the frame per template. This must not reuse ``i``:
        # the frame index is needed for result[i], gt[i] and the template
        # refresh schedule further down.
        np_imgs = [np_img for _ in range(T_N)]

        if imageVar_first > 200:
            imageVar = cv2.Laplacian(
                crop_image_blur(np.array(image), target_bbox),
                cv2.CV_64F).var()
        else:
            # Initial crop was already soft: disable the sharpness gate.
            imageVar = 200

        if opts['use_gpu']:
            responses = siamEmbed(
                torch.Tensor(templates).permute(0, 3, 1, 2).float().cuda(),
                torch.Tensor(np_imgs).float().cuda())
        else:
            responses = siamEmbed(
                torch.Tensor(templates).permute(0, 3, 1, 2).float(),
                torch.Tensor(np_imgs).float())

        if opts['use_gpu']:
            pi_input = torch.Tensor(responses.cpu()).permute(1, 0, 2, 3).cuda()
        else:
            pi_input = torch.Tensor(responses).permute(1, 0, 2, 3)
        # detach() is required on both devices before converting to numpy.
        action = pi(pi_input).detach().cpu().numpy()
        action_id = np.argmax(action)
        template = templates[action_id]
        # NOTE(review): the policy-selected ``template`` is not passed here;
        # the tracker is always updated with templates[0]. Confirm intent.
        siam_box = siamfc.update(image, templates[0])
        # Convert (x1, y1, x2, y2) to (x, y, width, height).
        siam_box = np.round([
            siam_box[0], siam_box[1], siam_box[2] - siam_box[0],
            siam_box[3] - siam_box[1]
        ])
        print(siam_box)

        # Estimate target bbox
        img_g, img_l, out_flag = getbatch_actor(
            np.array(image),
            np.array(siam_box).reshape([1, 4]))
        deta_pos = actor(img_l, img_g)
        deta_pos = deta_pos.data.clone().cpu().numpy()
        # Suppress the third action component when it is large, or when the
        # init / out-of-frame flags say it is unreliable.
        if deta_pos[:, 2] > 0.05 or deta_pos[:, 2] < -0.05:
            deta_pos[:, 2] = 0
        if deta_flag or (out_flag and not out_flag_first):
            deta_pos[:, 2] = 0

        pos_ = np.round(
            move_crop_tracking(np.array(siam_box), deta_pos,
                               (image.shape[1], image.shape[0]), rate))

        # Accept the refined box only when the crop is sharp enough.
        if imageVar > 100:
            target_bbox = pos_
            result[i] = target_bbox
        # Every 10th frame: add a fresh template and drop the oldest one
        # after the initial template (index 0 is always kept).
        if i % 10 == 0:
            template = siamfc.init(image, pos_)
            templates.append(template)
            templates.pop(1)

        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

            rect.set_xy(result[i, :2])
            rect.set_width(result[i, 2])
            rect.set_height(result[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '%04d.jpg' % (i)),
                            dpi=dpi)
        if display:
            if gt is None:
                print("Frame %d/%d,  Time %.3f" %
                      (i, len(img_list), spf))
            elif opts['show_train']:
                print(
                    "Frame %d/%d, Overlap %.3f, Time %.3f, box (%d,%d,%d,%d), var %d" %
                    (i, len(img_list), overlap_ratio(gt[i], result[i])[0],
                     spf, target_bbox[0], target_bbox[1], target_bbox[2],
                     target_bbox[3], imageVar))

    # NOTE(review): spf_total stays 0 when img_list has a single frame, so
    # this division raises ZeroDivisionError in that case.
    fps = len(img_list) / spf_total
    return result, fps
# Example no. 15
# 0
def get_train_dbs(vid_info, opts):
    """Create per-frame positive and negative training records for a video.

    Side effects: ``opts['scale_factor']`` is set to 1.05 and
    ``opts['imgSize']`` to the shape of the first frame.

    Args:
        vid_info: mapping that provides ``img_files``, ``db_name``, ``gt``
            and either ``gt_use`` (for ``alov300``) or ``nframes``.
        opts: options mapping; read for the sample counts and overlap
            thresholds and modified as described above.

    Returns:
        ``(train_db_pos, train_db_neg)``: two lists with one dict per used
        frame, each holding ``img_path``, ``bboxes``, ``labels`` and
        ``score_labels`` (1 for positives, 0 for negatives).
    """
    first_frame = cv2.imread(vid_info['img_files'][0])

    opts['scale_factor'] = 1.05
    opts['imgSize'] = list(first_frame.shape)
    gt_skip = opts['train']['gt_skip']

    if vid_info['db_name'] == 'alov300':
        # NOTE(review): this is the result of an ``== 1`` comparison, not a
        # list of frame indices -- confirm this is what is intended.
        train_sequences = vid_info['gt_use'] == 1
    else:
        train_sequences = list(range(0, vid_info['nframes'], gt_skip))

    def draw_boxes(box, quota_key, trans_f, scale_f, accept):
        """Sample around ``box`` until ``opts[quota_key]`` boxes are kept."""
        kept = []
        while len(kept) < opts[quota_key]:
            candidates = gen_samples('gaussian', box, opts[quota_key] * 5,
                                     opts, trans_f, scale_f)
            overlaps = overlap_ratio(
                candidates, np.matlib.repmat(box, len(candidates), 1))
            candidates = candidates[accept(np.array(overlaps))]
            if len(candidates) == 0:
                # Nothing passed the threshold: draw again. This retries
                # without limit.
                continue
            n_take = min(len(candidates), opts[quota_key] - len(kept))
            # Indices are drawn with replacement.
            chosen = np.random.randint(low=0, high=len(candidates),
                                       size=n_take)
            kept.extend(candidates[chosen, :])
        return kept

    train_db_pos = []
    train_db_neg = []

    for img_idx in train_sequences:
        gt_bbox = vid_info['gt'][img_idx]

        # Frames without an annotation contribute nothing.
        if len(gt_bbox) == 0:
            continue

        frame_path = vid_info['img_files'][img_idx]

        pos_examples = draw_boxes(
            gt_bbox, 'nPos_train', 0.1, 5,
            lambda ov: ov > opts['posThre_train'])
        # in original code, the negative draw used opts['nPos_train'] instead of opts['nNeg_train']
        neg_examples = draw_boxes(
            gt_bbox, 'nNeg_train', 2, 10,
            lambda ov: ov < opts['negThre_train'])

        show_examples_test(pos_examples, neg_examples, frame_path)

        pos_label_matrix = gen_action_labels(opts['num_actions'], opts,
                                             np.array(pos_examples), gt_bbox)
        # Negatives get -1 in every action slot.
        neg_label_matrix = np.full((opts['num_actions'], len(neg_examples)),
                                   fill_value=-1)

        # One label row per sample after the transpose.
        action_labels_pos = np.transpose(pos_label_matrix).tolist()
        action_labels_neg = np.transpose(neg_label_matrix).tolist()

        # score labels: 1 is positive. 0 is negative
        train_db_pos.append({
            'img_path': np.full(len(pos_examples), frame_path),
            'bboxes': pos_examples,
            'labels': action_labels_pos,
            'score_labels': list(np.ones(len(pos_examples), dtype=int)),
        })
        train_db_neg.append({
            'img_path': np.full(len(neg_examples), frame_path),
            'bboxes': neg_examples,
            'labels': action_labels_neg,
            'score_labels': list(np.zeros(len(neg_examples), dtype=int)),
        })

    return train_db_pos, train_db_neg