# NOTE: the snippets below assume the project's usual module-level imports
# (numpy as np, numpy.matlib, random, glob, os, time, cv2, torch, ...) and
# its helper functions such as gen_samples, overlap_ratio, cal_iou,
# do_action and gen_action_labels.
def generate_samples(self, curr_bbox, positive, init=False):
        if init:
            if positive:
                n = self.opts['nPos_init']
                Thre = self.opts['posThre_init']
            else:
                n = self.opts['nNeg_init']
                Thre = self.opts['negThre_init']
        else:
            if positive:
                n = self.opts['nPos_online']
                Thre = self.opts['posThre_online']
            else:
                n = self.opts['nNeg_online']
                Thre = self.opts['negThre_online']

        assert n > 0, "if n = 0, don't initialize this class"

        if positive:
            examples = gen_samples('gaussian', curr_bbox, n * 2, self.opts,
                                   self.opts['finetune_trans'],
                                   self.opts['finetune_scale_factor'])
            r = overlap_ratio(examples,
                              np.matlib.repmat(curr_bbox, len(examples), 1))
            examples = examples[np.array(r) > Thre]
            examples = examples[np.random.randint(
                low=0, high=len(examples), size=min(len(examples), n)), :]

            action_labels = gen_action_labels(self.opts['num_actions'],
                                              self.opts, np.array(examples),
                                              curr_bbox)
            # score labels: 1 is positive. 0 is negative
            score_labels = list(np.ones(len(examples), dtype=int))

        else:
            examples = gen_samples('uniform', curr_bbox, n * 2, self.opts, 2,
                                   5)
            r = overlap_ratio(examples,
                              np.matlib.repmat(curr_bbox, len(examples), 1))
            examples = examples[np.array(r) < Thre]
            examples = examples[np.random.randint(
                low=0, high=len(examples), size=min(len(examples), n)), :]

            action_labels = np.full((self.opts['num_actions'], len(examples)),
                                    fill_value=-1)
            # score labels: 1 is positive. 0 is negative
            score_labels = list(np.zeros(len(examples), dtype=int))

        action_labels = np.transpose(action_labels).tolist()
        bboxes = examples
        labels = action_labels

        return bboxes, labels, score_labels
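

# generate_samples above filters its candidates by overlap with the current
# box via overlap_ratio, which is not part of this listing. A minimal sketch
# of what it might look like, assuming [x, y, w, h] boxes (the project's own
# implementation may differ in details):
def overlap_ratio_sketch(boxes_a, boxes_b):
    """Element-wise IoU between two (N, 4) arrays of [x, y, w, h] boxes."""
    import numpy as np  # local import so the sketch is self-contained
    boxes_a = np.asarray(boxes_a, dtype=float)
    boxes_b = np.asarray(boxes_b, dtype=float)
    # intersection rectangle
    x1 = np.maximum(boxes_a[:, 0], boxes_b[:, 0])
    y1 = np.maximum(boxes_a[:, 1], boxes_b[:, 1])
    x2 = np.minimum(boxes_a[:, 0] + boxes_a[:, 2],
                    boxes_b[:, 0] + boxes_b[:, 2])
    y2 = np.minimum(boxes_a[:, 1] + boxes_a[:, 3],
                    boxes_b[:, 1] + boxes_b[:, 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    union = boxes_a[:, 2] * boxes_a[:, 3] + boxes_b[:, 2] * boxes_b[:, 3] - inter
    return inter / np.maximum(union, 1e-12)
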
def process_data_mul_step_2(img_paths, opt, train_db_pos_neg_all, lock):
    opts = opt.copy()
    train_db_pos_neg_gpu = []
    for train_i in img_paths:
        n_frames = len(train_i['gt'])
        # max_dis=15
        gt_file_path = '../datasets/data/ILSVRC/Annotations/VID/train/' + train_i[
            'img_files'][0][39:-5] + '.xml'
        imginfo = get_xml_img_info(gt_file_path)
        opts['imgSize'] = imginfo['imgsize']

        for i in range(0, n_frames - 2, 5):
            for l in range(len(train_i['trackid'][i])):
                train_db_pos_neg = {
                    'img_path': train_i['img_files'][i + 1],
                    'bboxes': [],
                    'labels': [],
                    'score_labels': []
                }
                gt_end = None
                for k in range(len(train_i['trackid'][i + 1])):
                    if train_i['trackid'][i][l] == train_i['trackid'][i + 1][k]:
                        gt_end = train_i['gt'][i + 1][k]
                if gt_end is None:  # this track has no box in frame i + 1
                    continue

                step_list = []
                box_list = []
                box_list.append(train_i['gt'][i][l])
                for st_list in range(14):
                    iou_max = -1
                    step_max = []
                    box_max = []
                    for lp in range(50):
                        curr_bbox = box_list[-1]
                        step = []
                        box = []
                        for st in range(5):  #step numbers
                            action = random.randint(0, 10)
                            step.append(action)
                            box.append(curr_bbox)
                            curr_bbox = do_action(curr_bbox, opts, action,
                                                  opts['imgSize'])
                        box.append(curr_bbox)
                        step.append(opts['stop_action'])  #stop action
                        c_iou = cal_iou(curr_bbox, gt_end)
                        if c_iou > iou_max:
                            iou_max = c_iou
                            step_max = step
                            box_max = box
                    # if len(step_max)==0:
                    #     print(c_iou,iou_max)
                    step_list.append(step_max[0])
                    box_list.append(box_max[1])
                step_list.append(opts['stop_action'])
                iou_max = cal_iou(box_list[-1], gt_end)
                if iou_max > opts['stopIou']:  #save data to train_db
                    for datai in range(len(step_list)):
                        train_db_pos_neg['bboxes'].append(box_list[datai])
                        action_t = np.zeros(opts['num_actions'])
                        action_t[step_list[datai]] = 1
                        action_label_pos = action_t.tolist()
                        train_db_pos_neg['labels'].append(action_label_pos)
                        train_db_pos_neg['score_labels'].extend(
                            list(np.ones(1, dtype=int)))

                        if datai % 3 == 0:
                            # try up to 20 times to draw one hard negative
                            # around gt_end; skip the negative for this step
                            # if no draw passes the overlap threshold
                            pos_neg_box = None
                            for _ in range(20):
                                neg = gen_samples('gaussian', gt_end, 5, opts,
                                                  2, 10)
                                r = overlap_ratio(
                                    neg, np.matlib.repmat(gt_end, len(neg), 1))
                                neg = neg[np.array(r) <
                                          opts['consecutive_negThre_train']]
                                if len(neg) > 0:
                                    pos_neg_box = neg[0]
                                    break
                            if pos_neg_box is None:
                                continue
                            train_db_pos_neg['bboxes'].append(pos_neg_box)
                            action_label_neg = np.full(
                                (opts['num_actions'], 1), fill_value=-1)
                            action_label_neg = np.transpose(
                                action_label_neg).tolist()
                            train_db_pos_neg['labels'].extend(action_label_neg)
                            train_db_pos_neg['score_labels'].extend(
                                list(np.zeros(1, dtype=int)))
                        # train_db_pos_neg_gpu.append(train_db_pos_neg)

                # if len(train_db_pos_neg['bboxes']) >0:
                # print(iou_max,len(train_db_pos_neg['bboxes']))
                if len(train_db_pos_neg['bboxes']) == 20:
                    train_db_pos_neg_gpu.append(train_db_pos_neg)
    try:
        lock.acquire()
        train_db_pos_neg_all.extend(train_db_pos_neg_gpu)
    except Exception as err:
        raise err
    finally:
        lock.release()
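
# process_data_mul_step_2 is written as a worker: it accumulates records
# locally and appends them to a shared list under a lock. A minimal sketch of
# how it might be driven, assuming `train_videos` is the list of per-video
# dicts iterated above and that the worker is importable from this module
# (names here are illustrative, not part of the original project):
def run_step_2_workers_sketch(train_videos, opts, n_workers=4):
    import multiprocessing as mp
    manager = mp.Manager()
    shared_db = manager.list()  # collects the per-frame training records
    lock = manager.Lock()
    parts = [train_videos[w::n_workers] for w in range(n_workers)]  # round-robin split
    procs = [
        mp.Process(target=process_data_mul_step_2,
                   args=(part, opts, shared_db, lock)) for part in parts
    ]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    return list(shared_db)
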
def process_data_mul_step_3(img_paths, opt, train_db_pos_neg_all, lock):
    opts = opt.copy()
    # train_db_pos_neg_gpu = []
    train_db_pos_neg = {
        'img_path': [],  # train_i['img_files'][i],
        'bboxes': [],
        'labels': [],
        'score_labels': []
    }
    distan = 1
    for train_i in img_paths:
        n_frames = len(train_i['gt'])
        # max_dis=15
        gt_file_path = '../datasets/data/ILSVRC/Annotations/VID/train/' + train_i[
            'img_files'][0][39:-5] + '.xml'
        imginfo = get_xml_img_info(gt_file_path)
        opts['imgSize'] = imginfo['imgsize']

        for i in range(0, n_frames - distan - 1, 5):
            for l in range(len(train_i['trackid'][i])):
                # train_db_pos_neg = {
                #     'img_path': train_i['img_files'][i + distan],
                #     'bboxes': [],
                #     'labels': [],
                #     'score_labels': []
                # }
                gt_end = None
                for k in range(len(train_i['trackid'][i + distan])):
                    if train_i['trackid'][i][l] == train_i['trackid'][
                            i + distan][k]:
                        gt_end = train_i['gt'][i + distan][k]
                if gt_end is None:  # this track has no box in frame i + distan
                    continue
                iou_max = 0
                step_max = []
                box_max = []
                curr_bbox = train_i['gt'][i][l]
                # if i==5:
                #     print("debug")
                for st in range(15):
                    box_max.append(curr_bbox)
                    t_iou_max = 0
                    t_box_max = []
                    t_act_max = -1
                    for action in range(11):
                        curr_bbox_t = do_action(curr_bbox, opts, action,
                                                opts['imgSize'])
                        t_iou = cal_iou(curr_bbox_t, gt_end)
                        if action == opts['stop_action']:
                            t_iou_act_stop = t_iou
                            t_box_act_stop = curr_bbox_t
                        if t_iou > t_iou_max:
                            t_iou_max = t_iou
                            t_act_max = action
                            t_box_max = curr_bbox_t
                    if abs(t_iou_act_stop - t_iou_max
                           ) < 0.005 and t_act_max != opts['stop_action']:
                        t_iou_max = t_iou_act_stop
                        t_act_max = opts['stop_action']
                        t_box_max = t_box_act_stop
                    if t_act_max == -1:
                        break
                    iou_max = t_iou_max
                    # if st==0:
                    #     print("")
                    #     print("start iou: %f,"%(t_iou_act_stop),end='  ')
                    # print("do %d -> %f,"%(t_act_max,iou_max),end='  ')
                    if t_act_max == opts['stop_action']:
                        step_max.append(opts['stop_action'])
                        break
                    else:
                        step_max.append(t_act_max)
                        curr_bbox = t_box_max

                # for lp in range(500):
                #     curr_bbox = train_i['gt'][i][l]
                #     step=[]
                #     box=[]
                #     for st in range(5): #step numbers
                #         action=random.randint(0, 10)
                #         # if st==0:
                #         #     print(action)
                #         step.append(action)
                #         box.append(curr_bbox)
                #         curr_bbox = do_action(curr_bbox, opts, action, opts['imgSize'])
                #     box.append(curr_bbox)
                #     step.append(opts['stop_action'])  #stop action
                #     # c_iou=cal_iou(curr_bbox,gt_end)
                #     t_iou_max=cal_iou(curr_bbox,gt_end)
                #     t_max_n=-1
                #     for st in range(5):
                #         t_iou=cal_iou(box[st],gt_end)
                #         if t_iou>t_iou_max:
                #             t_iou_max=t_iou
                #             t_max_n=st
                #     if t_max_n>-1:
                #         box=box[:t_max_n+1]
                #         step=step[:t_max_n]
                #         step.append(opts['stop_action'])
                #     if t_iou_max>iou_max:
                #         iou_max=t_iou_max
                #         step_max=step
                #         box_max=box
                if iou_max > opts['stopIou']:  #save data to train_db
                    for datai in range(len(step_max)):
                        train_db_pos_neg['img_path'].append(
                            train_i['img_files'][i + distan])
                        train_db_pos_neg['bboxes'].append(box_max[datai])
                        action_t = np.zeros(opts['num_actions'])
                        action_t[step_max[datai]] = 1
                        action_label_pos = action_t.tolist()
                        train_db_pos_neg['labels'].append(action_label_pos)
                        train_db_pos_neg['score_labels'].extend(
                            list(np.ones(1, dtype=int)))

                        if datai % 3 == 0:
                            # try up to 20 times to draw one hard negative
                            # around gt_end; skip the negative for this step
                            # if no draw passes the overlap threshold
                            pos_neg_box = None
                            for _ in range(20):
                                neg = gen_samples('gaussian', gt_end, 5, opts,
                                                  2, 10)
                                r = overlap_ratio(
                                    neg, np.matlib.repmat(gt_end, len(neg), 1))
                                neg = neg[np.array(r) <
                                          opts['consecutive_negThre_train']]
                                if len(neg) > 0:
                                    pos_neg_box = neg[0]
                                    break
                            if pos_neg_box is None:
                                continue
                            train_db_pos_neg['img_path'].append(
                                train_i['img_files'][i + distan])
                            train_db_pos_neg['bboxes'].append(pos_neg_box)
                            action_label_neg = np.full(
                                (opts['num_actions'], 1), fill_value=-1)
                            action_label_neg = np.transpose(
                                action_label_neg).tolist()
                            train_db_pos_neg['labels'].extend(action_label_neg)
                            train_db_pos_neg['score_labels'].extend(
                                list(np.zeros(1, dtype=int)))
                        # train_db_pos_neg_gpu.append(train_db_pos_neg)

                # if len(train_db_pos_neg['bboxes']) >0:
                # print(iou_max,len(train_db_pos_neg['bboxes']))
                # if len(train_db_pos_neg['bboxes']) == 20:
                #     train_db_pos_neg_gpu.append(train_db_pos_neg)
    try:
        lock.acquire()
        # train_db_pos_neg_all.extend(train_db_pos_neg_gpu)
        train_db_pos_neg_all.append(train_db_pos_neg)
    except Exception as err:
        raise err
    finally:
        lock.release()
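
# The greedy search in process_data_mul_step_3 enumerates the 11 discrete
# actions through do_action, which is not shown in this listing. A simplified
# sketch of the idea, assuming ADNet-style actions (four translations, the
# same four doubled, scale up/down and a stop) on an [x, y, w, h] box; the
# actual ordering, step ratio and clipping used by the project may differ:
def do_action_sketch(bbox, action, img_size, alpha=0.03):
    import numpy as np  # local import so the sketch is self-contained
    x, y, w, h = [float(v) for v in bbox]
    dx, dy = alpha * w, alpha * h
    moves = [(-dx, 0), (dx, 0), (0, -dy), (0, dy),                  # single step
             (-2 * dx, 0), (2 * dx, 0), (0, -2 * dy), (0, 2 * dy)]  # doubled
    if action < 8:  # translation
        x, y = x + moves[action][0], y + moves[action][1]
    elif action == 8:  # scale up around the centre
        x, y, w, h = x - dx / 2, y - dy / 2, w + dx, h + dy
    elif action == 9:  # scale down around the centre
        x, y, w, h = x + dx / 2, y + dy / 2, w - dx, h - dy
    # any other index (the stop action) leaves the box unchanged
    img_h, img_w = img_size[0], img_size[1]  # assumes (height, width, ...)
    x = float(np.clip(x, 0, max(0, img_w - w)))  # keep the box in the image
    y = float(np.clip(y, 0, max(0, img_h - h)))
    return [x, y, w, h]
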
def process_data_ILSVR_consecutive_frame(img_paths, opt, train_db_pos_neg_all,
                                         lock):
    opts = opt.copy()
    # train_db_pos_neg_gpu = []
    train_db_pos_neg = {
        'img_path': [],  # train_i['img_files'][i],
        'bboxes': [],
        'labels': [],
        'score_labels': []
    }
    for train_i in img_paths:
        n_frames = len(train_i['gt'])
        max_dis = 15
        gt_file_path = '../datasets/data/ILSVRC/Annotations/VID/train/' + train_i[
            'img_files'][0][39:-5] + '.xml'
        imginfo = get_xml_img_info(gt_file_path)
        opts['imgSize'] = imginfo['imgsize']

        for i in range(n_frames - 1, 0, -1):
            # train_db_pos_neg = {
            #     'img_path': train_i['img_files'][i],
            #     'bboxes': [],
            #     'labels': [],
            #     'score_labels': []
            # }
            # del_t=len(train_i['trackid'][i])
            # if del_t>1:
            #     print("debug")
            for l in range(len(train_i['trackid'][i])):
                gt_bbox = train_i['gt'][i][l]
                # train_db_pos_neg = {
                #     'img_path': [],#train_i['img_files'][i],
                #     'bboxes': [],
                #     'labels': [],
                #     'score_labels': []
                # }
                bk_sign = False
                for j in range(i - 1, i - max_dis - 1, -1):
                    if j < 0:
                        break
                    for k in range(len(train_i['trackid'][j])):

                        if train_i['trackid'][j][k] == train_i['trackid'][i][
                                l]:
                            # train_db_pos_neg = {
                            #     'img_path': train_i['img_files'][i],
                            #     'bboxes': [],
                            #     'labels': [],
                            #     'score_labels': []
                            # }
                            pos_neg_box = train_i['gt'][j][k]
                            c_iou = cal_iou(pos_neg_box, gt_bbox)
                            # del_iou=cal_iou(pos_neg_box,gt_bbox)
                            # print(i-j,del_iou)
                            if c_iou > 0.7:
                                action_label_pos, _ = gen_action_pos_neg_labels(
                                    opts['num_actions'], opts,
                                    np.array(pos_neg_box), gt_bbox)

                                train_db_pos_neg['img_path'].append(
                                    train_i['img_files'][i])
                                train_db_pos_neg['bboxes'].append(pos_neg_box)
                                action_label_pos = np.transpose(
                                    action_label_pos).tolist()
                                train_db_pos_neg['labels'].extend(
                                    action_label_pos)
                                train_db_pos_neg['score_labels'].extend(
                                    list(np.ones(1, dtype=int)))
                                # train_db_pos_neg_gpu.append(train_db_pos_neg)
                            else:
                                bk_sign = True
                                break

                            # train_db_pos_neg = {
                            #     'img_path': train_i['img_files'][i],
                            #     'bboxes': [],
                            #     'labels': [],
                            #     'score_labels': []
                            # }
                            if (i - j) % 3 == 0:
                                # try up to 20 times to draw one hard negative
                                # around gt_bbox; the separate neg_box variable
                                # ensures the positive box above is never
                                # recorded as a negative when every draw fails
                                neg_box = None
                                for _ in range(20):
                                    neg = gen_samples('gaussian', gt_bbox, 5,
                                                      opts, 2, 10)
                                    r = overlap_ratio(
                                        neg,
                                        np.matlib.repmat(gt_bbox, len(neg), 1))
                                    neg = neg[np.array(
                                        r) < opts['consecutive_negThre_train']]
                                    if len(neg) > 0:
                                        neg_box = neg[0]
                                        break
                                if neg_box is not None:
                                    train_db_pos_neg['img_path'].append(
                                        train_i['img_files'][i])
                                    train_db_pos_neg['bboxes'].append(neg_box)
                                    action_label_neg = np.full(
                                        (opts['num_actions'], 1),
                                        fill_value=-1)
                                    action_label_neg = np.transpose(
                                        action_label_neg).tolist()
                                    train_db_pos_neg['labels'].extend(
                                        action_label_neg)
                                    train_db_pos_neg['score_labels'].extend(
                                        list(np.zeros(1, dtype=int)))
                            # train_db_pos_neg_gpu.append(train_db_pos_neg)
                    if bk_sign:
                        break

                # if len(train_db_pos_neg['bboxes']) >0:
                # if len(train_db_pos_neg['bboxes']) == 20:
                #     train_db_pos_neg_gpu.append(train_db_pos_neg)
    try:
        lock.acquire()
        # train_db_pos_neg_all.extend(train_db_pos_neg_gpu)
        train_db_pos_neg_all.append(train_db_pos_neg)
    except Exception as err:
        raise err
    finally:
        lock.release()
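
# The ILSVRC helpers above read one VID annotation file per frame through
# get_xml_img_info. A rough sketch of such a parser, assuming the standard
# ILSVRC VID XML layout (<size> plus one <object> per track with a <bndbox>);
# the project's helper and its exact return format may differ:
def get_xml_img_info_sketch(xml_path):
    import xml.etree.ElementTree as ET
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)
    gts, trackids = [], []
    for obj in root.findall('object'):
        bb = obj.find('bndbox')
        xmin = int(bb.find('xmin').text)
        ymin = int(bb.find('ymin').text)
        xmax = int(bb.find('xmax').text)
        ymax = int(bb.find('ymax').text)
        gts.append([xmin, ymin, xmax - xmin, ymax - ymin])  # [x, y, w, h]
        trackids.append(int(obj.find('trackid').text))
    return {'imgsize': [height, width, 3], 'gts': gts, 'trackid': trackids}
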
def process_data_vot(train_sequences, vid_info, opt, train_db_pos,
                     train_db_neg, lock):
    opts = opt.copy()
    train_db_pos_gpu = []
    train_db_neg_gpu = []

    for train_i in range(len(train_sequences)):
        train_db_pos_ = {
            'img_path': [],
            'bboxes': [],
            'labels': [],
            'score_labels': []
        }
        train_db_neg_ = {
            'img_path': [],
            'bboxes': [],
            'labels': [],
            'score_labels': []
        }

        img_idx = train_sequences[train_i]
        gt_bbox = vid_info['gt'][img_idx]

        if len(gt_bbox) == 0:
            continue

        pos_examples = []
        while len(pos_examples) < opts['nPos_train']:
            pos = gen_samples('gaussian', gt_bbox, opts['nPos_train'] * 5,
                              opts, 0.1, 5)
            r = overlap_ratio(pos, np.matlib.repmat(gt_bbox, len(pos), 1))
            pos = pos[np.array(r) > opts['posThre_train']]
            if len(pos) == 0:
                continue
            pos = pos[np.random.
                      randint(low=0,
                              high=len(pos),
                              size=min(len(pos), opts['nPos_train'] -
                                       len(pos_examples))), :]
            pos_examples.extend(pos)

        neg_examples = []
        while len(neg_examples) < opts['nNeg_train']:
            # note: the original code used opts['nPos_train'] here instead of opts['nNeg_train']
            neg = gen_samples('gaussian', gt_bbox, opts['nNeg_train'] * 5,
                              opts, 2, 10)
            r = overlap_ratio(neg, np.matlib.repmat(gt_bbox, len(neg), 1))
            neg = neg[np.array(r) < opts['negThre_train']]
            if len(neg) == 0:
                continue
            neg = neg[np.random.
                      randint(low=0,
                              high=len(neg),
                              size=min(len(neg), opts['nNeg_train'] -
                                       len(neg_examples))), :]
            neg_examples.extend(neg)

        # examples = pos_examples + neg_examples
        action_labels_pos = gen_action_labels(opts['num_actions'], opts,
                                              np.array(pos_examples), gt_bbox)
        action_labels_neg = np.full((opts['num_actions'], len(neg_examples)),
                                    fill_value=-1)

        action_labels_pos = np.transpose(action_labels_pos).tolist()
        action_labels_neg = np.transpose(action_labels_neg).tolist()

        # action_labels = action_labels_pos + action_labels_neg

        train_db_pos_['img_path'] = np.full(len(pos_examples),
                                            vid_info['img_files'][img_idx])
        train_db_pos_['bboxes'] = pos_examples
        train_db_pos_['labels'] = action_labels_pos
        # score labels: 1 is positive. 0 is negative
        train_db_pos_['score_labels'] = list(
            np.ones(len(pos_examples), dtype=int))

        train_db_neg_['img_path'] = np.full(len(neg_examples),
                                            vid_info['img_files'][img_idx])
        train_db_neg_['bboxes'] = neg_examples
        train_db_neg_['labels'] = action_labels_neg
        # score labels: 1 is positive. 0 is negative
        train_db_neg_['score_labels'] = list(
            np.zeros(len(neg_examples), dtype=int))

        train_db_pos_gpu.append(train_db_pos_)
        train_db_neg_gpu.append(train_db_neg_)

    try:
        lock.acquire()
        #print("len(train_db_pos_gpu): %d"%len(train_db_pos_gpu))
        train_db_pos.extend(train_db_pos_gpu)
        #print("len(train_db_pos): %d" % len(train_db_pos))
        #print("len(train_db_neg_gpu): %d" % len(train_db_neg_gpu))
        train_db_neg.extend(train_db_neg_gpu)
        #print("len(train_db_neg): %d" % len(train_db_neg))
    except Exception as err:
        raise err
    finally:
        lock.release()
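
# Every routine in this listing draws candidate boxes through gen_samples,
# which is not included here. A simplified sketch of its 'gaussian' mode,
# assuming [x, y, w, h] boxes and MDNet/ADNet-style jitter controlled by a
# translation factor and a scale factor (the real gen_samples also supports
# 'uniform' sampling, reads extra options from `opts`, and clips the boxes
# to the image):
def gen_gaussian_samples_sketch(bbox, n, trans_f=0.1, scale_f=5,
                                scale_step=1.05):
    import numpy as np  # local import so the sketch is self-contained
    x, y, w, h = [float(v) for v in bbox]
    cx, cy = x + w / 2, y + h / 2
    side = (w + h) / 2
    samples = []
    for _ in range(n):
        # jitter the centre with clipped gaussian noise and the size with a
        # random power of scale_step
        dx = trans_f * side * np.clip(0.5 * np.random.randn(), -1, 1)
        dy = trans_f * side * np.clip(0.5 * np.random.randn(), -1, 1)
        ds = scale_step ** (scale_f * np.clip(0.5 * np.random.randn(), -1, 1))
        nw, nh = w * ds, h * ds
        samples.append([cx + dx - nw / 2, cy + dy - nh / 2, nw, nh])
    return np.array(samples)
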
def process_data_ILSVR(img_paths, opt, train_db_pos_neg_all, lock):
    opts = opt.copy()
    train_db_pos_neg_gpu = []
    # train_db_neg_gpu = []
    for train_i in img_paths:
        train_db_pos_ = {
            'img_path': '',
            'bboxes': [],
            'labels': [],
            'score_labels': []
        }
        train_db_neg_ = {
            'img_path': '',
            'bboxes': [],
            'labels': [],
            'score_labels': []
        }

        #img_idx = train_sequences[train_i]
        #gt_bbox = vid_info['gt'][img_idx]

        #if len(gt_bbox) == 0:
        #    continue
        gt_file_path = '../datasets/data/ILSVRC/Annotations/VID/train/' + train_i + '.xml'
        #gt_bbox=get_xml_box_label(gt_file_path)
        #opts['imgSize'] = get_xml_img_size(gt_file_path)
        imginfo = get_xml_img_info(gt_file_path)
        gt_bboxs = imginfo['gts']
        opts['imgSize'] = imginfo['imgsize']
        img_path = '../datasets/data/ILSVRC/Data/VID/train/' + train_i + '.JPEG'
        for gt_bbox in gt_bboxs:
            train_db_pos_neg = {
                'img_path': '',
                'bboxes': [],
                'labels': [],
                'score_labels': []
            }
            pos_examples = []
            while len(pos_examples) < opts['nPos_train']:
                pos = gen_samples('gaussian', gt_bbox, opts['nPos_train'] * 5,
                                  opts, 0.1, 5)
                r = overlap_ratio(pos, np.matlib.repmat(gt_bbox, len(pos), 1))
                pos = pos[np.array(r) > opts['posThre_train']]
                if len(pos) == 0:
                    #continue
                    break
                pos = pos[np.random.randint(low=0,
                                            high=len(pos),
                                            size=min(
                                                len(pos), opts['nPos_train'] -
                                                len(pos_examples))), :]
                pos_examples.extend(pos)

            neg_examples = []
            while len(neg_examples) < opts['nNeg_train']:
                # note: the original code used opts['nPos_train'] here instead of opts['nNeg_train']
                neg = gen_samples('gaussian', gt_bbox, opts['nNeg_train'] * 5,
                                  opts, 2, 10)
                r = overlap_ratio(neg, np.matlib.repmat(gt_bbox, len(neg), 1))
                neg = neg[np.array(r) < opts['negThre_train']]
                if len(neg) == 0:
                    #continue
                    break
                neg = neg[np.random.randint(low=0,
                                            high=len(neg),
                                            size=min(
                                                len(neg), opts['nNeg_train'] -
                                                len(neg_examples))), :]
                neg_examples.extend(neg)

            # examples = pos_examples + neg_examples
            action_labels_pos = gen_action_labels(opts['num_actions'], opts,
                                                  np.array(pos_examples),
                                                  gt_bbox)
            action_labels_neg = np.full(
                (opts['num_actions'], len(neg_examples)), fill_value=-1)

            action_labels_pos = np.transpose(action_labels_pos).tolist()
            action_labels_neg = np.transpose(action_labels_neg).tolist()

            # action_labels = action_labels_pos + action_labels_neg

            # train_db_pos_['bboxes'].extend(pos_examples)
            # train_db_pos_['labels'].extend(action_labels_pos)
            # # score labels: 1 is positive. 0 is negative
            # train_db_pos_['score_labels'].extend(list(np.ones(len(pos_examples), dtype=int)))
            #
            #
            # train_db_neg_['bboxes'].extend(neg_examples)
            # train_db_neg_['labels'].extend(action_labels_neg)
            # # score labels: 1 is positive. 0 is negative
            # train_db_neg_['score_labels'].extend(list(np.zeros(len(neg_examples), dtype=int)))

            train_db_pos_neg['bboxes'].extend(pos_examples)
            train_db_pos_neg['labels'].extend(action_labels_pos)
            # score labels: 1 is positive. 0 is negative
            train_db_pos_neg['score_labels'].extend(
                list(np.ones(len(pos_examples), dtype=int)))

            train_db_pos_neg['bboxes'].extend(neg_examples)
            train_db_pos_neg['labels'].extend(action_labels_neg)
            # score labels: 1 is positive. 0 is negative
            train_db_pos_neg['score_labels'].extend(
                list(np.zeros(len(neg_examples), dtype=int)))

            train_db_pos_neg['img_path'] = img_path
            # train_db_pos_['img_path'] = img_path
            # train_db_neg_['img_path'] = img_path

            # if len(train_db_pos_['bboxes']) != 0 and len(train_db_neg_['bboxes']) != 0:
            #     train_db_pos_gpu.append(train_db_pos_)
            #     train_db_neg_gpu.append(train_db_neg_)
            if len(train_db_pos_neg['bboxes']) == (opts['nPos_train'] +
                                                   opts['nNeg_train']):
                train_db_pos_neg_gpu.append(train_db_pos_neg)
                # train_db_neg_gpu.append(train_db_neg_)
            # box_ii += 1

        # img_ii += 1

        # if img_ii==3471:
        #     print("when gt_skip set to 200, and the img_ii=3472, the gen_samples function can't produce examples that iou>thred")
        #     #'ILSVRC2015_VID_train_0002/ILSVRC2015_train_00633000/000025'
        #reason:the img is so small and unclear
        # if img_ii%1000==0 and img_ii!=0:
        #     t9=time.time()
        #     real_time=t9-t2
        #     all_time=t9-t0
        #     all_h=all_time//3600
        #     all_m=all_time%3600//60
        #     all_s=all_time%60
        #     speed_img=1000/real_time
        #     speed_box=(box_ii-box_ii_start)/real_time
        #     all_speed_img=img_ii/all_time
        #     all_speed_box = box_ii/all_time
        #     print('\ndone imgs: %d , done boxes: %d , all imgs: %d. '%(img_ii,box_ii,all_img_num))
        #     print('real_time speed: %d imgs/s, %d boxes/s'%(speed_img,speed_box))
        #     print('avg_time speed: %d imgs/s, %d boxes/s' % (all_speed_img, all_speed_box))
        #     print('spend time: %d h  %d m  %d s (%d s)'%(all_h,all_m,all_s,all_time))
        #     box_ii_start=box_ii
        #     t2=time.time()
    try:
        lock.acquire()
        # print("len(train_db_pos_gpu): %d" % len(train_db_pos_gpu))
        train_db_pos_neg_all.extend(train_db_pos_neg_gpu)
        # print("len(train_db_pos): %d" % len(train_db_pos))
        # print("len(train_db_neg_gpu): %d" % len(train_db_neg_gpu))
        # train_db_neg.extend(train_db_neg_gpu)
        # print("len(train_db_neg): %d" % len(train_db_neg))
    except Exception as err:
        raise err
    finally:
        lock.release()
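
# process_data_ILSVR expects each entry of img_paths to be a frame key that
# can be appended to the Annotations/Data roots above, e.g.
# 'ILSVRC2015_VID_train_0002/ILSVRC2015_train_00633000/000025'. A minimal
# single-process driver, using a plain list and a threading lock in place of
# the multiprocessing primitives (the name is illustrative):
def run_process_data_ILSVR_sketch(frame_keys, opts):
    import threading
    train_db_pos_neg_all = []
    lock = threading.Lock()
    process_data_ILSVR(frame_keys, opts, train_db_pos_neg_all, lock)
    return train_db_pos_neg_all
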
def adnet_test(net, vid_path, opts, args):

    if torch.cuda.is_available():
        if args.cuda:
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        if not args.cuda:
            print(
                "WARNING: It looks like you have a CUDA device, but aren't " +
                "using CUDA.\nRun with --cuda for optimal training speed.")
            torch.set_default_tensor_type('torch.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    transform = ADNet_Augmentation(opts)

    print('Testing sequences in ' + str(vid_path) + '...')
    t_sum = 0

    if args.visualize:
        writer = SummaryWriter(
            log_dir=os.path.join('tensorboardx_log', 'online_adapatation_' +
                                 args.save_result_npy))

    ################################
    # Load video sequences
    ################################

    vid_info = {'gt': [], 'img_files': [], 'nframes': 0}

    vid_info['img_files'] = glob.glob(os.path.join(vid_path, 'color', '*.jpg'))
    vid_info['img_files'].sort(key=str.lower)

    gt_path = os.path.join(vid_path, 'groundtruth.txt')

    if not os.path.exists(gt_path):
        bboxes = []
        t = 0
        return bboxes, t_sum

    # parse gt
    gtFile = open(gt_path, 'r')
    gt = gtFile.read().split('\n')
    for i in range(len(gt)):
        if gt[i] == '' or gt[i] is None:
            continue

        if ',' in gt[i]:
            separator = ','
        elif '\t' in gt[i]:
            separator = '\t'
        elif ' ' in gt[i]:
            separator = ' '
        else:
            separator = ','

        gt[i] = gt[i].split(separator)
        gt[i] = list(map(float, gt[i]))
    gtFile.close()

    if len(gt[0]) >= 6:
        for gtidx in range(len(gt)):
            if gt[gtidx] == "":
                continue
            x = gt[gtidx][0:len(gt[gtidx]):2]
            y = gt[gtidx][1:len(gt[gtidx]):2]
            gt[gtidx] = [min(x), min(y), max(x) - min(x), max(y) - min(y)]

    vid_info['gt'] = gt
    if vid_info['gt'][-1] == '':  # small hack
        vid_info['gt'] = vid_info['gt'][:-1]
    vid_info['nframes'] = min(len(vid_info['img_files']), len(vid_info['gt']))

    # catch the first box
    curr_bbox = vid_info['gt'][0]

    # init containers
    bboxes = np.zeros(np.array(
        vid_info['gt']).shape)  # tracking result containers

    ntraining = 0

    # setup training
    if args.cuda:
        optimizer = optim.SGD([{
            'params': net.module.base_network.parameters(),
            'lr': 0
        }, {
            'params': net.module.fc4_5.parameters()
        }, {
            'params': net.module.fc6.parameters()
        }, {
            'params': net.module.fc7.parameters(),
            'lr': 1e-3
        }],
                              lr=1e-3,
                              momentum=opts['train']['momentum'],
                              weight_decay=opts['train']['weightDecay'])
    else:
        optimizer = optim.SGD([{
            'params': net.base_network.parameters(),
            'lr': 0
        }, {
            'params': net.fc4_5.parameters()
        }, {
            'params': net.fc6.parameters()
        }, {
            'params': net.fc7.parameters(),
            'lr': 1e-3
        }],
                              lr=1e-3,
                              momentum=opts['train']['momentum'],
                              weight_decay=opts['train']['weightDecay'])

    action_criterion = nn.CrossEntropyLoss()
    score_criterion = nn.CrossEntropyLoss()

    dataset_storage_pos = None
    dataset_storage_neg = None
    is_negative = False  # is_negative = True if the tracking failed
    target_score = 0
    all_iteration = 0
    t = 0

    for idx in range(vid_info['nframes']):
        # for frame_idx, frame_path in enumerate(vid_info['img_files']):
        frame_idx = idx
        frame_path = vid_info['img_files'][idx]
        t0_wholetracking = time.time()
        frame = cv2.imread(frame_path)

        # draw box or with display, then save
        if args.display_images:
            im_with_bb = display_result(frame,
                                        curr_bbox)  # draw box and display
        else:
            im_with_bb = draw_box(frame, curr_bbox)

        if args.save_result_images:
            filename = os.path.join(args.save_result_images,
                                    str(frame_idx) + '-' + str(t) + '.jpg')
            cv2.imwrite(filename, im_with_bb)

        curr_bbox_old = curr_bbox
        cont_negatives = 0

        if frame_idx > 0:
            # tracking
            if args.cuda:
                net.module.set_phase('test')
            else:
                net.set_phase('test')
            t = 0
            while True:
                curr_patch, curr_bbox, _, _ = transform(
                    frame, curr_bbox, None, None)
                if args.cuda:
                    curr_patch = curr_patch.cuda()

                curr_patch = curr_patch.unsqueeze(
                    0)  # 1 batch input [1, curr_patch.shape]

                fc6_out, fc7_out = net.forward(curr_patch)

                curr_score = fc7_out.detach().cpu().numpy()[0][1]

                if ntraining > args.believe_score_result:
                    if curr_score < opts['failedThre']:
                        cont_negatives += 1

                if args.cuda:
                    action = np.argmax(fc6_out.detach().cpu().numpy()
                                       )  # TODO: really okay to detach?
                    action_prob = fc6_out.detach().cpu().numpy()[0][action]
                else:
                    action = np.argmax(fc6_out.detach().numpy()
                                       )  # TODO: really okay to detach?
                    action_prob = fc6_out.detach().numpy()[0][action]

                # do action
                curr_bbox = do_action(curr_bbox, opts, action, frame.shape)

                # bound the box to a minimum size of 10 px, re-centring it and
                # clamping the top-left corner to be non-negative
                if curr_bbox[2] < 10:
                    curr_bbox[0] = max(
                        0, curr_bbox[0] + curr_bbox[2] / 2 - 10 / 2)
                    curr_bbox[2] = 10
                if curr_bbox[3] < 10:
                    curr_bbox[1] = max(
                        0, curr_bbox[1] + curr_bbox[3] / 2 - 10 / 2)
                    curr_bbox[3] = 10

                t += 1

                # draw box or with display, then save
                if args.display_images:
                    im_with_bb = display_result(
                        frame, curr_bbox)  # draw box and display
                else:
                    im_with_bb = draw_box(frame, curr_bbox)

                if args.save_result_images:
                    filename = os.path.join(
                        args.save_result_images,
                        str(frame_idx) + '-' + str(t) + '.jpg')
                    cv2.imwrite(filename, im_with_bb)

                if action == opts[
                        'stop_action'] or t >= opts['num_action_step_max']:
                    break

            print('final curr_score: %.4f' % curr_score)

            # re-detect when the confidence drops below 0.5, but only once
            # fc7 is considered reliable; otherwise just trust ADNet
            if ntraining > args.believe_score_result:
                if curr_score < 0.5:
                    print('redetection')
                    is_negative = True

                    # redetection process
                    redet_samples = gen_samples(
                        'gaussian', curr_bbox_old, opts['redet_samples'], opts,
                        min(1.5, 0.6 * 1.15**cont_negatives),
                        opts['redet_scale_factor'])
                    score_samples = []

                    for redet_sample in redet_samples:
                        temp_patch, temp_bbox, _, _ = transform(
                            frame, redet_sample, None, None)
                        if args.cuda:
                            temp_patch = temp_patch.cuda()

                        temp_patch = temp_patch.unsqueeze(
                            0)  # 1 batch input [1, curr_patch.shape]

                        fc6_out_temp, fc7_out_temp = net.forward(temp_patch)

                        score_samples.append(
                            fc7_out_temp.detach().cpu().numpy()[0][1])

                    score_samples = np.array(score_samples)
                    max_score_samples_idx = np.argmax(score_samples)

                    # replace the curr_box with the samples with maximum score
                    curr_bbox = redet_samples[max_score_samples_idx]

                    # update the final result image
                    if args.display_images:
                        im_with_bb = display_result(
                            frame, curr_bbox)  # draw box and display
                    else:
                        im_with_bb = draw_box(frame, curr_bbox)

                    if args.save_result_images:
                        filename = os.path.join(args.save_result_images,
                                                str(frame_idx) + '-redet.jpg')
                        cv2.imwrite(filename, im_with_bb)
                else:
                    is_negative = False
            else:
                is_negative = False

        if args.save_result_images:
            filename = os.path.join(args.save_result_images,
                                    'final-' + str(frame_idx) + '.jpg')
            cv2.imwrite(filename, im_with_bb)

        # record the curr_bbox result
        bboxes[frame_idx] = curr_bbox

        # create or update storage + set iteration_range for training
        if frame_idx == 0:
            dataset_storage_pos = OnlineAdaptationDatasetStorage(
                initial_frame=frame,
                first_box=curr_bbox,
                opts=opts,
                args=args,
                positive=True)
            if opts['nNeg_init'] != 0:  # the small hack in adnet_test makes nNeg_online 0 whenever nNeg_init is 0
                dataset_storage_neg = OnlineAdaptationDatasetStorage(
                    initial_frame=frame,
                    first_box=curr_bbox,
                    opts=opts,
                    args=args,
                    positive=False)

            iteration_range = range(opts['finetune_iters'])
        else:
            assert dataset_storage_pos is not None
            if opts['nNeg_init'] != 0:  # the small hack in adnet_test makes nNeg_online 0 whenever nNeg_init is 0
                assert dataset_storage_neg is not None

            # if confident or when always generate samples, generate new samples
            if ntraining < args.believe_score_result:
                # fc7 is not trained enough to trust its confidence yet, so
                # keep generating new samples unconditionally
                always_generate_samples = True
            else:
                always_generate_samples = False

            if always_generate_samples or (not is_negative or
                                           target_score > opts['successThre']):
                dataset_storage_pos.add_frame_then_generate_samples(
                    frame, curr_bbox)

            iteration_range = range(opts['finetune_iters_online'])

        # run online adaptation every `online_adaptation_every_I_frames`
        # frames; note that the `False and` below disables this fine-tuning
        # branch entirely, so the training loop that follows never runs
        if False and frame_idx % args.online_adaptation_every_I_frames == 0:
            ntraining += 1
            # generate dataset just before training
            dataset_pos = OnlineAdaptationDataset(dataset_storage_pos)
            data_loader_pos = data.DataLoader(dataset_pos,
                                              opts['minibatch_size'],
                                              num_workers=args.num_workers,
                                              shuffle=True,
                                              pin_memory=False)
            batch_iterator_pos = None

            if opts['nNeg_init'] != 0:  # the small hack in adnet_test makes nNeg_online 0 whenever nNeg_init is 0
                dataset_neg = OnlineAdaptationDataset(dataset_storage_neg)
                data_loader_neg = data.DataLoader(dataset_neg,
                                                  opts['minibatch_size'],
                                                  num_workers=args.num_workers,
                                                  shuffle=True,
                                                  pin_memory=False)
                batch_iterator_neg = None
            else:
                dataset_neg = []

            epoch_size_pos = len(dataset_pos) // opts['minibatch_size']
            epoch_size_neg = len(dataset_neg) // opts['minibatch_size']
            epoch_size = epoch_size_pos + epoch_size_neg  # 1 epoch, how many iterations

            which_dataset = list(np.full(epoch_size_pos, fill_value=1))
            which_dataset.extend(np.zeros(epoch_size_neg, dtype=int))
            shuffle(which_dataset)

            print("1 epoch = " + str(epoch_size) + " iterations")

            if args.cuda:
                net.module.set_phase('train')
            else:
                net.set_phase('train')

            # training loop
            for iteration in iteration_range:
                all_iteration += 1  # use this for update the visualization
                # create batch iterator
                if (not batch_iterator_pos) or (iteration % epoch_size == 0):
                    batch_iterator_pos = iter(data_loader_pos)

                if opts['nNeg_init'] != 0:
                    if (not batch_iterator_neg) or (iteration % epoch_size
                                                    == 0):
                        batch_iterator_neg = iter(data_loader_neg)

                # load train data
                if which_dataset[iteration %
                                 len(which_dataset)]:  # if positive
                    images, bbox, action_label, score_label = next(
                        batch_iterator_pos)
                else:
                    images, bbox, action_label, score_label = next(
                        batch_iterator_neg)

                if args.cuda:
                    images = torch.Tensor(images.cuda())
                    bbox = torch.Tensor(bbox.cuda())
                    action_label = torch.Tensor(action_label.cuda())
                    score_label = torch.Tensor(score_label.float().cuda())

                else:
                    images = torch.Tensor(images)
                    bbox = torch.Tensor(bbox)
                    action_label = torch.Tensor(action_label)
                    score_label = torch.Tensor(score_label)

                # forward
                t0 = time.time()
                action_out, score_out = net(images)

                # backprop
                optimizer.zero_grad()
                if which_dataset[iteration %
                                 len(which_dataset)]:  # if positive
                    action_l = action_criterion(action_out,
                                                torch.max(action_label, 1)[1])
                else:
                    action_l = torch.Tensor([0])
                score_l = score_criterion(score_out, score_label.long())
                loss = action_l + score_l
                loss.backward()
                optimizer.step()
                t1 = time.time()

                if all_iteration % 10 == 0:
                    print('Timer: %.4f sec.' % (t1 - t0))
                    print('iter ' + repr(all_iteration) + ' || Loss: %.4f ||' %
                          (loss.data.item()),
                          end=' ')
                    if args.visualize and args.send_images_to_visualization:
                        random_batch_index = np.random.randint(images.size(0))
                        writer.add_image(
                            'image',
                            images.data[random_batch_index].cpu().numpy(),
                            random_batch_index)

                if args.visualize:
                    writer.add_scalars(
                        'data/iter_loss', {
                            'action_loss': action_l.item(),
                            'score_loss': score_l.item(),
                            'total': (action_l.item() + score_l.item())
                        },
                        global_step=all_iteration)

        t1_wholetracking = time.time()
        t_sum += t1_wholetracking - t0_wholetracking
        print('whole tracking time = %.4f sec.' %
              (t1_wholetracking - t0_wholetracking))

    # evaluate the precision
    bboxes = np.array(bboxes)
    vid_info['gt'] = np.array(vid_info['gt'])

    # iou_precisions = iou_precision_plot(bboxes, vid_info['gt'], vid_path, show=args.display_images, save_plot=args.save_result_images)
    #
    # distance_precisions = distance_precision_plot(bboxes, vid_info['gt'], vid_path, show=args.display_images, save_plot=args.save_result_images)
    #
    # precisions = [distance_precisions, iou_precisions]

    np.save(args.save_result_npy + '-bboxes.npy', bboxes)
    np.save(args.save_result_npy + '-ground_truth.npy', vid_info['gt'])

    # return bboxes, t_sum, precisions
    return bboxes, t_sum
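
# adnet_test runs one VOT-style sequence (a directory with color/*.jpg and a
# groundtruth.txt) and returns the predicted boxes plus the accumulated
# tracking time. A minimal sketch of a driver over a dataset root; the
# directory layout and the fields of `args` are assumptions carried over from
# the function body above:
def run_adnet_test_sketch(net, dataset_root, opts, args):
    import os
    results = {}
    for seq in sorted(os.listdir(dataset_root)):
        vid_path = os.path.join(dataset_root, seq)
        if not os.path.isdir(vid_path):
            continue
        bboxes, t_sum = adnet_test(net, vid_path, opts, args)
        results[seq] = {'bboxes': bboxes, 'time': t_sum}
    return results
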
def get_train_dbs(vid_info, opts):
    img = cv2.imread(vid_info['img_files'][0])

    opts['scale_factor'] = 1.05
    opts['imgSize'] = list(img.shape)
    gt_skip = opts['train']['gt_skip']

    if vid_info['db_name'] == 'alov300':
        train_sequences = vid_info['gt_use'] == 1
    else:
        train_sequences = list(range(0, vid_info['nframes'], gt_skip))

    train_db_pos = []
    train_db_neg = []

    for train_i in range(len(train_sequences)):
        train_db_pos_ = {
            'img_path': [],
            'bboxes': [],
            'labels': [],
            'score_labels': []
        }
        train_db_neg_ = {
            'img_path': [],
            'bboxes': [],
            'labels': [],
            'score_labels': []
        }

        img_idx = train_sequences[train_i]
        gt_bbox = vid_info['gt'][img_idx]

        if len(gt_bbox) == 0:
            continue

        pos_examples = []
        while len(pos_examples) < opts['nPos_train']:
            pos = gen_samples('gaussian', gt_bbox, opts['nPos_train'] * 5,
                              opts, 0.1, 5)
            r = overlap_ratio(pos, np.matlib.repmat(gt_bbox, len(pos), 1))
            pos = pos[np.array(r) > opts['posThre_train']]
            if len(pos) == 0:
                continue
            pos = pos[np.random.
                      randint(low=0,
                              high=len(pos),
                              size=min(len(pos), opts['nPos_train'] -
                                       len(pos_examples))), :]
            pos_examples.extend(pos)

        neg_examples = []
        while len(neg_examples) < opts['nNeg_train']:
            # note: the original code used opts['nPos_train'] here instead of opts['nNeg_train']
            neg = gen_samples('gaussian', gt_bbox, opts['nNeg_train'] * 5,
                              opts, 2, 10)
            r = overlap_ratio(neg, np.matlib.repmat(gt_bbox, len(neg), 1))
            neg = neg[np.array(r) < opts['negThre_train']]
            if len(neg) == 0:
                continue
            neg = neg[np.random.
                      randint(low=0,
                              high=len(neg),
                              size=min(len(neg), opts['nNeg_train'] -
                                       len(neg_examples))), :]
            neg_examples.extend(neg)

        show_examples_test(pos_examples, neg_examples,
                           vid_info['img_files'][img_idx])
        # examples = pos_examples + neg_examples
        action_labels_pos = gen_action_labels(opts['num_actions'], opts,
                                              np.array(pos_examples), gt_bbox)
        action_labels_neg = np.full((opts['num_actions'], len(neg_examples)),
                                    fill_value=-1)

        action_labels_pos = np.transpose(action_labels_pos).tolist()
        action_labels_neg = np.transpose(action_labels_neg).tolist()

        # action_labels = action_labels_pos + action_labels_neg

        train_db_pos_['img_path'] = np.full(len(pos_examples),
                                            vid_info['img_files'][img_idx])
        train_db_pos_['bboxes'] = pos_examples
        train_db_pos_['labels'] = action_labels_pos
        # score labels: 1 is positive. 0 is negative
        train_db_pos_['score_labels'] = list(
            np.ones(len(pos_examples), dtype=int))

        train_db_neg_['img_path'] = np.full(len(neg_examples),
                                            vid_info['img_files'][img_idx])
        train_db_neg_['bboxes'] = neg_examples
        train_db_neg_['labels'] = action_labels_neg
        # score labels: 1 is positive. 0 is negative
        train_db_neg_['score_labels'] = list(
            np.zeros(len(neg_examples), dtype=int))

        train_db_pos.append(train_db_pos_)
        train_db_neg.append(train_db_neg_)

    return train_db_pos, train_db_neg
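
# get_train_dbs yields one positive and one negative record per sampled
# frame. A small sketch of how a full training database could be assembled
# from several videos, assuming `vid_infos` is a list of dicts shaped like
# the vid_info used above:
def build_train_db_sketch(vid_infos, opts):
    train_db_pos, train_db_neg = [], []
    for vid_info in vid_infos:
        pos, neg = get_train_dbs(vid_info, opts)
        train_db_pos.extend(pos)
        train_db_neg.extend(neg)
    return train_db_pos, train_db_neg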