Example #1
    def __init__(self):
        self.args = self.get_parser().parse_args()
        self._try_downloading_necessities(self.args.model_name)
        self.detection_model = self._build_detection_model()
        self.transforms = build_transforms(cfg, is_train=False)

        os.makedirs(self.args.output_folder, exist_ok=True)
Example #2
def __init__(self, cfg, root):
    self.dataPath = root
        self.transforms = build_transforms(cfg, True)
        self.dataloader = myDataset(self.dataPath,
                                    transforms=self.transforms)
        self.root = root
        self.cfg = cfg
Example #3
def __init__(self, cfg, root):
    self.dataPath = os.path.join("data_maskrcnn", root)
    self.frames = os.path.join(self.dataPath, 'Frames')
    self.bboxes = os.path.join(self.dataPath, 'Bboxes')
    self.optFlow = os.path.join(self.dataPath, 'OptFlow')
    self.transforms = build_transforms(cfg, True)
    self.dataloader = myDataset(self.dataPath, transforms=self.transforms)
    self.cfg = cfg
Example #4
def __init__(self, cfg, mode, opt_flow_on=False):
    self.dataPath = "Movie_Frames_{}".format(mode)
    self.imgPath = os.path.join(self.dataPath, 'Frames')
    self.csvPath = os.path.join(self.dataPath, 'Bboxes_mydataset')
    self.npyPath = os.path.join(self.dataPath, 'opt_flow_np')
    self.transforms = build_transforms(cfg, True)
    self.dataloader = myDataset(self.csvPath,
                                self.imgPath,
                                transforms=self.transforms)
    self.mode = mode
    self.cfg = cfg
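
All four constructors above pass the config and a boolean is_train flag straight to build_transforms. A minimal sketch of the assumed call pattern (the import path follows maskrcnn_benchmark; the exact transform pipeline depends on the config):

from maskrcnn_benchmark.data.transforms import build_transforms

# build_transforms(cfg, is_train) returns a composed transform; with
# is_train=True it typically includes resize, random horizontal flip and
# normalization, while is_train=False keeps only the deterministic steps.
train_transforms = build_transforms(cfg, True)
test_transforms = build_transforms(cfg, is_train=False)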
Example #5
def split(cfg, num_fold):
    # when calculating the overlap ratio, set the sliding-window overlap to 0
    paths_catalog = import_file("maskrcnn_benchmark.config.paths_catalog",
                                cfg.PATHS_CATALOG, True)
    DatasetCatalog = paths_catalog.DatasetCatalog
    dataset_list = cfg.DATASETS.TEST

    transforms = build_transforms(cfg, False)
    datasets = build_dataset(dataset_list, transforms, DatasetCatalog, False)
    collator = BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)
    data_loader = torch.utils.data.DataLoader(
        datasets[0],
        num_workers=1,
        collate_fn=collator,
    )

    name_dic = defaultdict(list)
    for i, data in enumerate(data_loader, 0):
        boxlists = data[1][0]

        masks = annToMask(boxlists)
        is_crowd = len(masks) * [0]
        iou = maskUtils.iou(masks, masks, is_crowd) - np.eye(len(masks))
        maxiou = np.max(iou, 0)
        avgiou = np.mean(maxiou)
        name = datasets[0].get_img_info(i)['file_name']
        name_dic[name].append(avgiou)
    names = []
    overlapping = []
    for k, v in name_dic.items():
        names.append(k)
        overlapping.append(sum(v) / len(v))
    median = get_median(overlapping)
    hard = []
    easy = []
    for name, miou in zip(names, overlapping):
        if miou > median:
            hard.append(name)
        else:
            easy.append(name)

    easy_n = chunks(easy, num_fold)
    hard_n = chunks(hard, num_fold)

    splitfile = {}
    for i, (e, h) in enumerate(zip(easy_n, hard_n), start=1):
        splitfile[i] = {'easy': e, 'hard': h}

    with open('split.json', 'w') as f:
        json.dump(splitfile, f)
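
split relies on two helpers, get_median and chunks, that are not shown on this page. A plausible sketch, assuming get_median returns the median of a list and chunks splits a list into n roughly equal consecutive parts:

def get_median(values):
    # median of a list of numbers (assumed helper)
    s = sorted(values)
    mid = len(s) // 2
    return s[mid] if len(s) % 2 else (s[mid - 1] + s[mid]) / 2.0

def chunks(lst, n):
    # split lst into n roughly equal consecutive parts (assumed helper)
    k, m = divmod(len(lst), n)
    return [lst[i * k + min(i, m):(i + 1) * k + min(i + 1, m)]
            for i in range(n)]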
Example #6
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg, model, optimizer, scheduler, output_dir, save_to_disk
    )

    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    transforms = build_transforms(cfg, True)
    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
        cfg,
        distributed,
    )

    return model
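
A hedged sketch of how a train function like this is usually driven from a tools/train_net.py-style entry point; the argument handling below is illustrative, not taken from this snippet:

import argparse
import os
import torch
from maskrcnn_benchmark.config import cfg

def main():
    parser = argparse.ArgumentParser(description="PyTorch detection training")
    parser.add_argument("--config-file", default="", metavar="FILE")
    parser.add_argument("--local_rank", type=int, default=0)
    args = parser.parse_args()

    # torch.distributed.launch sets WORLD_SIZE for multi-GPU runs
    distributed = int(os.environ.get("WORLD_SIZE", "1")) > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.freeze()
    train(cfg, args.local_rank, distributed)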
Example #7
def im_detect(model, img_original, image_id, Test_RCNN, fastText, prior_mask,
              Action_dic_inv, object_thres, human_thres, prior_flag, detection,
              detect_object_centric_dict, device, cfg):
    ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    DATA_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'Data'))

    im_shape = (img_original.height, img_original.width)  # (height, width)
    transforms = build_transforms(cfg, is_train=False)
    worddim = fastText[1].shape[1]

    for object_out in Test_RCNN[image_id]:
        if np.max(object_out[5]) > object_thres:
            # This is a valid object; human-human interactions are possible,
            # so the "object" may itself be a human detection.

            h_box = np.empty((0, 4), dtype=np.float32)
            object_word_embedding = np.empty((0, worddim), dtype=np.float32)
            human_score = np.empty((0, 1), dtype=np.float32)
            object_class = np.empty((0, 1), dtype=np.int32)
            Weight_mask = np.empty((0, 29), dtype=np.float32)

            for human in Test_RCNN[image_id]:
                if (human[1] == 'Human'
                    ) and (np.max(human[5]) > human_thres) and not (np.all(
                        human[2] == object_out[2])):  # This is a valid human
                    h_box_ = np.array(
                        [human[2][0], human[2][1], human[2][2],
                         human[2][3]]).reshape(1, 4)
                    h_box = np.concatenate((h_box, h_box_), axis=0)

                    object_word_embedding_ = fastText[object_out[4]]
                    object_word_embedding = np.concatenate(
                        (object_word_embedding, object_word_embedding_),
                        axis=0)

                    # Pattern_ = generate_spatial(human[2], object_out[2]).reshape(1, 2, 64, 64)
                    # Pattern  = np.concatenate((Pattern, Pattern_), axis=0)

                    human_score = np.concatenate(
                        (human_score, np.max(human[5]).reshape(1, 1)), axis=0)
                    object_class = np.concatenate(
                        (object_class, np.array(object_out[4]).reshape(1, 1)),
                        axis=0)

                    Weight_mask_ = prior_mask[:, object_out[4]].reshape(1, 29)
                    Weight_mask = np.concatenate((Weight_mask, Weight_mask_),
                                                 axis=0)

            o_box = np.array([
                object_out[2][0], object_out[2][1], object_out[2][2],
                object_out[2][3]
            ]).reshape(1, 4)

            if len(h_box) == 0:
                continue

            blobs = {}
            pos_num = len(h_box)
            blobs['pos_num'] = pos_num
            human_boxes_cpu = h_box.reshape(pos_num, 4)
            human_boxes = torch.FloatTensor(human_boxes_cpu)
            object_boxes_cpu = np.tile(o_box,
                                       [len(h_box), 1]).reshape(pos_num, 4)
            object_boxes = torch.FloatTensor(object_boxes_cpu)

            blobs['object_word_embeddings_object_centric'] = torch.FloatTensor(
                object_word_embedding).reshape(pos_num, worddim)

            human_boxlist = BoxList(human_boxes,
                                    img_original.size,
                                    mode="xyxy")  # image_size=(width, height)
            object_boxlist = BoxList(object_boxes,
                                     img_original.size,
                                     mode="xyxy")  # image_size=(width, height)

            img, human_boxlist, object_boxlist = transforms(
                img_original, human_boxlist, object_boxlist)

            spatials = []
            for human_box, object_box in zip(human_boxlist.bbox,
                                             object_boxlist.bbox):
                ho_spatial = generate_spatial(human_box.numpy(),
                                              object_box.numpy()).reshape(
                                                  1, 2, 64, 64)
                spatials.append(ho_spatial)
            blobs['spatials_object_centric'] = torch.FloatTensor(
                spatials).reshape(-1, 2, 64, 64)
            blobs['human_boxes'], blobs['object_boxes'] = (human_boxlist, ), (
                object_boxlist, )

            for key in blobs.keys():
                if not isinstance(blobs[key], int) and not isinstance(
                        blobs[key], tuple):
                    blobs[key] = blobs[key].to(device)
                elif isinstance(blobs[key], tuple):
                    blobs[key] = [boxlist.to(device) for boxlist in blobs[key]]

            image_list = to_image_list(img, cfg.DATALOADER.SIZE_DIVISIBILITY)
            image_list = image_list.to(device)

            # compute predictions
            model.eval()
            with torch.no_grad():
                prediction_HO, prediction_H, prediction_O, prediction_sp = model(
                    image_list, blobs)

            # convert to np.array
            prediction_HO = prediction_HO.data.cpu().numpy()
            prediction_H = prediction_H.data.cpu().numpy()
            # prediction_O = prediction_O.data.cpu().numpy()
            prediction_sp = prediction_sp.data.cpu().numpy()

            # evaluate the spatial (sp) branch only
            prediction_HO = prediction_sp

            if prior_flag == 1:
                prediction_HO = apply_prior_Graph(object_class, prediction_HO)
            if prior_flag == 2:
                prediction_HO = prediction_HO * Weight_mask
            if prior_flag == 3:
                prediction_HO = apply_prior_Graph(object_class, prediction_HO)
                prediction_HO = prediction_HO * Weight_mask

            # save image information
            for idx in range(pos_num):
                human_out = human_boxes_cpu[idx, :]
                dic = {}
                dic['image_id'] = image_id
                dic['person_box'] = human_out
                dic['person_score'] = human_score[idx][0]
                dic['prediction_H'] = prediction_H[idx]
                dic['prediction_sp'] = prediction_sp[idx]  # before prior
                dic['object_box'] = object_out[2]
                dic['O_score'] = np.max(object_out[5])
                dic['O_class'] = object_out[4]
                Score_obj = prediction_HO[idx] * np.max(object_out[5])
                Score_obj = np.concatenate((object_out[2], Score_obj), axis=0)
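                # Score_obj is now [x1, y1, x2, y2, score_0, ..., score_28]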
                dic['Score_obj'] = Score_obj

                detect_object_centric_dict[image_id].append(dic)

    # free per-image tensors if they were created in the loop above
    try:
        del blobs
    except NameError:
        pass

    try:
        del image_list
    except NameError:
        pass

    torch.cuda.empty_cache()
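
generate_spatial is not defined in this snippet. Given that its output is reshaped to (1, 2, 64, 64), it presumably rasterizes the human and object boxes as two binary masks over their union box, in the spirit of the two-channel interaction pattern used in HO-RCNN/iCAN; a sketch under that assumption:

import numpy as np

def generate_spatial(human_box, object_box, size=64):
    # Assumed behavior: channel 0 marks the human box, channel 1 the object
    # box, both rasterized inside their union box scaled to size x size.
    boxes = np.stack([human_box, object_box]).astype(np.float32)
    ux1, uy1 = boxes[:, 0].min(), boxes[:, 1].min()
    ux2, uy2 = boxes[:, 2].max(), boxes[:, 3].max()
    sx = size / max(ux2 - ux1, 1e-6)
    sy = size / max(uy2 - uy1, 1e-6)
    pattern = np.zeros((2, size, size), dtype=np.float32)
    for c, (x1, y1, x2, y2) in enumerate(boxes):
        u1, v1 = int((x1 - ux1) * sx), int((y1 - uy1) * sy)
        u2 = int(np.ceil((x2 - ux1) * sx))
        v2 = int(np.ceil((y2 - uy1) * sy))
        pattern[c, v1:v2, u1:u2] = 1.0
    return pattern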
Example #8
def im_detect(model, image_id, Test_RCNN, word_embeddings, object_thres,
              human_thres, detection, detection_human, detection_object,
              device, opt):
    # im_orig, im_shape = get_blob(image_id, cfg)

    ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    DATA_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'Data'))
    im_file = os.path.join(
        DATA_DIR, 'hico_20160224_det', 'images', 'test2015',
        'HICO_test2015_' + (str(image_id)).zfill(8) + '.jpg')
    img_original = Image.open(im_file)
    img_original = img_original.convert('RGB')
    # Image.open gives img.size = (width, height), e.g. (640, 480), while
    # cv2.imread gives im.shape = (height, width), e.g. (480, 640).
    # For consistency with earlier code, (img.height, img.width) is used here.
    im_shape = (img_original.height, img_original.width)  # (height, width)
    transforms = build_transforms(cfg, is_train=False)

    This_human = []
    this_pair_h = []
    this_pair_o = []

    for Human in Test_RCNN[image_id]:

        if (np.max(Human[5]) > human_thres) and (
                Human[1] == 'Human'):  # This is a valid human

            O_box = np.empty((0, 4), dtype=np.float32)
            O_vec = np.empty((0, 300), dtype=np.float32)
            Pattern = np.empty((0, 2, 64, 64), dtype=np.float32)
            O_score = np.empty((0, 1), dtype=np.float32)
            O_class = np.empty((0, 1), dtype=np.int32)

            for Object in Test_RCNN[image_id]:
                if opt['use_thres_dic'] == 1:
                    object_thres_ = opt['thres_dic'][Object[4]]
                else:
                    object_thres_ = object_thres

                if (np.max(Object[5]) > object_thres_) and not (np.all(
                        Object[2] == Human[2])):  # This is a valid object

                    O_box_ = np.array([
                        Object[2][0], Object[2][1], Object[2][2], Object[2][3]
                    ]).reshape(1, 4)
                    O_box = np.concatenate((O_box, O_box_), axis=0)

                    O_vec_ = word_embeddings[Object[4]]
                    O_vec = np.concatenate((O_vec, O_vec_), axis=0)

                    Pattern_ = generate_spatial(Human[2], Object[2]).reshape(
                        1, 2, 64, 64)
                    Pattern = np.concatenate((Pattern, Pattern_), axis=0)

                    O_score = np.concatenate(
                        (O_score, np.max(Object[5]).reshape(1, 1)), axis=0)
                    O_class = np.concatenate(
                        (O_class, np.array(Object[4]).reshape(1, 1)), axis=0)

            if len(O_box) == 0:
                continue
            H_box = np.array(
                [Human[2][0], Human[2][1], Human[2][2],
                 Human[2][3]]).reshape(1, 4)

            blobs = {}
            blobs['pos_num'] = len(O_box)
            pos_num = len(O_box)
            human_boxes_cpu = np.tile(H_box,
                                      [len(O_box), 1]).reshape(pos_num, 4)
            human_boxes = torch.FloatTensor(human_boxes_cpu)
            object_boxes_cpu = O_box.reshape(pos_num, 4)
            object_boxes = torch.FloatTensor(object_boxes_cpu)

            human_boxlist = BoxList(human_boxes,
                                    img_original.size,
                                    mode="xyxy")  # image_size=(width, height)
            object_boxlist = BoxList(object_boxes,
                                     img_original.size,
                                     mode="xyxy")  # image_size=(width, height)

            img, human_boxlist, object_boxlist = transforms(
                img_original, human_boxlist, object_boxlist)

            spatials = []
            for human_box, object_box in zip(human_boxlist.bbox,
                                             object_boxlist.bbox):
                ho_spatial = generate_spatial(human_box.numpy(),
                                              object_box.numpy()).reshape(
                                                  1, 2, 64, 64)
                spatials.append(ho_spatial)
            blobs['spatials'] = torch.FloatTensor(spatials).reshape(
                -1, 2, 64, 64)
            blobs['human_boxes'], blobs['object_boxes'] = (human_boxlist, ), (
                object_boxlist, )
            blobs['object_word_embeddings'] = torch.FloatTensor(O_vec).reshape(
                pos_num, 300)

            for key in blobs.keys():
                if not isinstance(blobs[key], int) and not isinstance(
                        blobs[key], tuple):
                    blobs[key] = blobs[key].to(device)
                elif isinstance(blobs[key], tuple):
                    blobs[key] = [boxlist.to(device) for boxlist in blobs[key]]

            image_list = to_image_list(img, cfg.DATALOADER.SIZE_DIVISIBILITY)
            image_list = image_list.to(device)

            # compute predictions
            model.eval()
            with torch.no_grad():
                prediction_HO, prediction_H, prediction_O, prediction_sp = model(
                    image_list, blobs)

            # convert to np.array
            # prediction_HO = prediction_H + prediction_O
            prediction_HO = prediction_H * prediction_O
            prediction_HO = prediction_HO.data.cpu().numpy()
            prediction_H = prediction_H.data.cpu().numpy()
            prediction_O = prediction_O.data.cpu().numpy()
            # prediction_sp = prediction_sp.data.cpu().numpy()

            for idx in range(len(prediction_HO)):
                temp = []
                temp.append(Human[2])  # Human box
                temp.append(O_box[idx])  # Object box
                temp.append(O_class[idx])  # Object class
                temp.append(prediction_HO[idx])  # Score
                temp.append(Human[5])  # Human score
                temp.append(O_score[idx])  # Object score
                This_human.append(temp)

            for idx in range(len(prediction_H)):
                temp = []
                temp.append(Human[2])  # Human box
                temp.append(O_box[idx])  # Object box
                temp.append(O_class[idx])  # Object class
                temp.append(prediction_H[idx])  # Score
                temp.append(Human[5])  # Human score
                temp.append(O_score[idx])  # Object score
                this_pair_h.append(temp)

            for idx in range(len(prediction_O)):
                temp = []
                temp.append(Human[2])  # Human box
                temp.append(O_box[idx])  # Object box
                temp.append(O_class[idx])  # Object class
                temp.append(prediction_O[idx])  # Score
                temp.append(Human[5])  # Human score
                temp.append(O_score[idx])  # Object score
                this_pair_o.append(temp)

    detection[image_id] = This_human
    detection_human[image_id] = this_pair_h
    detection_object[image_id] = this_pair_o
Example #9
def im_detect(model, img_original, image_id, Test_RCNN, fastText, prior_mask,
              Action_dic_inv, object_thres, human_thres, prior_flag, detection,
              detect_app_dict, device, cfg):

    # im_orig, im_shape = get_blob(image_id, cfg)

    ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    DATA_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'Data'))
    # if "train" in im_dir:
    #     im_file = os.path.join(DATA_DIR, im_dir, 'COCO_train2014_' + (str(image_id)).zfill(12) + '.jpg')
    # else:
    #     im_file = os.path.join(DATA_DIR, im_dir, 'COCO_val2014_' + (str(image_id)).zfill(12) + '.jpg')

    # Image.open gives img.size = (width, height), e.g. (640, 480), while
    # cv2.imread gives im.shape = (height, width), e.g. (480, 640).
    # For consistency with earlier code, (img.height, img.width) is used here.
    im_shape = (img_original.height, img_original.width)  # (height, width)
    transforms = build_transforms(cfg, is_train=False)

    for Human_out in Test_RCNN[image_id]:
        if (np.max(Human_out[5]) > human_thres) and (
                Human_out[1] == 'Human'):  # This is a valid human

            O_box = np.empty((0, 4), dtype=np.float32)
            O_vec = np.empty((0, 300), dtype=np.float32)
            Pattern = np.empty((0, 2, 64, 64), dtype=np.float32)
            O_score = np.empty((0, 1), dtype=np.float32)
            O_class = np.empty((0, 1), dtype=np.int32)
            Weight_mask = np.empty((0, 29), dtype=np.float32)

            for Object in Test_RCNN[image_id]:
                if (np.max(Object[5]) > object_thres) and not (np.all(
                        Object[2] == Human_out[2])):  # This is a valid object
                    O_box_ = np.array([
                        Object[2][0], Object[2][1], Object[2][2], Object[2][3]
                    ]).reshape(1, 4)
                    O_box = np.concatenate((O_box, O_box_), axis=0)

                    O_vec_ = fastText[Object[4]]
                    O_vec = np.concatenate((O_vec, O_vec_), axis=0)

                    # Pattern_ = Get_next_sp(Human_out[2], Object[2]).reshape(1, 64, 64, 2)
                    # Pattern  = np.concatenate((Pattern, Pattern_), axis=0)
                    Pattern_ = generate_spatial(Human_out[2],
                                                Object[2]).reshape(
                                                    1, 2, 64, 64)
                    Pattern = np.concatenate((Pattern, Pattern_), axis=0)

                    O_score = np.concatenate(
                        (O_score, np.max(Object[5]).reshape(1, 1)), axis=0)
                    O_class = np.concatenate(
                        (O_class, np.array(Object[4]).reshape(1, 1)), axis=0)

                    Weight_mask_ = prior_mask[:, Object[4]].reshape(1, 29)
                    Weight_mask = np.concatenate((Weight_mask, Weight_mask_),
                                                 axis=0)

            H_box = np.array([
                Human_out[2][0], Human_out[2][1], Human_out[2][2],
                Human_out[2][3]
            ]).reshape(1, 4)

            if len(O_box) == 0:
                continue

            blobs = {}
            pos_num = len(O_box)
            blobs['pos_num'] = pos_num
            # blobs['dropout_is_training'] = False
            human_boxes_cpu = np.tile(H_box,
                                      [len(O_box), 1]).reshape(pos_num, 4)
            human_boxes = torch.FloatTensor(human_boxes_cpu)
            object_boxes_cpu = O_box.reshape(pos_num, 4)
            object_boxes = torch.FloatTensor(object_boxes_cpu)

            blobs['object_word_embeddings'] = torch.FloatTensor(O_vec).reshape(
                pos_num, 300)

            human_boxlist = BoxList(human_boxes,
                                    img_original.size,
                                    mode="xyxy")  # image_size=(width, height)
            object_boxlist = BoxList(object_boxes,
                                     img_original.size,
                                     mode="xyxy")  # image_size=(width, height)

            img, human_boxlist, object_boxlist = transforms(
                img_original, human_boxlist, object_boxlist)

            spatials = []
            for human_box, object_box in zip(human_boxlist.bbox,
                                             object_boxlist.bbox):
                ho_spatial = generate_spatial(human_box.numpy(),
                                              object_box.numpy()).reshape(
                                                  1, 2, 64, 64)
                spatials.append(ho_spatial)
            blobs['spatials'] = torch.FloatTensor(spatials).reshape(
                -1, 2, 64, 64)
            blobs['human_boxes'], blobs['object_boxes'] = (human_boxlist, ), (
                object_boxlist, )

            for key in blobs.keys():
                if not isinstance(blobs[key], int) and not isinstance(
                        blobs[key], tuple):
                    blobs[key] = blobs[key].to(device)
                elif isinstance(blobs[key], tuple):
                    blobs[key] = [boxlist.to(device) for boxlist in blobs[key]]

            image_list = to_image_list(img, cfg.DATALOADER.SIZE_DIVISIBILITY)
            image_list = image_list.to(device)

            # compute predictions
            model.eval()
            with torch.no_grad():
                prediction_HO, prediction_H, prediction_O, prediction_sp = model(
                    image_list, blobs)

            # convert to np.array; evaluate the H and O branches only
            prediction_HO = prediction_H * prediction_O

            prediction_HO = prediction_HO.data.cpu().numpy()
            prediction_H = prediction_H.data.cpu().numpy()
            prediction_O = prediction_O.data.cpu().numpy()
            # prediction_sp = prediction_sp.data.cpu().numpy()

            dic_save = {}
            dic_save['image_id'] = image_id
            dic_save['person_box'] = Human_out[2]
            dic_save['person_score'] = np.max(Human_out[5])
            dic_save['prediction_HO'] = prediction_HO
            dic_save['prediction_H'] = prediction_H
            dic_save['prediction_O'] = prediction_O
            dic_save['o_class'] = O_class
            dic_save['object_boxes_cpu'] = object_boxes_cpu
            dic_save['O_score'] = O_score

            detect_app_dict[image_id].append(dic_save)

    # free per-image tensors if they were created in the loop above
    try:
        del blobs
    except NameError:
        pass

    try:
        del image_list
    except NameError:
        pass

    torch.cuda.empty_cache()
Example #10
def im_detect(model, im_dir, image_id, Test_RCNN, fastText, prior_mask,
              Action_dic_inv, object_thres, human_thres, prior_flag, detection,
              detect_app_dict, device, cfg):

    # im_orig, im_shape = get_blob(image_id, cfg)

    ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    DATA_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'Data'))
    if "train" in im_dir:
        im_file = os.path.join(
            DATA_DIR, im_dir,
            'COCO_train2014_' + (str(image_id)).zfill(12) + '.jpg')
    else:
        im_file = os.path.join(
            DATA_DIR, im_dir,
            'COCO_val2014_' + (str(image_id)).zfill(12) + '.jpg')
    img_original = Image.open(im_file)
    img_original = img_original.convert('RGB')
    # Image.open gives img.size = (width, height), e.g. (640, 480), while
    # cv2.imread gives im.shape = (height, width), e.g. (480, 640).
    # For consistency with earlier code, (img.height, img.width) is used here.
    im_shape = (img_original.height, img_original.width)  # (height, width)
    transforms = build_transforms(cfg, is_train=False)

    for Human_out in Test_RCNN[image_id]:
        if (np.max(Human_out[5]) > human_thres) and (
                Human_out[1] == 'Human'):  # This is a valid human

            O_box = np.empty((0, 4), dtype=np.float32)
            O_vec = np.empty((0, 300), dtype=np.float32)
            Pattern = np.empty((0, 2, 64, 64), dtype=np.float32)
            O_score = np.empty((0, 1), dtype=np.float32)
            O_class = np.empty((0, 1), dtype=np.int32)
            Weight_mask = np.empty((0, 29), dtype=np.float32)

            for Object in Test_RCNN[image_id]:
                if (np.max(Object[5]) > object_thres) and not (np.all(
                        Object[2] == Human_out[2])):  # This is a valid object
                    O_box_ = np.array([
                        Object[2][0], Object[2][1], Object[2][2], Object[2][3]
                    ]).reshape(1, 4)
                    O_box = np.concatenate((O_box, O_box_), axis=0)

                    O_vec_ = fastText[Object[4]]
                    O_vec = np.concatenate((O_vec, O_vec_), axis=0)

                    # Pattern_ = Get_next_sp(Human_out[2], Object[2]).reshape(1, 64, 64, 2)
                    # Pattern  = np.concatenate((Pattern, Pattern_), axis=0)
                    Pattern_ = generate_spatial(Human_out[2],
                                                Object[2]).reshape(
                                                    1, 2, 64, 64)
                    Pattern = np.concatenate((Pattern, Pattern_), axis=0)

                    O_score = np.concatenate(
                        (O_score, np.max(Object[5]).reshape(1, 1)), axis=0)
                    O_class = np.concatenate(
                        (O_class, np.array(Object[4]).reshape(1, 1)), axis=0)

                    Weight_mask_ = prior_mask[:, Object[4]].reshape(1, 29)
                    Weight_mask = np.concatenate((Weight_mask, Weight_mask_),
                                                 axis=0)

            H_box = np.array([
                Human_out[2][0], Human_out[2][1], Human_out[2][2],
                Human_out[2][3]
            ]).reshape(1, 4)

            if len(O_box) == 0:
                continue

            blobs = {}
            pos_num = len(O_box)
            blobs['pos_num'] = pos_num
            # blobs['dropout_is_training'] = False
            human_boxes_cpu = np.tile(H_box,
                                      [len(O_box), 1]).reshape(pos_num, 4)
            human_boxes = torch.FloatTensor(human_boxes_cpu)
            object_boxes_cpu = O_box.reshape(pos_num, 4)
            object_boxes = torch.FloatTensor(object_boxes_cpu)

            blobs['object_word_embeddings'] = torch.FloatTensor(O_vec).reshape(
                pos_num, 300)

            human_boxlist = BoxList(human_boxes,
                                    img_original.size,
                                    mode="xyxy")  # image_size=(width, height)
            object_boxlist = BoxList(object_boxes,
                                     img_original.size,
                                     mode="xyxy")  # image_size=(width, height)

            img, human_boxlist, object_boxlist = transforms(
                img_original, human_boxlist, object_boxlist)

            spatials = []
            for human_box, object_box in zip(human_boxlist.bbox,
                                             object_boxlist.bbox):
                ho_spatial = generate_spatial(human_box.numpy(),
                                              object_box.numpy()).reshape(
                                                  1, 2, 64, 64)
                spatials.append(ho_spatial)
            blobs['spatials'] = torch.FloatTensor(spatials).reshape(
                -1, 2, 64, 64)
            blobs['human_boxes'], blobs['object_boxes'] = (human_boxlist, ), (
                object_boxlist, )

            for key in blobs.keys():
                if not isinstance(blobs[key], int) and not isinstance(
                        blobs[key], tuple):
                    blobs[key] = blobs[key].to(device)
                elif isinstance(blobs[key], tuple):
                    blobs[key] = [boxlist.to(device) for boxlist in blobs[key]]

            image_list = to_image_list(img, cfg.DATALOADER.SIZE_DIVISIBILITY)
            image_list = image_list.to(device)

            # compute predictions
            model.eval()
            with torch.no_grad():
                prediction_HO, prediction_H, prediction_O, prediction_sp = model(
                    image_list, blobs)

            # convert to np.array; evaluate the H and O branches only
            prediction_HO = prediction_H * prediction_O

            prediction_HO = prediction_HO.data.cpu().numpy()
            prediction_H = prediction_H.data.cpu().numpy()
            prediction_O = prediction_O.data.cpu().numpy()
            # prediction_sp = prediction_sp.data.cpu().numpy()

            dic_save = {}
            dic_save['image_id'] = image_id
            dic_save['person_box'] = Human_out[2]
            dic_save['person_score'] = np.max(Human_out[5])
            dic_save['prediction_HO'] = prediction_HO
            dic_save['prediction_H'] = prediction_H
            dic_save['prediction_O'] = prediction_O
            dic_save['o_class'] = O_class
            dic_save['object_boxes_cpu'] = object_boxes_cpu
            dic_save['O_score'] = O_score

            detect_app_dict[image_id].append(dic_save)

            if prior_flag == 1:
                prediction_HO = apply_prior_Graph(O_class, prediction_HO)
            if prior_flag == 2:
                prediction_HO = prediction_HO * Weight_mask
            if prior_flag == 3:
                prediction_HO = apply_prior_Graph(O_class, prediction_HO)
                prediction_HO = prediction_HO * Weight_mask

            # save image information
            dic = {}
            dic['image_id'] = image_id
            dic['person_box'] = Human_out[2]

            Score_obj = prediction_HO * O_score
            Score_obj = np.concatenate((object_boxes_cpu, Score_obj), axis=1)
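            # Each row of Score_obj is [x1, y1, x2, y2, score_0, ..., score_28],
            # so column 4 + i holds the score for action class i.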

            # Find out the object box associated with highest action score
            max_idx = np.argmax(Score_obj, 0)[4:]

            # agent mAP
            for i in range(29):
                # walk, smile, run, stand
                if (i == 3) or (i == 17) or (i == 22) or (i == 27):
                    agent_name = Action_dic_inv[i] + '_agent'
                    dic[agent_name] = np.max(Human_out[5]) * prediction_H[0][i]
                    continue

                # cut
                if i == 2:
                    agent_name = 'cut_agent'
                    dic[agent_name] = np.max(Human_out[5]) * max(
                        Score_obj[max_idx[2]][4 + 2],
                        Score_obj[max_idx[4]][4 + 4])
                    continue
                if i == 4:
                    continue

                # eat
                if i == 9:
                    agent_name = 'eat_agent'
                    dic[agent_name] = np.max(Human_out[5]) * max(
                        Score_obj[max_idx[9]][4 + 9],
                        Score_obj[max_idx[16]][4 + 16])
                    continue
                if i == 16:
                    continue

                # hit
                if i == 19:
                    agent_name = 'hit_agent'
                    dic[agent_name] = np.max(Human_out[5]) * max(
                        Score_obj[max_idx[19]][4 + 19],
                        Score_obj[max_idx[20]][4 + 20])
                    continue
                if i == 20:
                    continue

                # These two actions are saved manually because their names contain '_'
                if i == 6:
                    agent_name = 'talk_on_phone_agent'
                    dic[agent_name] = np.max(
                        Human_out[5]) * Score_obj[max_idx[i]][4 + i]
                    continue

                if i == 8:
                    agent_name = 'work_on_computer_agent'
                    dic[agent_name] = np.max(
                        Human_out[5]) * Score_obj[max_idx[i]][4 + i]
                    continue

                # all the rest
                agent_name = Action_dic_inv[i].split("_")[0] + '_agent'
                dic[agent_name] = np.max(
                    Human_out[5]) * Score_obj[max_idx[i]][4 + i]

            # role mAP
            for i in range(29):
                # walk, smile, run, stand. Won't contribute to role mAP
                if (i == 3) or (i == 17) or (i == 22) or (i == 27):
                    dic[Action_dic_inv[i]] = np.append(
                        np.full(4, np.nan).reshape(1, 4),
                        np.max(Human_out[5]) * prediction_H[0][i])
                    continue

                # Impossible to perform this action
                if np.max(Human_out[5]) * Score_obj[max_idx[i]][4 + i] == 0:
                    dic[Action_dic_inv[i]] = np.append(
                        np.full(4, np.nan).reshape(1, 4),
                        np.max(Human_out[5]) * Score_obj[max_idx[i]][4 + i])

                # Action with >0 score
                else:
                    dic[Action_dic_inv[i]] = np.append(
                        Score_obj[max_idx[i]][:4],
                        np.max(Human_out[5]) * Score_obj[max_idx[i]][4 + i])

            detection.append(dic)
Example #11
def make_data_loader(root_path,
                     cfg,
                     is_train=True,
                     is_distributed=False,
                     start_iter=0,
                     class_ids=None,
                     ignore_labels=False):
    num_gpus = get_world_size()
    if is_train:
        images_per_batch = cfg.SOLVER.IMS_PER_BATCH
        assert images_per_batch % num_gpus == 0, \
            "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number " \
            "of GPUs ({}) used.".format(images_per_batch, num_gpus)
        images_per_gpu = images_per_batch // num_gpus
        shuffle = True
        num_iters = cfg.SOLVER.MAX_ITER
    else:
        images_per_batch = cfg.TEST.IMS_PER_BATCH
        assert images_per_batch % num_gpus == 0, \
            "TEST.IMS_PER_BATCH ({}) must be divisible by the number " \
            "of GPUs ({}) used.".format(images_per_batch, num_gpus)
        images_per_gpu = images_per_batch // num_gpus
        shuffle = is_distributed
        num_iters = None
        start_iter = 0

    if images_per_gpu > 1:
        logger = logging.getLogger(
            "maskrcnn_benchmark.dataset_gtboxframe.make_data_loader")
        logger.warning(
            "When using more than one image per GPU you may encounter "
            "an out-of-memory (OOM) error if your GPU does not have "
            "sufficient memory. If this happens, you can reduce "
            "SOLVER.IMS_PER_BATCH (for training) or "
            "TEST.IMS_PER_BATCH (for inference). For training, you must "
            "also adjust the learning rate and schedule length according "
            "to the linear scaling rule. See for example: "
            "https://github.com/facebookresearch/Detectron/blob/master/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml#L14"
        )

    # Group images with similar aspect ratios. Here only two groups are formed:
    # width / height > 1 and width / height <= 1, although the code supports
    # more general grouping strategies.
    aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else []

    transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else build_transforms(
        cfg, is_train)

    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST

    if not is_train and not ignore_labels:
        assert class_ids is not None, "For validation datasets, class_ids has to be provided!"

    datasets = [
        build_detection_dataset_by_name(root_path,
                                        name,
                                        transforms,
                                        class_ids=class_ids,
                                        cache_images=False,
                                        ignore_labels=ignore_labels)
        for name in dataset_list
    ]

    if is_train:
        assert len(datasets) == 1, \
            "Can only train on one dataset; otherwise classes must be merged"
        class_ids = datasets[0].get_class_ids()

    data_loaders = []
    for dataset in datasets:
        sampler = make_data_sampler(dataset, shuffle, is_distributed)
        batch_sampler = make_batch_data_sampler(dataset, sampler,
                                                aspect_grouping,
                                                images_per_gpu, num_iters,
                                                start_iter)
        collator = BBoxAugCollator() if not is_train and cfg.TEST.BBOX_AUG.ENABLED else \
            BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)
        num_workers = cfg.DATALOADER.NUM_WORKERS
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=num_workers,
            batch_sampler=batch_sampler,
            collate_fn=collator,
        )
        data_loaders.append(data_loader)
    if is_train:
        # during training a single (possibly concatenated) data_loader is returned
        assert len(data_loaders) == 1
        return data_loaders[0], class_ids
    return data_loaders
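
A short usage sketch for this variant, assuming root_path and a frozen cfg are already set up; note the asymmetric return values:

# training: a single loader plus the class ids discovered in the dataset
train_loader, class_ids = make_data_loader(root_path, cfg, is_train=True)

# evaluation: the class ids from training must be passed back in
test_loaders = make_data_loader(root_path, cfg, is_train=False,
                                class_ids=class_ids)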
Example #12
def cross_do_train(
        cfg,
        model,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
        distributed
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start cross training!")
    meters = MetricLogger(delimiter="  ")
    max_iter = cfg.SOLVER.MAX_ITER
    start_iter = arguments["iteration"]
    model.train()
    # ---------------- prepare ----------------
    is_train = True
    paths_catalog = import_file(
        "maskrcnn_benchmark.config.paths_catalog", cfg.PATHS_CATALOG, True
    )
    DatasetCatalog = paths_catalog.DatasetCatalog
    transforms = build_transforms(cfg, is_train=is_train)
    dataset_list = cfg.DATASETS.TRAIN

    start_training_time = time.time()
    end = time.time()

    summary_writer = None
    if get_rank() == 0:
        import tensorboardX
        summary_writer = tensorboardX.SummaryWriter(os.path.join(checkpointer.save_dir, 'tf_logs'))

    ann_file = cfg.TEST.PSEUDO_LABELS_ANN_FILE
    images_dir = cfg.TEST.TEST_IMAGES_DIR

    iteration = start_iter
    total_steps = cfg.SOLVER.CROSS_TRAIN_STEPS
    for step in range(total_steps):
        logger.info('Start training {}th/{} step'.format(step + 1, total_steps))
        iter_per_step = cfg.SOLVER.ITER_PER_STEP

        pseudo_dataset = RPCPseudoDataset(images_dir=images_dir, ann_file=ann_file, use_density_map=True, transforms=transforms)
        # ---------------------------------------------------------------------------------
        pseudo_dataset.density_categories = cfg.MODEL.DENSITY_HEAD.NUM_CLASSES
        pseudo_dataset.density_map_stride = cfg.MODEL.DENSITY_HEAD.FPN_LEVEL_STRIDE
        min_sigmas = {
            1: 1.0,
            2: 0.5,
            3: 0.333,
        }
        min_sigma = min_sigmas[cfg.MODEL.DENSITY_HEAD.FPN_LEVEL]
        pseudo_dataset.density_min_sigma = min_sigma
        logger.info('using density_min_sigma: {}'.format(min_sigma))
        # ---------------------------------------------------------------------------------

        train_datasets = build_dataset(cfg, dataset_list, transforms, DatasetCatalog, is_train)

        ratio = cfg.SOLVER.CROSS_TRAIN_DATA_RATIO
        if ratio > 0:  # source subset whose size scales with the pseudo dataset
            assert len(train_datasets) == 1
            train_size = len(train_datasets[0])
            indices = np.arange(train_size)
            train_size = min(train_size, int(ratio * len(pseudo_dataset)))
            indices = np.random.choice(indices, size=train_size, replace=False)
            subset_dataset = Subset(train_datasets[0], indices=indices)
            train_datasets = [subset_dataset]
        elif ratio < 0:  # fixed-size source subset of abs(ratio) images
            assert len(train_datasets) == 1
            train_size = len(train_datasets[0])
            indices = np.arange(train_size)
            train_size = min(train_size, abs(ratio))
            indices = np.random.choice(indices, size=train_size, replace=False)
            subset_dataset = Subset(train_datasets[0], indices=indices)
            train_datasets = [subset_dataset]

        datasets_s = train_datasets + [pseudo_dataset]
        datasets_s = ConcatDataset(datasets_s)

        # logger.info('Subset train dataset: {}'.format(len(subset_dataset)))
        logger.info('Pseudo train dataset: {}'.format(len(pseudo_dataset)))
        logger.info('Combined train dataset (source + pseudo): {}'.format(len(datasets_s)))

        data_loader_t = make_data_loader(
            cfg,
            is_train=is_train,
            is_distributed=distributed,
            start_iter=0,
            datasets=[datasets_s],
            num_iters=iter_per_step
        )

        thresholds = [0.8, 0.85, 0.9, 0.95]
        threshold = thresholds[bisect_right([5, 8, 9], step)]
        for (images_t, targets_t, _) in data_loader_t:
            data_time = time.time() - end
            iteration = iteration + 1
            arguments["iteration"] = iteration

            scheduler.step()

            # images_s = images_s.to(device)
            # targets_s = [target.to(device) for target in targets_s]
            # loss_dict_s = model(images_s, targets_s, is_target_domain=False)
            # loss_dict_s = {key + '_s': value for key, value in loss_dict_s.items()}

            images_t = images_t.to(device)
            targets_t = [target.to(device) for target in targets_t]
            loss_dict = model(images_t, targets_t, is_target_domain=True)

            # loss_dict.update(loss_dict_s)

            losses = sum(loss for loss in loss_dict.values())

            # reduce losses over all GPUs for logging purposes
            loss_dict_reduced = reduce_loss_dict(loss_dict)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            meters.update(loss=losses_reduced, **loss_dict_reduced)

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            batch_time = time.time() - end
            end = time.time()
            meters.update(time=batch_time, data=data_time)

            eta_seconds = meters.time.global_avg * (max_iter - iteration)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

            if iteration % 20 == 0 or iteration == max_iter:
                if summary_writer:
                    summary_writer.add_scalar('loss/total_loss', losses_reduced, global_step=iteration)
                    for name, value in loss_dict_reduced.items():
                        summary_writer.add_scalar('loss/%s' % name, value, global_step=iteration)
                    summary_writer.add_scalar('lr', optimizer.param_groups[0]["lr"], global_step=iteration)

                logger.info(
                    meters.delimiter.join(
                        [
                            "eta: {eta}",
                            "iter: {iter}",
                            "{meters}",
                            "lr: {lr:.6f}",
                            "max mem: {memory:.0f}",
                        ]
                    ).format(
                        eta=eta_string,
                        iter=iteration,
                        meters=str(meters),
                        lr=optimizer.param_groups[0]["lr"],
                        memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                    )
                )
            if iteration % checkpoint_period == 0:
                checkpointer.save("model_{:07d}".format(iteration), **arguments)
                if iteration != max_iter:
                    eval_results = do_test(cfg, model, distributed, threshold, iteration=iteration)
                    if get_rank() == 0 and summary_writer:  # results are returned only on the main process
                        for eval_result, dataset in zip(eval_results, cfg.DATASETS.TEST):
                            write_metric(eval_result['metrics'], 'metrics/' + dataset, summary_writer, iteration)
                    model.train()  # restore train state

        logger.info('Generating new pseudo labels...')
        test_dataset = RPCTestDataset(images_dir=cfg.TEST.TEST_IMAGES_DIR,
                                      ann_file=cfg.TEST.TEST_ANN_FILE,
                                      transforms=build_transforms(cfg, is_train=False))
        dataset_name = 'rpc_2019_test'
        dataset_names = [dataset_name]
        eval_results = do_test(cfg, model, distributed, threshold, iteration=iteration, generate_pseudo_labels=True, dataset_names=dataset_names,
                               datasets=[test_dataset], use_ground_truth=True)
        if get_rank() == 0 and summary_writer:  # results are returned only on the main process
            for eval_result, dataset in zip(eval_results, dataset_names):
                write_metric(eval_result['metrics'], 'metrics/' + dataset, summary_writer, iteration)
        model.train()  # restore train state
        ann_file = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name, 'pseudo_labeling.json')

    checkpointer.save("model_final", **arguments)
    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info(
        "Total training time: {} ({:.4f} s / it)".format(
            total_time_str, total_training_time / (max_iter)
        )
    )
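
The pseudo-label threshold schedule above uses bisect_right to map the step index onto increasingly strict thresholds; a tiny illustration of the lookup:

from bisect import bisect_right

thresholds = [0.8, 0.85, 0.9, 0.95]
boundaries = [5, 8, 9]
for step in (0, 4, 5, 8, 9):
    print(step, thresholds[bisect_right(boundaries, step)])
# steps 0-4 -> 0.8, steps 5-7 -> 0.85, step 8 -> 0.9, steps 9+ -> 0.95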
Example #13
def make_data_loader(cfg, is_train=True, is_distributed=False, start_iter=0, is_final_test=False, is_target_task=False, icwt_21_objs=False):
    num_gpus = get_world_size()
    if is_train:
        images_per_batch = cfg.SOLVER.IMS_PER_BATCH
        assert images_per_batch % num_gpus == 0, \
            "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number " \
            "of GPUs ({}) used.".format(images_per_batch, num_gpus)
        images_per_gpu = images_per_batch // num_gpus
        shuffle = True
        num_iters = cfg.SOLVER.MAX_ITER
    else:
        images_per_batch = cfg.TEST.IMS_PER_BATCH
        assert images_per_batch % num_gpus == 0, \
            "TEST.IMS_PER_BATCH ({}) must be divisible by the number " \
            "of GPUs ({}) used.".format(images_per_batch, num_gpus)
        images_per_gpu = images_per_batch // num_gpus
        shuffle = is_distributed
        num_iters = None
        start_iter = 0

    if images_per_gpu > 1:
        logger = logging.getLogger(__name__)
        logger.warning(
            "When using more than one image per GPU you may encounter "
            "an out-of-memory (OOM) error if your GPU does not have "
            "sufficient memory. If this happens, you can reduce "
            "SOLVER.IMS_PER_BATCH (for training) or "
            "TEST.IMS_PER_BATCH (for inference). For training, you must "
            "also adjust the learning rate and schedule length according "
            "to the linear scaling rule. See for example: "
            "https://github.com/facebookresearch/Detectron/blob/master/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml#L14"
        )

    # Group images with similar aspect ratios. Here only two groups are formed:
    # width / height > 1 and width / height <= 1, although the code supports
    # more general grouping strategies.
    aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else []

    paths_catalog = import_file(
        "maskrcnn_benchmark.config.paths_catalog", cfg.PATHS_CATALOG, True
    )

    DatasetCatalog = paths_catalog.DatasetCatalog
    if os.path.exists(cfg.DATA_DIR):
        DatasetCatalog.DATA_DIR = cfg.DATA_DIR
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST

    transforms = build_transforms(cfg, is_train)
    datasets = build_dataset(dataset_list, transforms, DatasetCatalog, is_train, is_target_task, icwt_21_objs)

    data_loaders = []
    for dataset in datasets:
        sampler = make_data_sampler(dataset, shuffle, is_distributed)
        batch_sampler = make_batch_data_sampler(
            dataset, sampler, aspect_grouping, images_per_gpu, num_iters, start_iter
        )
        collator = BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)
        num_workers = cfg.DATALOADER.NUM_WORKERS
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=num_workers,
            batch_sampler=batch_sampler,
            collate_fn=collator,
        )
        data_loaders.append(data_loader)

    if is_final_test:
        return data_loaders

    assert len(data_loaders) == 1
    return data_loaders[0]
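
A brief usage sketch, assuming cfg, distributed and arguments are set up as in the train function shown earlier on this page:

# training: a single (possibly concatenated) loader is returned
train_loader = make_data_loader(cfg, is_train=True, is_distributed=distributed,
                                start_iter=arguments["iteration"])

# final test: one loader per dataset in cfg.DATASETS.TEST
test_loaders = make_data_loader(cfg, is_train=False, is_final_test=True)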