def build_dataset(cfg):
    """Run every enabled dataset-preparation stage for each configured split.

    Each entry of ``cfg.DATA_CFG.SPLITS`` is registered unconditionally and
    then processed by the stages whose flags are set in ``cfg.DATA_CFG``, in
    this order:

    1. ``REPROCESS_RAW_VIDEOS``: convert the raw data with the processor
       selected by ``BASE_NAME`` and export a COCO-format JSON.
    2. ``TRAINED_DETECTOR.DO_INFERENCE``: add inferred boxes.
    3. ``TRAINED_DERENDER.DO_INFERENCE``: add inferred attributes, once per
       key of ``TRAINED_DERENDER.ATTRIBUTES_WEIGHTS_MAP``.
    4. ``SHAPESWORLD_JSON.REPROCESS``: build the "shapesworld" JSONs.
    5. ``ADEPT_JSON.REPROCESS``: build the "adept" JSONs.

    Args:
        cfg: configuration object exposing ``DATA_CFG``.
    """
    data_cfg = cfg.DATA_CFG

    for split_name in data_cfg.SPLITS:
        # Only the JSON path is needed here; the dataset name is discarded.
        _, frames_json = get_dataset_name_and_json(data_cfg, split_name)
        register_dataset(data_cfg, split_name)

        if data_cfg.REPROCESS_RAW_VIDEOS:
            convert_raw = RAW_PROCESSORS_MAP[data_cfg.BASE_NAME]
            convert_raw(data_cfg, split_name, frames_json)
            write_coco_format_json(data_cfg, split_name)

        if data_cfg.TRAINED_DETECTOR.DO_INFERENCE:
            write_with_inferred_boxes(data_cfg, split_name)

        if data_cfg.TRAINED_DERENDER.DO_INFERENCE:
            for weights_key in data_cfg.TRAINED_DERENDER.ATTRIBUTES_WEIGHTS_MAP:
                write_with_inferred_attributes(data_cfg, split_name, weights_key)

        if data_cfg.SHAPESWORLD_JSON.REPROCESS:
            for attrs_key in data_cfg.ATTRIBUTES_KEYS:
                # The literal string "None" (not the None object) is passed.
                JsonGenerator(
                    data_cfg,
                    split_name,
                    "shapesworld",
                    attrs_key,
                    vel_data_assoc="None",
                )

        if data_cfg.ADEPT_JSON.REPROCESS:
            for attrs_key in data_cfg.ADEPT_JSON.ATTRIBUTES_KEYS:
                JsonGenerator(
                    data_cfg,
                    split_name,
                    "adept",
                    attrs_key,
                    vel_data_assoc=data_cfg.ADEPT_JSON.VEL_DATA_ASSOC,
                )
# Example #2
# 0
    def __init__(self, cfg, split, max_obj=10):
        """Load the frame dicts of ``split`` and drop the frames that are filtered out.

        A frame is kept only when both conditions hold:

        * the sum of ``visible * (type == 0)`` over its annotations is non-zero
          (i.e. at least one visible annotation of type 0 when ``visible`` is
          a 0/1 flag), and
        * it contains no visible annotation of type 1 (called "occluder" in
          the original code) or type 3 (called "wall").

        The list returned by ``utils.get_data_dicts`` is filtered in place.

        Args:
            cfg: configuration exposing ``DATA_CFG`` and ``MODULE_CFG``.
            split: name of the split to load.
            max_obj: not used by this constructor; kept so existing callers
                that pass it keep working.
        """
        self.cfg = cfg
        self.split = split
        data_cfg = cfg.DATA_CFG
        module_cfg = cfg.MODULE_CFG

        num_frames = utils.get_num_frames(data_cfg, split)
        dataset_name, standard_format_json_file = utils.get_dataset_name_and_json(
            data_cfg, split)
        print("reading datasets {}".format(dataset_name))
        start = time.time()
        self.dataset_dicts = utils.get_data_dicts(standard_format_json_file,
                                                  num_frames)

        # Author's note: DerenderAttributes was edited to basically do nothing.
        # That allows calling it without registering the dataset with
        # detectron, at the cost of much of its former functionality.
        self.attributes = DerenderAttributes(module_cfg)

        # The mapper is only stored here; it is not applied to the dicts in
        # this constructor.
        self.mapper = trainers.trainable_derender.ImageBasedDerenderMapper(
            module_cfg.DATASETS.USE_PREDICTED_BOXES,
            self.attributes,
            False,  # for_inference
            use_depth=module_cfg.DATASETS.USE_DEPTH)

        def _keep_frame(frame):
            """Return True when ``frame`` passes the visibility/type filter."""
            visible_type0 = 0
            has_wall = False
            has_occluder = False
            for annotation in frame["annotations"]:
                attrs = annotation["attributes"]
                visible = attrs["visible"]
                obj_type = attrs["type"]
                visible_type0 += visible * (obj_type == 0)
                has_wall = has_wall or bool(visible and obj_type == 3)
                has_occluder = has_occluder or bool(visible and obj_type == 1)
            return not (visible_type0 == 0 or has_wall or has_occluder)

        # Single-pass, in-place filter: slice assignment keeps the same list
        # object while avoiding one O(n) ``pop`` per rejected frame.
        self.dataset_dicts[:] = [
            frame for frame in self.dataset_dicts if _keep_frame(frame)
        ]

        print("done after {}".format(time.time() - start))
def write_coco_format_json(cfg, split):
    """Export the registered dataset of ``split`` as a COCO-format JSON file.

    The dataset dicts are filtered with ``filter_dataset`` using the required
    fields "bbox", "bbox_mode" and "segmentation", registered under
    ``<dataset_name>_for_coco``, converted with ``convert_to_coco_dict`` and
    dumped next to the standard-format JSON, with ".json" replaced by
    "_coco_format.json" in its path.

    Args:
        cfg: data configuration passed to ``get_dataset_name_and_json`` and
            ``register_dataset``.
        split: name of the split to export.
    """
    timer = CodeTimer("writting to coco")
    dataset_name, standard_format_json_file = get_dataset_name_and_json(cfg, split)
    coco_dataset_name = dataset_name + "_for_coco"

    all_dicts = DatasetCatalog.get(dataset_name)
    _, coco_ready_dicts = filter_dataset(
        all_dicts,
        required_fields=["bbox", "bbox_mode", "segmentation"],
    )

    def _get_coco_ready_dicts():
        # Getter handed to register_dataset; returns the pre-filtered dicts.
        return coco_ready_dicts

    register_dataset(cfg, split, getter=_get_coco_ready_dicts,
                     name=coco_dataset_name)
    coco_dict = convert_to_coco_dict(coco_dataset_name)

    output_path = standard_format_json_file.replace(".json", "_coco_format.json")
    with open(output_path, "w") as output_file:
        json.dump(coco_dict, output_file)

    timer.done()
    def __init__(self, cfg, split):
        """Load the frame dicts of ``split`` and pass them through
        ``image_based_to_annotation_based``.

        Args:
            cfg: configuration exposing ``DATA_CFG`` and ``MODULE_CFG``.
            split: name of the split to load.
        """
        self.cfg = cfg
        self.split = split
        data_cfg = cfg.DATA_CFG
        num_frames = utils.get_num_frames(data_cfg, split)
        dataset_name, standard_format_json_file = utils.get_dataset_name_and_json(data_cfg, split)
        print("reading datasets {}".format(dataset_name))
        # NOTE(review): `start` is not read again in the visible part of this
        # constructor; presumably it feeds a timing message — confirm.
        start = time.time()
        self.dataset_dicts = utils.get_data_dicts(standard_format_json_file, num_frames)

        # Redoing derender_dataset from trainable_derenderer without detectron.
        # Predicted boxes are required when configured, ground-truth boxes
        # otherwise; "attributes" is always required.
        required_fields = ["pred_box"] if cfg.MODULE_CFG.DATASETS.USE_PREDICTED_BOXES else ["bbox"]
        required_fields += ["attributes"]
        # The first returned value is discarded; only the converted dicts are kept.
        _, self.dataset_dicts = image_based_to_annotation_based(self.dataset_dicts, required_fields)
def write_with_inferred_boxes(cfg, split):
    """Run the trained detector on ``split`` and rewrite its JSON with predicted boxes.

    The detector config is loaded from ``cfg.TRAINED_DETECTOR.EXP_DIR`` and its
    weights are set to ``cfg.TRAINED_DETECTOR.WEIGHTS_FILE``. Every
    (input, output) pair produced by the predictor is post-processed with
    ``add_inferred_boxes`` (serially when ``cfg.DEBUG`` is set, in a process
    pool otherwise), the results are filtered by
    ``cfg.PRED_BOX_SCORE_THRESHOLD`` and written over the standard-format JSON
    of the split.

    If ``cfg`` has no ``PRED_BOX_SCORE_THRESHOLD`` yet, it is inferred from the
    results of this call and stored on ``cfg``; this is only allowed for a
    split whose name contains "_val".

    Args:
        cfg: data configuration (mutated when the threshold is inferred).
        split: name of the split to process.

    Raises:
        AssertionError: if no threshold is configured and ``split`` does not
            contain "_val".
    """
    # TODO: now there are invisible objects the detection mapper ignores that, will have to debug tomorrow
    timer = CodeTimer("adding inferred boxes")

    # Check the precondition before the expensive inference pass, and raise
    # explicitly so the check is not stripped when Python runs with -O.
    needs_threshold = 'PRED_BOX_SCORE_THRESHOLD' not in cfg
    if needs_threshold and '_val' not in split:
        raise AssertionError(
            "start with validation split to compute detection threshold")

    module_cfg = os.path.join(cfg.TRAINED_DETECTOR.EXP_DIR, "config.yaml")
    module_cfg = load_cfg_from_file(module_cfg)
    module_cfg.MODEL.WEIGHTS = cfg.TRAINED_DETECTOR.WEIGHTS_FILE
    if cfg.DEBUG:
        module_cfg.DATALOADER.NUM_WORKERS = 0

    predictor = DetectorPredictor(module_cfg)

    dataset_name, standard_format_json_file = get_dataset_name_and_json(
        cfg, split)
    data_loader = inference_detection_loader(
        module_cfg.clone(), dataset_name, DetectionMapper(module_cfg.clone()))

    worker_args = []
    with torch.no_grad():
        for inputs in data_loader:
            outputs = predictor(inputs)
            worker_args.extend(
                parse_worker_args(single_input, single_output)
                for single_input, single_output in zip(inputs, outputs))

    if cfg.DEBUG:
        new_dicts = [add_inferred_boxes(*w) for w in worker_args]
    else:
        # Use a quarter of the CPUs but never fewer than one process:
        # Pool(0) raises ValueError on machines with fewer than 4 cores.
        with Pool(max(1, cpu_count() // 4)) as p:
            new_dicts = p.starmap(add_inferred_boxes, worker_args)

    if needs_threshold:
        cfg.PRED_BOX_SCORE_THRESHOLD = infer_score_threshold(new_dicts)

    new_dicts = filter_predicted_boxes_threshold(new_dicts,
                                                 cfg.PRED_BOX_SCORE_THRESHOLD)

    with open(standard_format_json_file, 'w') as f:
        json.dump(new_dicts, f, indent=4)

    timer.done()
# Example #6
# 0
    def build_physics_jsons(self, data_cfg, split):
        """Write one physics JSON per video of ``split``.

        The registered dataset is filtered with ``filter_dataset`` using
        ``{self.attributes_key: {"visible": 1}}`` as required field values,
        grouped with ``frames2videos``, and each resulting video is handed to
        ``self.video2json`` together with the output directory. Videos are
        processed serially when ``data_cfg.DEBUG`` is set and in a process
        pool with one worker per CPU otherwise. ``self.timer.done()`` is
        called at the end.

        Args:
            data_cfg: data configuration.
            split: name of the split to process.
        """
        dataset_name, _ = get_dataset_name_and_json(data_cfg, split)
        all_frames = DatasetCatalog.get(dataset_name)
        visible_only = {self.attributes_key: {"visible": 1}}
        _, kept_frames = filter_dataset(
            all_frames, required_fields_values=visible_only)

        videos = frames2videos(kept_frames)
        target_dir = get_jsons_directory(data_cfg, self.target_physics,
                                         self.attributes_key, dataset_name)
        os.makedirs(target_dir, exist_ok=True)

        # One job per video: (config, video dict, output dir, video number).
        jobs = [(data_cfg, video, target_dir, video_id)
                for video_id, video in videos.items()]

        if data_cfg.DEBUG:
            for job in jobs:
                self.video2json(*job)
        else:
            with Pool(int(cpu_count())) as pool:
                pool.starmap(self.video2json, jobs)

        self.timer.done()
def write_with_inferred_attributes(cfg, split, attributes_key):
    """Run the trained derenderer on ``split`` and store its outputs per annotation.

    The derender config is loaded from ``cfg.TRAINED_DERENDER.EXP_DIR`` with the
    weights mapped to ``attributes_key``. Each predicted value is written to
    ``dataset[img_idx]["annotations"][an_idx][attributes_key]`` as a dict of
    Python scalars, and the whole dataset is then dumped over the
    standard-format JSON of the split.

    Args:
        cfg: data configuration.
        split: name of the split to process.
        attributes_key: key of ``ATTRIBUTES_WEIGHTS_MAP`` selecting the weights;
            also the annotation key under which predictions are stored.
    """
    timer = CodeTimer(
        "adding inferred attributes split:{}, attributes_key:{}".format(
            split, attributes_key))

    derender_cfg_path = os.path.join(cfg.TRAINED_DERENDER.EXP_DIR, "cfg.yaml")
    module_cfg = load_cfg_from_file(derender_cfg_path)
    module_cfg.MODEL.WEIGHTS = cfg.TRAINED_DERENDER.ATTRIBUTES_WEIGHTS_MAP[
        attributes_key]

    # Dataset-specific loader settings.
    if cfg.BASE_NAME == "intphys":
        module_cfg.DATALOADER.OBJECTS_PER_BATCH = 1000
    else:
        module_cfg.DATALOADER.OBJECTS_PER_BATCH = 450
    if cfg.BASE_NAME == "adept":
        module_cfg.DATALOADER.NUM_WORKERS = 8

    # Debug mode overrides the settings above.
    if cfg.DEBUG:
        module_cfg.DATALOADER.NUM_WORKERS = 0
        module_cfg.DEBUG = True
        module_cfg.DATALOADER.OBJECTS_PER_BATCH = 50

    predictor = DerenderPredictor(module_cfg)

    dataset_name, standard_format_json_file = get_dataset_name_and_json(
        cfg, split)
    dataset = DatasetCatalog.get(dataset_name)

    use_inferred_boxes = cfg.TRAINED_DERENDER.USE_INFERRED_BOXES
    if use_inferred_boxes:
        required_fields = ["pred_box"]
    else:
        required_fields = ["bbox"]

    # filtered_idx entries are unpacked below as (image index, annotation
    # index); presumably they are aligned one-to-one with mapped_dataset —
    # verify against image_based_to_annotation_based.
    filtered_idx, per_object_dicts = image_based_to_annotation_based(
        dataset, required_fields)
    mapper = DerenderMapper(cfg.TRAINED_DERENDER.USE_INFERRED_BOXES,
                            predictor.attributes,
                            for_inference=True,
                            use_depth=cfg.TRAINED_DERENDER.USE_DEPTH)
    mapped_dataset = MapDataset(
        DatasetFromList(per_object_dicts, copy=False), mapper)

    # shuffle=False keeps batches in dataset order, which the running offset
    # into filtered_idx below relies on.
    data_loader = DataLoader(
        dataset=mapped_dataset,
        batch_size=module_cfg.DATALOADER.OBJECTS_PER_BATCH,
        num_workers=module_cfg.DATALOADER.NUM_WORKERS,
        shuffle=False)

    offset = 0
    with torch.no_grad():
        for batch in data_loader:
            outputs = predictor(to_cuda(batch))
            # The batch size is read from the first output's leading dimension.
            first_output = list(outputs.values())[0]
            batch_size = first_output.shape[0]
            batch_targets = filtered_idx[offset:offset + batch_size]

            for row, (img_idx, an_idx) in enumerate(batch_targets):
                predicted = {}
                for name, values in outputs.items():
                    predicted[name] = values[row].item()
                dataset[img_idx]["annotations"][an_idx][attributes_key] = predicted

            offset += batch_size

    serializable = [fix_for_serialization(entry) for entry in dataset]

    with open(standard_format_json_file, "w") as out_file:
        json.dump(serializable, out_file, indent=4)

    timer.done()