Example #1
    def __init__(self,
                 h5_path,
                 base_path,
                 mode,
                 image_size=(64, 64),
                 mask_size=0,
                 normalize_images=True,
                 max_objects=10,
                 max_samples=None,
                 include_dummies=False,
                 include_relationships=True,
                 use_orphaned_objects=True,
                 debug=False,
                 learned_converse=False,
                 use_transitivity=False,
                 use_converse=False,
                 learned_transitivity=False,
                 learned_symmetry=False,
                 dense_scenes=False,
                 sort_ids=None,
                 eval_func=None):
        super(CLEVRDialogDataset, self).__init__()

        self.image_dir = os.path.join(base_path, 'images')
        self.image_size = image_size
        self.use_transitivity = use_transitivity
        self.mode = mode

        # objects
        self.vocab = {}
        self.vocab["use_object_embedding"] = False
        self.vocab['pred_name_to_idx'] = {
            '__in_image__': 0,
            'right': 1,
            "behind": 2,
            "front": 3,
            "left": 4,
            '__padding__': 5
        }
        self.vocab['pred_idx_to_name'] = {
            v: k
            for k, v in self.vocab['pred_name_to_idx'].items()
        }

        # attributes, currently ignored.
        self.vocab["attributes"] = {}
        self.vocab["attributes"]['shape'] = {
            '__image__': 0,
            'cube': 1,
            'sphere': 2,
            'cylinder': 3
        }
        self.vocab["attributes"]["color"] = {
            '__image__': 0,
            'gray': 1,
            'red': 2,
            'blue': 3,
            'green': 4,
            'brown': 5,
            'purple': 6,
            'cyan': 7,
            'yellow': 8
        }
        self.vocab["attributes"]["material"] = {
            '__image__': 0,
            'rubber': 1,
            'metal': 2
        }
        self.vocab["attributes"]["size"] = {
            '__image__': 0,
            'small': 1,
            'large': 2
        }

        self.vocab["reverse_attributes"] = {}
        for attr in self.vocab["attributes"].keys():
            self.vocab["reverse_attributes"][attr] = {
                v: k
                for k, v in self.vocab["attributes"][attr].items()
            }

        self.vocab['object_name_to_idx'] = {}
        ind = 0
        for attr in self.vocab["attributes"].keys():
            for attr_label in self.vocab["attributes"][attr].keys():
                if ind != 0:
                    keyy = "{}_{}".format(attr_label, ind)
                    self.vocab['object_name_to_idx'][keyy] = ind
                else:
                    # __image__
                    self.vocab['object_name_to_idx'][attr_label] = ind
                ind += 1

        self.use_orphaned_objects = use_orphaned_objects
        self.max_objects = max_objects
        self.max_samples = max_samples
        self.include_relationships = include_relationships
        self.image_paths = []
        transform = [Resize(image_size), T.ToTensor()]
        if normalize_images:
            # transform.append(imagenet_preprocess())
            transform.append(encode_image())
        self.transform = T.Compose(transform)

        if debug:
            with open("clevr_data_sample.pkl", 'rb') as f:
                self.clevr_data = pickle.load(f)
            with open("dialog_data_sample.pkl", 'rb') as f:
                self.dialog_data = pickle.load(f)
        else:
            scenes_path = os.path.join(
                base_path,
                'scenes/CLEVR_{mode}_scenes.json'.format(mode=mode))
            with open(scenes_path, 'r') as f:
                self.clevr_data = json.load(f)
            with open(os.path.join(base_path, h5_path), 'r') as f:
                self.dialog_data = json.load(f)

        if dense_scenes:
            self.keep_dense_scenes()

        if sort_ids:
            # Replace scenes
            self.keep_scenes_per_id(sort_ids, eval_func)
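The loop above flattens every attribute label into a single object-name index space, with index 0 reserved for '__image__' (the same construction reappears in example #3 below). A minimal self-contained sketch, trimmed to two attribute groups for brevity:

attributes = {
    'shape': {'__image__': 0, 'cube': 1, 'sphere': 2, 'cylinder': 3},
    'size': {'__image__': 0, 'small': 1, 'large': 2},
}
object_name_to_idx = {}
ind = 0
for attr in attributes:
    for attr_label in attributes[attr]:
        if ind != 0:
            # every label after the first is stored as "<label>_<index>"
            object_name_to_idx["{}_{}".format(attr_label, ind)] = ind
        else:
            # index 0 keeps the plain '__image__' name
            object_name_to_idx[attr_label] = ind
        ind += 1
# object_name_to_idx == {'__image__': 0, 'cube_1': 1, 'sphere_2': 2,
#                        'cylinder_3': 3, '__image___4': 4, 'small_5': 5,
#                        'large_6': 6}

Note that the '__image__' placeholder of every attribute group after the first also receives an index suffix (e.g. '__image___4' above); only index 0 keeps its plain name.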
Example #2
    def __init__(self,
                 h5_path,
                 base_path,
                 mode,
                 image_size=(256, 256),
                 mask_size=0,
                 normalize_images=True,
                 min_objects=0,
                 max_objects=1000,
                 max_samples=None,
                 include_relationships=True,
                 use_orphaned_objects=True,
                 use_transitivity=False,
                 learned_transitivity=False,
                 include_dummies=True,
                 learned_symmetry=False,
                 use_converse=False,
                 learned_converse=False):
        super(PackedVGSceneGraphDataset, self).__init__()
        self.include_dummies = include_dummies
        self.learned_transitivity = learned_transitivity
        self.image_dir = os.path.join(base_path, "images")
        self.image_size = image_size
        self.mask_size = mask_size
        with open(os.path.join(base_path, 'vocab.json')) as f:
            self.vocab = json.load(f)
        self.num_objects = len(self.vocab['object_idx_to_name'])
        self.use_orphaned_objects = use_orphaned_objects
        self.max_objects = max_objects
        self.max_samples = max_samples
        self.include_relationships = include_relationships
        self.min_objects = min_objects
        self.learned_symmetry = learned_symmetry
        self.learned_converse = learned_converse
        transform = [Resize(image_size), T.ToTensor()]

        if normalize_images:
            transform.append(encode_image())
        self.transform = T.Compose(transform)

        if use_transitivity or use_converse:
            raise NotImplementedError()

        # Load pretrained data
        self.data = {}
        with h5py.File(os.path.join(base_path, h5_path), 'r') as f:
            for k, v in f.items():
                if k == 'image_paths':
                    self.image_paths = list(v)
                else:
                    self.data[k] = torch.IntTensor(np.asarray(v))

        # Keep only images that contain at least `min_objects` objects,
        # filtering every per-image column (and the image paths) by the mask.
        if self.min_objects > 0:
            col_len = len(self.data["objects_per_image"])
            objects_mask = (self.data['objects_per_image'] >=
                            self.min_objects).nonzero()[:, 0]
            cols = [
                col for col in self.data.keys()
                if len(self.data[col]) == col_len
            ]
            for col in cols:
                self.data[col] = self.data[col][objects_mask]
            self.image_paths = np.array(self.image_paths)[objects_mask]

        self.vocab["attributes"] = {}
        self.vocab["attributes"]['objects'] = self.vocab['object_name_to_idx']
        self.vocab["reverse_attributes"] = {}
        for attr in self.vocab["attributes"].keys():
            self.vocab["reverse_attributes"][attr] = {
                v: k
                for k, v in self.vocab["attributes"][attr].items()
            }
        self.register_augmented_relations()
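The min_objects filter above drops every image whose object count is below the threshold, applying one row mask to every HDF5 column that stores a single entry per image. A minimal self-contained sketch with toy tensors standing in for the HDF5 columns ('objects_per_image' is taken from the example; the second column and all values are hypothetical):

import torch

data = {
    'objects_per_image': torch.IntTensor([3, 12, 7, 15]),
    'boxes_per_image': torch.IntTensor([10, 40, 25, 55]),  # hypothetical per-image column
}
min_objects = 10
# indices of images with at least min_objects objects
objects_mask = (data['objects_per_image'] >= min_objects).nonzero()[:, 0]
per_image_cols = [col for col, v in data.items()
                  if len(v) == len(data['objects_per_image'])]
for col in per_image_cols:
    data[col] = data[col][objects_mask]
# data['objects_per_image'] -> tensor([12, 15], dtype=torch.int32); only images 1 and 3 remain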
Example #3
    def __init__(self, base_path, mode, image_size=(64, 64), mask_size=0,
                 normalize_images=True, min_objects=10, max_objects=10, max_samples=None,
                 include_relationships=True, use_orphaned_objects=True, debug=False,
                 learned_transitivity=False, include_dummies=True, use_transitivity=False,
                 use_all_relations=False, use_converse=False, learned_symmetry=False,
                 learned_converse=False):
        super(PackedGenCLEVRDataset, self).__init__()

        self.image_dir = os.path.join(base_path, 'images')
        self.image_size = image_size
        self.mask_size = mask_size
        self.learned_transitivity = learned_transitivity
        self.learned_symmetry = learned_symmetry
        self.learned_converse = learned_converse
        self.include_dummies = include_dummies
        self.use_transitivity = use_transitivity
        self.use_all_relations = use_all_relations
        self.use_converse = use_converse
        self.use_orphaned_objects = use_orphaned_objects
        self.max_objects = max_objects
        self.min_objects = min_objects
        self.max_samples = max_samples
        self.include_relationships = include_relationships
        self.mode = mode

        # objects
        self.vocab = {}
        self.vocab["use_object_embedding"] = False

        # predicates
        self.register_augmented_relations()

        # attributes, currently ignored.
        self.vocab["attributes"] = {}
        self.vocab["attributes"]['shape'] = {'__image__': 0, 'cube': 1, 'sphere': 2, 'cylinder': 3}
        self.vocab["attributes"]["color"] = {'__image__': 0, 'gray': 1, 'red': 2, 'blue': 3, 'green': 4, 'brown': 5,
                                             'purple': 6, 'cyan': 7, 'yellow': 8}
        self.vocab["attributes"]["material"] = {'__image__': 0, 'rubber': 1, 'metal': 2}
        self.vocab["attributes"]["size"] = {'__image__': 0, 'small': 1, 'large': 2}
        self.vocab["reverse_attributes"] = {}
        for attr in self.vocab["attributes"].keys():
            self.vocab["reverse_attributes"][attr] = {v: k for k, v in self.vocab["attributes"][attr].items()}

        self.vocab['object_name_to_idx'] = {}
        ind = 0
        for attr in self.vocab["attributes"].keys():
            for attr_label in self.vocab["attributes"][attr].keys():
                if ind != 0:
                    keyy = "{}_{}".format(attr_label, ind)
                    self.vocab['object_name_to_idx'][keyy] = ind
                else:
                    # __image__
                    self.vocab['object_name_to_idx'][attr_label] = ind
                ind += 1
        self.vocab['object_idx_to_name'] = {}
        for k, v in self.vocab['object_name_to_idx'].items():
            self.vocab['object_idx_to_name'][v] = k

        self.image_paths = []
        transform = [Resize(image_size), T.ToTensor()]
        if normalize_images:
            # transform.append(imagenet_preprocess())
            transform.append(encode_image())
        self.transform = T.Compose(transform)

        # Load data
        if debug:
            self.data = self.create_packed_sgs()
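All three examples build the same image pipeline: resize, convert to a tensor, and optionally normalize via the project-specific encode_image() helper. A minimal self-contained sketch of that pipeline, with torchvision's Resize and a standard ImageNet Normalize substituted for the project's Resize and encode_image() purely to keep the snippet runnable:

import torchvision.transforms as T

image_size = (64, 64)
transform = T.Compose([
    T.Resize(image_size),  # stand-in for the project-specific Resize helper
    T.ToTensor(),
    # stand-in for the project-specific encode_image() normalization
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# from PIL import Image
# img = transform(Image.open('some_clevr_image.png').convert('RGB'))  # 3 x 64 x 64 float tensor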