Example #1
# NOTE: utils and ObjectCategories are project-local; adjust the import
# paths to match your repository layout.
import os
import pickle

import numpy as np

import utils
from data import ObjectCategories
class ModelPrior:
    def __init__(self):
        pass

    def learn(self, data_folder="bedroom_final", data_root_dir=None):
        if not data_root_dir:
            data_root_dir = utils.get_data_root_dir()
        data_dir = f"{data_root_dir}/{data_folder}"
        self.data_dir = data_dir
        self.category_map = ObjectCategories()

        files = os.listdir(data_dir)
        files = [
            f for f in files
            if ".pkl" in f and "domain" not in f and "_" not in f
        ]

        self.categories = self.category_map.all_non_arch_categories(
            data_root_dir, data_folder)
        self.cat_to_index = {
            cat: i for i, cat in enumerate(self.categories)
        }

        with open(f"{data_dir}/model_frequency", "r") as f:
            lines = f.readlines()
            models = [line.split()[0] for line in lines]
            self.model_freq = [int(l[:-1].split()[1]) for l in lines]

        self.models = [
            model for model in models if not self.category_map.is_arch(
                self.category_map.get_final_category(model))
        ]
        self.model_to_index = {
            self.models[i]: i
            for i in range(len(self.models))
        }

        N = len(self.models)
        self.num_categories = len(self.categories)

        self.model_index_to_cat = [
            self.cat_to_index[self.category_map.get_final_category(model)]
            for model in self.models
        ]

        # count[a][b] = number of scenes in which models a and b co-occur
        self.count = [[0] * N for _ in range(N)]

        for index in range(len(files)):
            with open(f"{data_dir}/{index}.pkl", "rb") as f:
                (_, _, nodes), _ = pickle.load(f)

            # Keep only the non-architectural objects in this scene
            object_nodes = []
            for node in nodes:
                modelId = node["modelId"]
                category = self.category_map.get_final_category(modelId)
                if not self.category_map.is_arch(category):
                    object_nodes.append(node)

            # Update the co-occurrence counts for every unordered pair of
            # objects in this scene
            for i in range(len(object_nodes)):
                for j in range(i + 1, len(object_nodes)):
                    a = self.model_to_index[object_nodes[i]["modelId"]]
                    b = self.model_to_index[object_nodes[j]["modelId"]]
                    self.count[a][b] += 1
                    self.count[b][a] += 1
            print(index)

        self.N = N

    def save(self, dest=None):
        if dest is None:
            dest = f"{self.data_dir}/model_prior.pkl"
        with open(dest, "wb") as f:
            pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)

    def load(self, data_dir):
        source = f"{data_dir}/model_prior.pkl"
        with open(source, "rb") as f:
            self.__dict__ = pickle.load(f)

    def sample(self, category, models):
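        # Sample a model of the given category, conditioned on the models
        # already placed in the scene: start from the marginal frequency
        # prior over this category's models, then multiply in the normalized
        # co-occurrence distribution for each placed model (a naive-Bayes-
        # style product of pairwise terms)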
        N = self.N
        indices = [
            i for i in range(N) if self.model_index_to_cat[i] == category
        ]
        # Marginal frequency prior over this category's models
        p = np.asarray([self.model_freq[i] for i in indices],
                       dtype=np.float64)
        for model in models:
            i = self.model_to_index[model]
            # Distribution over the candidates given that `model` is present
            p1 = np.asarray([self.count[j][i] for j in indices],
                            dtype=np.float64)
            p1 = p1 / p1.sum()
            p = p * p1

        p = p / p.sum()
        return self.models[indices[np.random.choice(len(indices), p=p)]]

    def get_models(self, category, important, others):
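        # Return the plausible models of the given category: prune models
        # that are rare overall (< 1% of the category's occurrences), that
        # rarely co-occur with the 'important' conditioning models (< 10%),
        # or that rarely co-occur with the 'others' (< 5%)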
        N = self.N
        indices = [
            i for i in range(N) if self.model_index_to_cat[i] == category
        ]
        to_remove = []

        # Drop models whose share of the category's overall frequency is < 1%
        freq = [self.model_freq[idx] for idx in indices]
        total_freq = sum(freq)
        for j in range(len(indices)):
            if freq[j] / total_freq < 0.01:
                if indices[j] not in to_remove:
                    to_remove.append(indices[j])

        # Drop models that rarely co-occur with the conditioning models,
        # with a stricter threshold for the 'important' ones
        for threshold, model_list in ((0.1, important), (0.05, others)):
            for model in model_list:
                i = self.model_to_index[model]
                freq = [self.count[idx][i] for idx in indices]
                total_freq = sum(freq)
                if total_freq > 0:
                    for j in range(len(indices)):
                        if freq[j] / total_freq < threshold:
                            if indices[j] not in to_remove:
                                to_remove.append(indices[j])

        # Never remove the last remaining candidate
        for item in to_remove:
            if len(indices) > 1:
                indices.remove(item)

        return [self.models[index] for index in indices]
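
A minimal usage sketch (all names come from the class above; the conditioning models are placeholders, and learn() assumes the prepared bedroom_final data folder):

prior = ModelPrior()
prior.learn(data_folder="bedroom_final")
prior.save()

# Restore the learned prior and query it
restored = ModelPrior()
restored.load(prior.data_dir)

# Pretend the first two learned models are already placed in the scene,
# then sample another model of the same category as model 0
existing = restored.models[:2]
cat = restored.model_index_to_cat[0]
sampled_id = restored.sample(cat, existing)
candidates = restored.get_models(cat, important=existing, others=[])
print(sampled_id, len(candidates))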
Example #2
# NOTE: utils, ObjectCategories, RenderedScene, and
# create_transformed_composite are project-local; adjust the import paths
# to match your repository layout.
import os
import pickle
import random
import sys
from collections import defaultdict
from functools import cmp_to_key

import numpy as np
import torch
import torch.utils.data as data

import utils
from data import ObjectCategories, RenderedScene, create_transformed_composite
class LatentDataset(data.Dataset):
    def __init__(self,
                 scene_indices=(0, 4000),
                 data_folder="bedroom_fin_256",
                 data_root_dir=None,
                 seed=None,
                 do_rotation_augmentation=False,
                 cat_only=False,
                 use_same_category_batches=False,
                 importance_order=False,
                 epoch_size=None):
        super().__init__()
        self.category_map = ObjectCategories()
        self.seed = seed
        self.data_folder = data_folder
        self.data_root_dir = data_root_dir
        self.scene_indices = scene_indices
        self.do_rotation_augmentation = do_rotation_augmentation
        self.cat_only = cat_only

        self.cat_name2index = None

        self.cat_index2scenes = None

        if self.data_root_dir is None:
            self.data_root_dir = utils.get_data_root_dir()

        self.catnames = self.category_map.all_non_arch_categories(
            self.data_root_dir, data_folder)
        self.cat_name2index = {
            cat: i for i, cat in enumerate(self.catnames)
        }
        self.n_categories = len(self.catnames)

        self.build_cat2scene()
        self.build_cats_in_scene_indices()

        self.cat_importances = self.category_map.all_non_arch_category_importances(
            self.data_root_dir, data_folder)

        # See 'prepare_same_category_batches' below for info
        self.use_same_category_batches = use_same_category_batches
        if use_same_category_batches:
            self.same_category_batch_indices = []
            assert (epoch_size is not None)
            self.epoch_size = epoch_size
        else:
            self.same_category_batch_indices = None

        self.importance_order = importance_order

    # Build a map from category index to the scene indices that contain an instance of that category
    # This ignores scene_indices and does it for the whole data folder
    def build_cat2scene(self):
        self.cat_index2scenes = defaultdict(list)
        data_root_dir = self.data_root_dir or utils.get_data_root_dir()
        data_dir = f'{data_root_dir}/{self.data_folder}'
        filename = f'{data_dir}/cat_index2scenes'
        # Create new cached map file
        if not os.path.exists(filename):
            print(
                'Building map of category to scenes containing an instance...')
            pkls = [
                path for path in os.listdir(data_dir) if path.endswith('.pkl')
            ]
            pklnames = [os.path.splitext(path)[0] for path in pkls]
            # Only get the .pkl files which are numbered scenes
            indices = [
                int(pklname) for pklname in pklnames if pklname.isdigit()
            ]
            for i, idx in enumerate(indices, start=1):
                sys.stdout.write(f'   {i}/{len(indices)}\r')
                sys.stdout.flush()
                scene = RenderedScene(idx, self.data_folder,
                                      self.data_root_dir)
                object_nodes = scene.object_nodes
                for node in object_nodes:
                    self.cat_index2scenes[node['category']].append(idx)
            with open(filename, 'wb') as f:
                pickle.dump(self.cat_index2scenes, f)
            print('')
        # Load an existing cached map file from disk
        else:
            with open(filename, 'rb') as f:
                self.cat_index2scenes = pickle.load(f)

    def __len__(self):
        if self.use_same_category_batches:
            return self.epoch_size
        else:
            return self.scene_indices[1] - self.scene_indices[0]

    # First, find the set of categories that occur within the scene indices
    # We do this because it's possible that there might be some category that
    #    occurs in the dataset, but only in the test set...
    def build_cats_in_scene_indices(self):
        cats_seen = {}
        for cat, scene_indices in self.cat_index2scenes.items():
            scenes = [idx for idx in scene_indices if \
                (idx >= self.scene_indices[0] and idx < self.scene_indices[1])]
            if len(scenes) > 0:
                cats_seen[cat] = True
        cats_seen = list(cats_seen.keys())
        self.cats_seen = cats_seen

    # Use at the beginning of each epoch to support loading batches of all the same category
    # NOTE: The data loader must have shuffle set to False for this to work
    def prepare_same_category_batches(self, batch_size):
        # Build a random list of category indices (grouped by batch_size)
        # This requires that the length of the dataset be a multiple of batch_size
        assert len(self) % batch_size == 0
        num_batches = len(self) // batch_size
        self.same_category_batch_indices = []
        for i in range(num_batches):
            # Choose among cats_seen (rather than all categories) so that
            # every sampled category actually occurs within scene_indices
            cat_index = random.choice(self.cats_seen)
            for j in range(batch_size):
                self.same_category_batch_indices.append(cat_index)

    # 'importance' = a function of both size and observation frequency
    def sort_object_nodes_by_importance(self,
                                        object_nodes,
                                        noise=None,
                                        swap_prob=None):
        # Build list of pairs of (index, importance)
        index_imp_pairs = []
        for i, node in enumerate(object_nodes):
            cat = node["category"]
            imp = self.cat_importances[cat]
            index_imp_pairs.append((i, imp))

        # Optionally, add noise to these importance scores
        # Noise is expressed as a multiple of the standard deviation of the importance scores
        # A typical value might be really small, e.g. 0.05(?)
        if noise is not None:
            imps = [pair[1] for pair in index_imp_pairs]
            istd = np.array(imps).std()
            index_imp_pairs = [(index,
                                imp + noise * random.normalvariate(0, istd))
                               for index, imp in index_imp_pairs]

        # Sort based on importance
        index_imp_pairs.sort(key=lambda tup: tup[1], reverse=True)

        sorted_nodes = [object_nodes[tup[0]] for tup in index_imp_pairs]

        # Optionally, swap pairs of nodes with some probability
        if swap_prob is not None:
            for _ in range(len(sorted_nodes)):
                if random.random() < swap_prob:
                    idx1, idx2 = random.sample(range(len(sorted_nodes)), 2)
                    sorted_nodes[idx1], sorted_nodes[idx2] = \
                        sorted_nodes[idx2], sorted_nodes[idx1]

        return sorted_nodes

    def order_object_nodes(self, object_nodes):
        if self.importance_order:
            object_nodes = self.sort_object_nodes_by_importance(object_nodes)
        else:
            object_nodes = object_nodes[:]
            random.shuffle(object_nodes)

        # The following extra sorting passes only apply to datasets that have second-tier objects
        # We can check for this by looking for the presence of certain object properties e.g. 'parent'
        if 'parent' in object_nodes[0]:
            # Make sure that all second-tier objects come *after* first tier ones
            def is_second_tier(node):
                return (node['parent'] != 'Wall') and \
                       (node['parent'] != 'Floor')

            object_nodes.sort(key=lambda node: int(is_second_tier(node)))

            # Make sure that all children come after their parents
            def cmp_parent_child(node1, node2):
                # Less than (negative): node1 is the parent of node2
                if node2['parent'] == node1['id']:
                    return -1
                # Greater than (positive): node2 is the parent of node1
                elif node1['parent'] == node2['id']:
                    return 1
                # Equal (zero): all other cases
                else:
                    return 0

            object_nodes.sort(key=cmp_to_key(cmp_parent_child))
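            # NOTE: this comparator returns 0 for unrelated nodes, so it does
            # not define a total order; the sort fixes direct parent-child
            # pairs but is not a full topological sort of deeper hierarchies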

        return object_nodes

    def get_scene(self, index, stop_prob=None):
        i = index + self.scene_indices[0]
        scene = RenderedScene(i, self.data_folder, self.data_root_dir)
        object_nodes = self.order_object_nodes(scene.object_nodes)

        # With some probability, sample a 'stop' (i.e. the end of the scene build sequence)
        if stop_prob is not None and random.random() < stop_prob:
            output_node = None
            input_nodes = object_nodes
        else:
            # Pick a random index at which to split into (a) existing objects and
            #    (b) objects yet-to-be-added.
            split_idx = random.randint(0, len(object_nodes) - 1)
            # This object is the output node
            output_node = object_nodes[split_idx]
            # All objects before this index are input nodes
            input_nodes = object_nodes[0:split_idx]

        return scene, input_nodes, output_node

    def get_scene_specific_category(self, cat_index_or_name, empty_room=False):
        if isinstance(cat_index_or_name, list):
            cat_index_or_name = random.choice(cat_index_or_name)
        if isinstance(cat_index_or_name, int):
            cat_index = cat_index_or_name
        else:
            cat_name = cat_index_or_name
            cat_index = self.cat_name2index[cat_name]

        # Pull out a scene (within scene_indices) that has an instance of this category
        scenes_for_cat = [idx for idx in self.cat_index2scenes[cat_index] if \
            (idx >= self.scene_indices[0] and idx < self.scene_indices[1])]
        scene_index = random.choice(scenes_for_cat)
        scene = RenderedScene(scene_index, self.data_folder,
                              self.data_root_dir)
        object_nodes = self.order_object_nodes(scene.object_nodes)

        # Pick a random instance of the category
        cat_indices = [
            i for i in range(0, len(object_nodes))
            if object_nodes[i]['category'] == cat_index
        ]
        split_idx = random.choice(cat_indices)
        # This object is the output node
        output_node = object_nodes[split_idx]
        if empty_room:
            input_nodes = []  # No other objects in the scene
        else:
            # All objects before this index are input nodes
            input_nodes = object_nodes[0:split_idx]

        return scene, input_nodes, output_node

    def get_scene_same_category_batch(self, index):
        cat_index = self.same_category_batch_indices[index]
        return self.get_scene_specific_category(cat_index)

    # Balance training data so that we train equally often on all target categories
    def get_scene_uniform_category(self, stop_prob=None):
        if stop_prob is not None and random.random() < stop_prob:
            scene_index = random.randint(self.scene_indices[0],
                                         self.scene_indices[1] - 1)
            scene = RenderedScene(scene_index, self.data_folder,
                                  self.data_root_dir)
            output_node = None
            input_nodes = self.order_object_nodes(scene.object_nodes)
            return scene, input_nodes, output_node
        else:
            cat_index = random.choice(self.cats_seen)
            return self.get_scene_specific_category(cat_index)

    def __getitem__(self, index):
        if self.seed is not None:
            random.seed(self.seed)

        if self.use_same_category_batches:
            scene, input_nodes, output_node = self.get_scene_same_category_batch(
                index)
        elif self.cat_only:
            scene, input_nodes, output_node = self.get_scene(index,
                                                             stop_prob=0.1)
        else:
            scene, input_nodes, output_node = self.get_scene(index)

        # Get the composite images
        if not self.do_rotation_augmentation:
            input_img = create_transformed_composite(scene, input_nodes, 0)
            if not self.cat_only:
                output_img = create_transformed_composite(
                    scene, [output_node], 0)
        else:
            # Data augmentation: Get the composite images under a random cardinal rotation
            rot = random.choice([0, 90, 180, 270])
            input_img = create_transformed_composite(scene, input_nodes, rot)
            if not self.cat_only:
                output_img = create_transformed_composite(
                    scene, [output_node], rot)

        # Get the category of the object
        # This is an integer index
        if output_node is None:
            cat = torch.LongTensor([self.n_categories])
        else:
            cat = torch.LongTensor([output_node["category"]])

        # Also get the count of all categories currently in the scene
        catcount = torch.zeros(self.n_categories)
        for node in input_nodes:
            catidx = node['category']
            catcount[catidx] = catcount[catidx] + 1

        # If the dataset is configured to only care about predicting the category, then we can go ahead
        #    and return now
        if self.cat_only:
            return input_img, cat, catcount

        # Select just the object mask channel from the output image
        output_img = output_img[2]
        # Put a singleton dimension back in for the channel dimension
        output_img = torch.unsqueeze(output_img, 0)
        # Make sure that it has value 1 everywhere (hack: multiply by huge number and clamp)
        output_img *= 1000
        torch.clamp(output_img, 0, 1, out=output_img)  # Clamp in place

        # Get the location of the object
        # Normalize the coordinates to [-1, 1], with (0,0) being the image center
        loc = output_node['location']
        x = loc[0]
        y = loc[1]
        w = output_img.size()[2]
        x_ = ((x / w) - 0.5) * 2
        y_ = ((y / w) - 0.5) * 2
        loc = torch.Tensor([x_, y_])

        # Get the orientation of the object
        # Here, we assume that there is no scale, and that the only rotation is about the up vector
        #  (so we can just read the cos, sin values directly out of the transformation matrix)
        xform = output_node["transform"]
        cos = xform[0]
        sin = xform[8]
        orient = torch.Tensor([cos, sin])

        # Get the object-space dimensions of the output object (in pixel space)
        # (Normalize to [0, 1])
        xsize, ysize = output_node['objspace_dims']
        xsize = xsize / w
        ysize = ysize / w
        # dims = torch.Tensor([xsize, ysize])
        dims = torch.Tensor([ysize, xsize])  # Not sure why this (y, x) flip is necessary atm...

        return input_img, output_img, cat, loc, orient, dims, catcount
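
A minimal training-loop sketch (the batch size, epoch size, and number of epochs are illustrative; it assumes the prepared bedroom_fin_256 data folder). As the NOTE on prepare_same_category_batches says, the DataLoader must not shuffle, and the dataset length must be a multiple of the batch size:

batch_size = 32
dataset = LatentDataset(scene_indices=(0, 4000),
                        use_same_category_batches=True,
                        epoch_size=batch_size * 200)
loader = data.DataLoader(dataset, batch_size=batch_size, shuffle=False)
for epoch in range(10):
    # Re-draw the per-batch category assignments at the start of each epoch
    dataset.prepare_same_category_batches(batch_size)
    for input_img, output_img, cat, loc, orient, dims, catcount in loader:
        pass  # train on this same-category batch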
Example #3
# NOTE: utils and ObjectCategories are project-local; adjust the import
# paths to match your repository layout.
import os
import pickle

import utils
from data import ObjectCategories
class SupportPrior:
    def __init__(self):
        pass

    def learn(self, data_folder="bedroom_final", data_root_dir=None):
        if not data_root_dir:
            data_root_dir = utils.get_data_root_dir()
        data_dir = f"{data_root_dir}/{data_folder}"
        self.data_dir = data_dir
        self.category_map = ObjectCategories()

        files = os.listdir(data_dir)
        files = [
            f for f in files
            if ".pkl" in f and "domain" not in f and "_" not in f
        ]

        self.categories = self.category_map.all_non_arch_categories(
            data_root_dir, data_folder)
        self.category_count = self.category_map.all_non_arch_category_counts(
            data_root_dir, data_folder)
        self.cat_to_index = {
            cat: i for i, cat in enumerate(self.categories)
        }
        self.num_categories = len(self.categories)
        self.categories.append("floor")
        N = self.num_categories

        # support_count[c][p] = number of times an object of category c is
        # supported by a parent of category p (index N means the floor)
        self.support_count = [[0] * (N + 1) for _ in range(N)]

        for index in range(len(files)):
            print(index)
            with open(f"{data_dir}/{index}.pkl", "rb") as f:
                (_, _, nodes), _ = pickle.load(f)

            object_nodes = []
            id_to_cat = {}
            for node in nodes:
                modelId = node["modelId"]
                category = self.category_map.get_final_category(modelId)
                if not self.category_map.is_arch(category):
                    object_nodes.append(node)
                    id_to_cat[node["id"]] = self.cat_to_index[category]
                    node["category"] = self.cat_to_index[category]

            for node in object_nodes:
                parent = node["parent"]
                category = node["category"]
                if parent == "Floor" or parent is None:
                    self.support_count[category][-1] += 1
                else:
                    self.support_count[category][id_to_cat[parent]] += 1

        self.possible_supports = {}
        for i in range(self.num_categories):
            print(f"Support for {self.categories[i]}:")
            supports = [(c, self.support_count[i][c] / self.category_count[i])
                        for c in range(N + 1)]
            supports = sorted(supports, key=lambda x: -x[1])
            # Keep only parents that support this category > 1% of the time
            supports = [s for s in supports if s[1] > 0.01]
            for s in supports:
                print(f"    {self.categories[s[0]]}: {s[1]:.4f}")
            self.possible_supports[i] = [s[0] for s in supports]

        print(self.possible_supports)
        self.N = N

    def save(self, dest=None):
        if dest is None:
            dest = f"{self.data_dir}/support_prior.pkl"
        with open(dest, "wb") as f:
            pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)

    def load(self, data_dir):
        source = f"{data_dir}/support_prior.pkl"
        with open(source, "rb") as f:
            self.__dict__ = pickle.load(f)
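
A minimal usage sketch (assuming the same prepared bedroom_final data folder as in the examples above):

prior = SupportPrior()
prior.learn(data_folder="bedroom_final")
prior.save()

# Restore the prior and look up the plausible supporting parent
# categories for category 0 (index prior.N, the appended "floor"
# entry, means the object stands on the floor)
restored = SupportPrior()
restored.load(prior.data_dir)
print([restored.categories[c] for c in restored.possible_supports[0]])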