Ejemplo n.º 1
0
    def index_mapping(self) -> Dict[str, Dict[Any, int]]:
        """Return the per-column index mappings, creating them on first use.

        The result is cached on the instance as ``_index_mapping``. When the
        attribute is missing, the mapping is loaded from the pickle file at
        ``get_index_mapping_path(self.output().path)`` if that file exists;
        otherwise it is built from ``self.get_data_frame_for_indexing()`` and
        written to that file.

        Returns:
            A dict keyed by column name. Columns that declare
            ``same_index_as`` share the mapping object of the column they
            point to.

        Raises:
            KeyError: If a column's ``same_index_as`` names a column that has
                no mapping.
        """
        if hasattr(self, "_index_mapping"):
            return self._index_mapping

        index_mapping_path = get_index_mapping_path(self.output().path)
        if os.path.exists(index_mapping_path):
            # NOTE(security): unpickling can execute arbitrary code; this file
            # must only ever come from a trusted run of this same pipeline.
            with open(index_mapping_path, "rb") as f:
                self._index_mapping = pickle.load(f)
            return self._index_mapping

        self._creating_index_mapping = True
        try:
            df = self.get_data_frame_for_indexing()
            columns = self.project_config.all_columns

            # Build into a local first so that a failure half-way through
            # never leaves an incomplete mapping cached on the instance.
            mapping: Dict[str, Dict[Any, int]] = {
                column.name: create_index_mapping(df[column.name].values)
                for column in columns
                if column.type == IOType.INDEXABLE and not column.same_index_as
            }
            # Kept as a separate pass so the key insertion order (scalar
            # columns first, then array columns) is unchanged.
            mapping.update(
                {
                    column.name: create_index_mapping_from_arrays(
                        df[column.name].values
                    )
                    for column in columns
                    if column.type == IOType.INDEXABLE_ARRAY
                    and not column.same_index_as
                }
            )
            # Aliased columns reuse the mapping object of their target column.
            for column in columns:
                if column.same_index_as:
                    mapping[column.name] = mapping[column.same_index_as]

            self._index_mapping = mapping
            with open(index_mapping_path, "wb") as f:
                pickle.dump(self._index_mapping, f)
        finally:
            # Always clear the in-progress flag, even when indexing or
            # pickling raised; otherwise it would stay set on the instance.
            del self._creating_index_mapping
        return self._index_mapping
Ejemplo n.º 2
0
    def index_mapping(self) -> Dict[str, Dict[Any, int]]:
        """Return the per-column index mappings, extending a saved one if present.

        The result is cached on the instance as ``_index_mapping``. When the
        attribute is missing, an existing mapping is loaded from
        ``self.index_mapping_path`` if that file exists (otherwise an empty
        mapping is used), and only the columns of
        ``self.project_config.all_columns`` that are not yet in it are
        indexed from the preprocessed indexing data frame. The resulting
        mapping is pickled to ``get_index_mapping_path(self.output().path)``.

        Returns:
            A dict keyed by column name. Newly added columns that declare
            ``same_index_as`` share the mapping object of the column they
            point to.

        Raises:
            KeyError: If a column's ``same_index_as`` names a column that has
                no mapping.
        """
        if hasattr(self, "_index_mapping"):
            return self._index_mapping

        print("index_mapping...")

        self._creating_index_mapping = True
        try:
            df = preprocess_interactions_data_frame(
                self.get_data_frame_for_indexing(), self.project_config)

            # Build into a local first so that a failure half-way through
            # never leaves an incomplete mapping cached on the instance.
            if os.path.exists(self.index_mapping_path):
                # NOTE(security): unpickling can execute arbitrary code; this
                # file must only ever come from a trusted pipeline run.
                with open(self.index_mapping_path, "rb") as f:
                    mapping = pickle.load(f)
            else:
                mapping = {}

            # Only columns missing from the loaded mapping are (re)indexed.
            # The list is fixed here, before ``mapping`` starts growing.
            pending_columns = [
                c for c in self.project_config.all_columns
                if c.name not in mapping
            ]

            print("indexing project_all_columns...")
            for column in pending_columns:
                if column.type == IOType.INDEXABLE and not column.same_index_as:
                    mapping[column.name] = create_index_mapping(
                        df[column.name].values)

            print("indexing create_index_mapping_from_arrays...")
            for column in pending_columns:
                if (column.type == IOType.INDEXABLE_ARRAY
                        and not column.same_index_as):
                    mapping[column.name] = create_index_mapping_from_arrays(
                        df[column.name].values)

            print("indexing same_index_as...")
            # Aliased columns reuse the mapping object of their target column.
            for column in pending_columns:
                if column.same_index_as:
                    mapping[column.name] = mapping[column.same_index_as]

            # Drop the data frame reference before pickling the mapping.
            del df
            self._index_mapping = mapping
        finally:
            # Always clear the in-progress flag, even when indexing raised;
            # otherwise it would stay set on the instance.
            del self._creating_index_mapping

        # NOTE(review): the mapping is read from ``self.index_mapping_path``
        # but always written next to this task's own output; presumably
        # intentional so a loaded mapping is never overwritten -- confirm.
        with open(get_index_mapping_path(self.output().path), "wb") as f:
            pickle.dump(self._index_mapping, f)

        return self._index_mapping
Ejemplo n.º 3
0
 def index_mapping_path(self) -> Optional[str]:
     """Return the index-mapping file path for this task.

     The path is derived with ``get_index_mapping_path`` from
     ``self.load_index_mapping_path`` when that is set (truthy), and from
     ``self.output().path`` otherwise.
     """
     # ``or`` short-circuits, so ``self.output()`` is only called when no
     # explicit load path was configured.
     base_path = self.load_index_mapping_path or self.output().path
     return get_index_mapping_path(base_path)