def index_mapping(self) -> Dict[str, Dict[Any, int]]:
    """Return the per-column index mapping, loading or building it on first use.

    The result is cached on the instance as ``_index_mapping``. On the first
    call the mapping is read from the pickle file located by
    ``get_index_mapping_path(self.output().path)`` when that file exists;
    otherwise it is built from ``self.get_data_frame_for_indexing()`` and
    written to that same path.

    While the mapping is being built, ``self._creating_index_mapping`` is set
    to ``True``. The flag is always removed again, even when building fails,
    and a partially built mapping is never cached on the instance.

    Returns:
        A dict keyed by column name whose values are the index mappings
        produced by ``create_index_mapping`` /
        ``create_index_mapping_from_arrays``.
    """
    if hasattr(self, "_index_mapping"):
        return self._index_mapping

    index_mapping_path = get_index_mapping_path(self.output().path)

    if os.path.exists(index_mapping_path):
        # NOTE(security): pickle.load can execute arbitrary code; this file
        # must only ever come from a trusted source.
        with open(index_mapping_path, "rb") as f:
            self._index_mapping = pickle.load(f)
        return self._index_mapping

    # NOTE(review): presumably other code checks this flag to detect that
    # the mapping is still being built -- confirm against its readers.
    self._creating_index_mapping = True
    try:
        df = self.get_data_frame_for_indexing()

        # Build into a local dict so that a failure part-way through does
        # not leave an incomplete mapping cached on the instance.
        mapping = {
            column.name: create_index_mapping(df[column.name].values)
            for column in self.project_config.all_columns
            if column.type == IOType.INDEXABLE and not column.same_index_as
        }
        mapping.update(
            {
                column.name: create_index_mapping_from_arrays(
                    df[column.name].values
                )
                for column in self.project_config.all_columns
                if column.type == IOType.INDEXABLE_ARRAY
                and not column.same_index_as
            }
        )

        # Columns declared with ``same_index_as`` share (alias) the mapping
        # object of the column they point to.
        for column in self.project_config.all_columns:
            if column.same_index_as:
                mapping[column.name] = mapping[column.same_index_as]

        self._index_mapping = mapping
        with open(index_mapping_path, "wb") as f:
            pickle.dump(mapping, f)
    finally:
        # Clear the flag on success and on failure alike.
        del self._creating_index_mapping

    return self._index_mapping
def index_mapping(self) -> Dict[str, Dict[Any, int]]:
    """Return the per-column index mapping, loading and extending it on first use.

    The result is cached on the instance as ``_index_mapping``. On the first
    call:

    1. The indexing data frame is obtained from
       ``self.get_data_frame_for_indexing()`` and passed through
       ``preprocess_interactions_data_frame`` with ``self.project_config``.
    2. If a pickle file exists at ``self.index_mapping_path`` it is loaded as
       the starting mapping; otherwise the mapping starts empty.
    3. Every project column whose name is not already a key of that mapping
       is indexed from the data frame.
    4. The resulting mapping is written to
       ``get_index_mapping_path(self.output().path)``. This write location is
       not necessarily the path the mapping was loaded from.

    While the mapping is being built, ``self._creating_index_mapping`` is set
    to ``True``. The flag is always removed again, even when building fails,
    and a partially built mapping is never cached on the instance.

    Returns:
        A dict keyed by column name whose values are the index mappings
        produced by ``create_index_mapping`` /
        ``create_index_mapping_from_arrays`` (or loaded from the pickle).
    """
    if hasattr(self, "_index_mapping"):
        return self._index_mapping

    print("index_mapping...")

    # NOTE(review): presumably other code checks this flag to detect that
    # the mapping is still being built -- confirm against its readers.
    self._creating_index_mapping = True
    try:
        df = preprocess_interactions_data_frame(
            self.get_data_frame_for_indexing(), self.project_config
        )

        if os.path.exists(self.index_mapping_path):
            # NOTE(security): pickle.load can execute arbitrary code; the
            # file at this path must come from a trusted source.
            with open(self.index_mapping_path, "rb") as f:
                mapping = pickle.load(f)
        else:
            mapping = {}

        # Only columns missing from the loaded mapping need to be indexed.
        pending_columns = [
            c for c in self.project_config.all_columns if c.name not in mapping
        ]

        print("indexing project_all_columns...")
        for column in pending_columns:
            if column.type == IOType.INDEXABLE and not column.same_index_as:
                mapping[column.name] = create_index_mapping(
                    df[column.name].values
                )

        print("indexing create_index_mapping_from_arrays...")
        mapping.update(
            {
                column.name: create_index_mapping_from_arrays(
                    df[column.name].values
                )
                for column in pending_columns
                if column.type == IOType.INDEXABLE_ARRAY
                and not column.same_index_as
            }
        )

        print("indexing same_index_as...")
        # Columns declared with ``same_index_as`` share (alias) the mapping
        # object of the column they point to.
        for column in pending_columns:
            if column.same_index_as:
                mapping[column.name] = mapping[column.same_index_as]

        # Publish only once the mapping is complete.
        self._index_mapping = mapping
    finally:
        # Clear the flag on success and on failure alike.
        del self._creating_index_mapping

    # Drop the data frame reference before serialising the mapping.
    del df

    with open(get_index_mapping_path(self.output().path), "wb") as f:
        pickle.dump(self._index_mapping, f)

    return self._index_mapping
def index_mapping_path(self) -> Optional[str]:
    """Return the location of the index-mapping file for this task.

    The location is derived with ``get_index_mapping_path`` from
    ``self.load_index_mapping_path`` when that value is truthy, and from
    ``self.output().path`` otherwise.
    """
    # Choose the base path first, then derive the mapping path from it.
    base_path = (
        self.load_index_mapping_path
        if self.load_index_mapping_path
        else self.output().path
    )
    return get_index_mapping_path(base_path)