def __init__(self, table_name, join_keys, df, join_name):
    """Set up the actor for one data table.

    Every column of ``df`` and every entry of ``join_keys`` is qualified
    with the table name (``"<table_name>.<column>"``).

    Args:
        table_name: Name of the table; used as the column prefix and in
            the index file path.
        join_keys: Iterable of unqualified join-key column names.
        df: Table data. Its ``columns`` attribute is reassigned in place,
            so the caller's object sees the qualified names as well.
            (Presumably a pandas DataFrame -- TODO confirm.)
        join_name: Name of the join; first path component of the index
            file handed to ``rustlib.IndexProvider``.
    """

    def qualified(column):
        # Prefix a bare column name with the owning table's name.
        return f"{table_name}.{column}"

    self.table = table_name
    self.df = df
    self.join_keys = list(map(qualified, join_keys))

    # NOTE: this renames the columns on the very object that was passed in.
    self.df.columns = [qualified(column) for column in self.df.columns]
    self.indicator_column = f"__in_{table_name}"

    # Sample columns are all (qualified) columns except the join keys.
    non_key_columns = []
    for column in self.df.columns:
        if column in self.join_keys:
            continue
        non_key_columns.append(column)
    self.sample_columns = non_key_columns

    # NULL comes from outside this method; it is passed through unchanged
    # as the second IndexProvider argument.
    indices_path = f"{join_name}/{table_name}.pk.indices"
    self.index_provider = rustlib.IndexProvider(indices_path, NULL)

    log.info(f"DataTableActor of `{table_name}` is ready.")
def __init__(self, table, jct, join_spec):
    """Set up the actor for one table's join count table (JCT).

    A table with no predecessor in ``join_spec.join_tree`` gets a
    ``jct_distribution`` built from its ``"<table>.weight"`` column.
    A table with exactly one predecessor instead records the qualified
    join-key names on both sides of the edge and creates an index
    provider over ``"<join_name>/<table>.jk.indices"``.

    Args:
        table: Name of the table this actor serves.
        jct: The join count table; indexed by qualified column name.
        join_spec: Join description exposing ``join_tree`` (with a
            ``predecessors(table)`` method), ``join_graph`` (indexable as
            ``[parent][table]["join_keys"]``) and ``join_name``.
            (join_tree is presumably a networkx DiGraph -- TODO confirm.)
    """
    self.jct = jct
    self.table = table

    predecessors = list(join_spec.join_tree.predecessors(table))
    # A table is expected to have at most one parent in the join tree.
    assert len(predecessors) <= 1, predecessors

    if len(predecessors) != 1:
        # No single parent: build the distribution from the weight column.
        weight_column = f"{self.table}.weight"
        self.jct_distribution = get_distribution(self.jct[weight_column])
    else:
        parent = predecessors[0]
        edge_keys = join_spec.join_graph[parent][table]["join_keys"]
        self.table_join_key = f"{table}.{edge_keys[table]}"
        self.parent_join_key = f"{parent}.{edge_keys[parent]}"

        # The value returned by _insert_null_to_jct() is handed to the
        # index provider as its second argument.
        null_row_offset = self._insert_null_to_jct()
        indices_path = f"{join_spec.join_name}/{table}.jk.indices"
        self.index_provider = rustlib.IndexProvider(
            indices_path, null_row_offset)

    log.info(f"JoinCountTableActor `{table}` is ready.")