def test_conversion_key():
    key_1 = RowKey(1)
    assert key_1 == (1,)
    key_1 = RowKey(1.0)
    assert key_1 == (1.0,)
    key_1 = RowKey("A")
    assert key_1 == ("A",)
    key_1 = RowKey("ABC")
    assert key_1 == ("ABC",)
    key_1 = RowKey((1,))
    assert key_1 == (1,)
    key_1 = RowKey((1, 2))
    assert key_1 == (1, 2)
    key_1 = RowKey((1, 2, 3))
    assert key_1 == (1, 2, 3)
    key_1 = RowKey([1])
    assert key_1 == (1,)
    key_1 = RowKey([1, 2])
    assert key_1 == (1, 2)
    key_1 = RowKey([1, 2, 3])
    assert key_1 == (1, 2, 3)
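# Illustrative sketch only, not the library's actual implementation: a
# minimal RowKey consistent with the assertions above. It normalises
# scalars and strings into 1-tuples and unpacks other iterables; it also
# accepts multiple positional arguments, since elsewhere in this module
# RowKey is called as RowKey(*items). The real class may carry more
# behaviour.
class RowKey(tuple):
    def __new__(cls, *values):
        if len(values) == 1:
            value = values[0]
            # Strings count as scalars here, despite being iterable.
            if not isinstance(value, str) and hasattr(value, "__iter__"):
                return super().__new__(cls, tuple(value))
        return super().__new__(cls, values)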
def __init__(self, rows, names=None, _internal_=False, _children_names_=None):
    if _internal_:
        # For internal calls, rows already holds RowKey:value items.
        key_values = rows
        try:
            self._row_sample_ = next(iter(rows))
        except StopIteration:
            # Rows are empty.
            self._row_sample_ = None
        super().__init__(key_values)
        self.names = names
        if _children_names_ is None:
            self.children_names = []
            self.columns = TableColumns(names=names, children_names=[], table=self)
        else:
            self.children_names = _children_names_
            self.columns = TableColumns(
                names=names, children_names=_children_names_, table=self
            )
        return
    else:
        if isinstance(rows, Mapping):
            key_values = [(RowKey(k), value) for k, value in rows.items()]
        elif isinstance(rows, Iterable):
            key_values = [(RowKey(k), value) for k, value in rows]
        else:
            raise ValueError("Table expects rows as a Mapping or an Iterable.")

    self._row_sample_ = key_values[0][0]
    if names is None:
        names = [f"X{i + 1}" for i, _ in enumerate(self._row_sample_)]
    if len(names) != len(self._row_sample_):
        raise ValueError(
            "The number of column names does not match the number of columns."
        )

    super().__init__(key_values)
    self.names = names
    value_sample = super().__getitem__(self._row_sample_)
    if isinstance(value_sample, Table):
        # Values are themselves Tables: inherit their names as children.
        self.columns = TableColumns(
            names=names, children_names=value_sample.names, table=self
        )
        self.children_names = value_sample.names
    else:
        if _children_names_ is None:
            self.children_names = []
            self.columns = TableColumns(names=names, children_names=[], table=self)
        else:
            self.children_names = _children_names_
            self.columns = TableColumns(
                names=names, children_names=_children_names_, table=self
            )
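# Usage sketch (illustrative; assumes the constructor above and the RowKey
# helper are importable). Builds a two-column Table from a plain dict;
# when 'names' is omitted, generated names "X1", "X2", ... are used.
rows = {("a", 1): 0.2, ("a", 2): 0.3, ("b", 1): 0.5}
table = Table(rows, names=["Letter", "Number"])
assert table[RowKey(("a", 1))] == 0.2
assert Table(rows).names == ["X1", "X2"]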
@classmethod
def from_np_array(cls, samples, names=None):
    """Construct a FrequencyTable from a 2D numpy array or a list of lists.

    The resulting keys are tuples.

    Args:
        samples (list or numpy.ndarray): The observed samples.
        names (list, optional): List of column names. If not provided,
            names are generated as 'Xn'. Defaults to None.

    Raises:
        ValueError: If the samples argument is not a list or numpy.ndarray.
    """
    if not isinstance(samples, (np.ndarray, list)):
        raise ValueError(
            "'samples' argument must be a 2D numpy ndarray or a list of lists."
        )
    elif isinstance(samples, list) and not isinstance(samples[0], list):
        raise ValueError(
            "'samples' argument must be a 2D numpy ndarray or a list of lists."
        )
    # Convert each row to a RowKey before calling the constructor.
    return cls(samples=[RowKey(row) for row in samples], names=names)
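# Usage sketch (illustrative): building a FrequencyTable (the class named
# in the docstring above) from raw observations.
import numpy as np

samples = np.array([["a", 1], ["a", 2], ["b", 1]])
ft = FrequencyTable.from_np_array(samples, names=["Letter", "Number"])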
def reduce(self, **kwargs):
    """Reduce the Table by one or more columns.

    P(X, Y) -> P(X = x, Y) or P(X, Y = y)

    Args:
        kwargs (dict): A dictionary whose keys are column names and whose
            values are the values to reduce by.

    Raises:
        ValueError: If the provided names do not exist in the Table.

    Returns:
        Table: A reduced Table.
    """
    # Split columns into indices and comp_indices.
    columns = list(kwargs.keys())
    if len(columns) == self.columns.size:
        raise ValueError("Cannot reduce on all column names.")
    columns_info = self.columns.split_columns(*columns)
    values = np.array([value for _, value in kwargs.items()], dtype=object)
    # Convert the key:value pairs to a 2D numpy array whose rows
    # are (keys..., value).
    arr_counter = self.to_2d_array()
    # Filter the rows of the 2D array by the provided reduce values.
    # 'conditioned_arr' is a boolean mask; the filtering happens on
    # the following line.
    conditioned_arr = np.all(arr_counter[:, columns_info.indices] == values, axis=1)
    sliced_arr = arr_counter[conditioned_arr, :]
    # Keep only the complement columns plus the value column
    # (which is the last column).
    sliced_arr = sliced_arr[:, columns_info.complimnet_indices + [-1]]
    # Split each row into a (columns, value) pair: a generator that
    # yields (RowKey(row[:-1]), row[-1]).
    arr_gen = ((RowKey(row[:-1]), row[-1]) for row in sliced_arr)
    # groupby requires its input to be sorted by the grouping key
    # (index zero in itemgetter).
    sorted_slice_arr = sorted(arr_gen, key=itemgetter(0))
    # Group by the remaining (complement) columns and sum the value
    # per key. 'itemgetter(0)' reads the tuple of complement columns.
    return Table(
        {
            k: sum(item[1] for item in g)
            for k, g in groupby(sorted_slice_arr, key=itemgetter(0))
        },
        columns_info.complimnet_names,
        _internal_=True,
    )
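# Usage sketch (illustrative; reuses the two-column 'table' above):
# P(Letter, Number) -> P(Letter, Number = 1). Rows where Number != 1 are
# dropped and the Number column is removed from the result.
reduced = table.reduce(Number=1)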
def marginal(self, *args, normalise=True):
    """Marginal of (group by) the Table over a set of columns.

    P(X, Y, Z) -> P(X, Y) or P(X, Z) or P(Y, Z)

    Args:
        args (list): List of column names to marginalise over.

    Raises:
        ValueError: If one of the column names is not defined, or if
            all column names are requested.

    Returns:
        Table: (rows, names).
    """
    # Check the validity of the operation based on column names.
    if len(args) == self.columns.size:
        raise ValueError("Cannot marginalise on all column names.")
    # Split columns into indices and comp_indices.
    columns_info = self.columns.split_columns(*args)
    # Convert the key:value pairs to a 2D numpy array whose rows
    # are (row..., value).
    arr = self.to_2d_array()
    # Keep only the complement columns plus the value column.
    filtered_arr = np.c_[arr[:, columns_info.complimnet_indices], arr[:, -1]]
    # Split each row into a (complement columns, count) pair:
    # (RowKey(row[:-1]), row[-1]).
    arr_gen = ((RowKey(row[:-1]), row[-1]) for row in filtered_arr)
    # groupby requires its input to be sorted by the grouping key,
    # the tuple of complement columns (index zero in itemgetter).
    sorted_arr = sorted(arr_gen, key=itemgetter(0))
    # Each item in a group is (complement columns, value); group by
    # the complement columns and sum the values. The resulting
    # {complement columns: summed value} dictionary is a valid
    # argument for Table.
    grouped_arr = {
        k: sum(item[1] for item in g)
        for k, g in groupby(sorted_arr, key=itemgetter(0))
    }
    table = Table(grouped_arr, columns_info.complimnet_names, _internal_=True)
    if normalise:
        table.normalise()
    return table
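# Usage sketch (illustrative; 'table3' is a hypothetical three-column
# Table over X1, X2, X3). Summing out X1 yields a table over (X2, X3),
# normalised by default.
marginalised = table3.marginal("X1")
raw_sums = table3.marginal("X1", normalise=False)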
def compliment_key(key):
    # Helper to project a key onto the complement columns.
    return RowKey(*[key[i] for i in compliment_indices])
def compliment_key(key):
    # Helper to build the split (complement) key.
    return RowKey(*[key[i] for i in compliment_indices])
def condition_on(self, *args, normalise=True):
    """Create the conditional distribution based on the provided column names.

    P(X, Y) -> P(X | Y) or P(Y | X)

    Args:
        args (list): List of names of the given random variables.

    Raises:
        ValueError: If the provided RV names do not exist in the
            distribution.

    Returns:
        MultiTable
    """
    if self.columns.size == 1:
        raise ValueError("This is a single-column Table and cannot be conditioned on.")
    if len(args) == self.columns.size:
        raise ValueError("Cannot condition on all columns.")
    # Split columns into indices and comp_indices.
    columns_info = self.columns.split_columns(*args)
    # Convert the key:value pairs to a 2D numpy array whose rows
    # are (rows..., value).
    arr = self.to_2d_array()
    # Split each row into a triple of conditioning columns
    # (row[indices]), complement columns (row[comp_indices]),
    # and value (row[-1]).
    arr_gen = (
        (
            RowKey(row[columns_info.indices]),
            RowKey(row[columns_info.complimnet_indices]),
            row[-1],
        )
        for row in arr
    )
    # groupby requires its input to be sorted by the grouping key,
    # the tuple of conditioning columns (index zero in itemgetter).
    # Since we will also group inside each group, sort by the inner
    # key too (index one in itemgetter).
    sorted_arr = sorted(arr_gen, key=itemgetter(0, 1))

    def make_dict(group):
        # Each item in 'group' is (columns, complement columns, value);
        # group by the complement columns and sum the values.
        return {
            k: sum(item[2] for item in g2)
            for k, g2 in groupby(group, key=itemgetter(1))
        }

    # For each group (one per unique conditioning value), build an
    # inner dictionary: the result is a dictionary of dictionaries.
    grouped_arr = {
        k: make_dict(g) for k, g in groupby(sorted_arr, key=itemgetter(0))
    }
    # The first set of names belongs to the parent dictionary and the
    # second set to the children.
    table = MultiTable(
        {
            key: Table(values, columns_info.complimnet_names, _internal_=True)
            for key, values in grouped_arr.items()
        },
        columns_info.indices_names,
    )
    if normalise:
        table.normalise()
    return table
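# Usage sketch (illustrative; reuses the two-column 'table' above):
# P(Letter, Number) -> P(Number | Letter). The result maps each observed
# Letter value to a child Table over Number, each normalised to sum to one.
conditional = table.condition_on("Letter")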