def test_as_discrete(self):
    """Reinterpret every variable of ``self.data`` with ``AsCategorical``.

    Checks the transformed ``X`` values and the ``values`` of the
    resulting variables "A", "B", "C" and "D".
    """
    source = self.data
    reinterpret = AsCategorical()
    converted = [
        apply_reinterpret(var, reinterpret, table_column_data(source, var))
        for var in source.domain.variables
    ]
    tdomain = Domain(converted)
    ttable = source.transform(tdomain)

    expected_x = np.array([
        [0, 2, 2, 1],
        [1, 1, 3, 2],
        [2, 0, 1, 3],
        [1, 0, 0, 0],
    ], dtype=float)
    assert_array_equal(ttable.X, expected_x)

    # Expected category labels, checked in the same order as the columns.
    expected_values = {
        "A": ["a", "b", "c"],
        "B": ["0", "1", "2"],
        "C": ["0.0", "0.2", "0.25", "1.25"],
        "D": [
            "1970-01-01 00:00:00",
            "1970-01-01 00:03:00",
            "1970-01-01 00:06:00",
            "1970-01-01 00:12:00",
        ],
    }
    for name, values in expected_values.items():
        self.assertEqual(tdomain[name].values, values)
def test_reinterpret_string(self):
    """Reinterpret each meta of ``self.data_str`` with all four transforms.

    Every resulting variable is renamed to ``<name>_<index>``; the one
    produced by ``AsTime`` additionally gets a ``StrpTime`` transform
    applied before it is collected.
    """
    source = self.data_str
    reinterpreted = []
    for meta in source.domain.metas:
        # Fresh transform instances for every meta, as in the original.
        transforms = [AsContinuous(), AsCategorical(), AsTime(), AsString()]
        for index, transform in enumerate(transforms):
            new_var = apply_reinterpret(
                meta, transform, table_column_data(source, meta))
            new_var = new_var.renamed(f"{meta.name}_{index}")
            if isinstance(transform, AsTime):
                auto_format = StrpTime("Detect automatically", None, 1, 1)
                new_var = apply_transform_var(new_var, [auto_format])
            reinterpreted.append(new_var)

    ttable = source.transform(Domain([], metas=reinterpreted))

    expected_metas = np.array([
        [0.1, 0., np.nan, "0.1", 2010., 0., 1262304000., "2010"],
        [1.0, 1., np.nan, "1.0", 2020., 1., 1577836800., "2020"],
    ], dtype=object)
    assert_array_nanequal(ttable.metas, expected_metas)
def _make_parts(self, data, group_var=None):
    """
    Build the initial `Parts` for `data`.

    Without `group_var` all rows form a single untitled `RowPart`.
    Otherwise one `RowPart` is created per value of `group_var` (titled
    with that value), followed by an "N/A" part holding the rows whose
    group column entry is masked, if there are any. Columns always form
    a single `ColumnPart` spanning all attributes. The returned `Parts`
    carries the NaN-ignoring (min, max) of `data.X` as `span`.
    """
    unclustered = dict(cluster=None, cluster_ordered=None)

    if group_var is None:
        row_groups = [
            RowPart(title=None, indices=range(0, len(data)), **unclustered)
        ]
    else:
        assert group_var.is_discrete
        column = table_column_data(data, group_var)
        # Row indices are collected per value index first, then wrapped.
        indices_per_value = [
            np.flatnonzero(column == code)
            for code, _ in enumerate(group_var.values)
        ]
        row_groups = [
            RowPart(title=value, indices=indices, **unclustered)
            for value, indices in zip(group_var.values, indices_per_value)
        ]
        if np.any(column.mask):
            # Rows with a masked group entry get their own trailing part.
            row_groups.append(RowPart(
                title="N/A", indices=np.flatnonzero(column.mask),
                **unclustered
            ))

    all_columns = ColumnPart(
        title=None,
        indices=range(0, len(data.domain.attributes)),
        domain=data.domain,
        **unclustered
    )
    minv = np.nanmin(data.X)
    maxv = np.nanmax(data.X)
    return Parts(row_groups, [all_columns], span=(minv, maxv))
def test_reinterpret_string(self):
    """Reinterpret each meta of ``self.data_str`` with all four transforms.

    The reinterpreted variables are used as the metas of a new domain
    and the transformed ``metas`` are compared with the expected values.
    """
    source = self.data_str
    reinterpreted = []
    for meta in source.domain.metas:
        # Fresh transform instances for every meta, as in the original.
        transforms = [AsContinuous(), AsCategorical(), AsTime(), AsString()]
        for transform in transforms:
            new_var = apply_reinterpret(
                meta, transform, table_column_data(source, meta))
            reinterpreted.append(new_var)

    ttable = source.transform(Domain([], metas=reinterpreted))

    expected_metas = np.array([
        [0.1, 0., np.nan, "0.1", 2010., 0., 1262304000., "2010"],
        [1.0, 1., np.nan, "1.0", 2020., 1., 1577836800., "2020"],
    ], dtype=object)
    assert_array_nanequal(ttable.metas, expected_metas)
def test_as_continuous(self):
    """Reinterpret every variable of ``self.data`` with ``AsContinuous``.

    The expected ``X`` has NaN throughout its first column.
    """
    source = self.data
    reinterpret = AsContinuous()
    converted = [
        apply_reinterpret(var, reinterpret, table_column_data(source, var))
        for var in source.domain.variables
    ]
    ttable = source.transform(Domain(converted))

    expected_x = np.array([
        [np.nan, 2, 0.25, 180],
        [np.nan, 1, 1.25, 360],
        [np.nan, 0, 0.20, 720],
        [np.nan, 0, 0.00, 0],
    ], dtype=float)
    assert_array_equal(ttable.X, expected_x)
def test_as_string(self):
    """Reinterpret every variable of ``self.data`` with ``AsString``.

    The reinterpreted variables are placed in the metas of the new
    domain, so the result is checked through ``ttable.metas``.
    """
    source = self.data
    reinterpret = AsString()
    converted = [
        apply_reinterpret(var, reinterpret, table_column_data(source, var))
        for var in source.domain.variables
    ]
    ttable = source.transform(Domain([], [], converted))

    expected_metas = np.array([
        ["a", "2", "0.25", "00:03:00"],
        ["b", "1", "1.25", "00:06:00"],
        ["c", "0", "0.2", "00:12:00"],
        ["b", "0", "0.0", "00:00:00"],
    ], dtype=object)
    assert_array_equal(ttable.metas, expected_metas)
def test_as_time(self):
    """Apply ``AsTime`` plus an explicit ``StrpTime`` format to metas."""
    # Exercises the reinterpretation on string and discrete source
    # variables only; the correctness of the individual time formats is
    # already tested in the TimeVariable module.
    parse = TimeVariable("_").parse_exact_iso
    times = (
        ["07.02.2022", "18.04.2021"],  # date only
        ["07.02.2022 01:02:03", "18.04.2021 01:02:03"],  # datetime
        ["010203", "010203"],  # time
        ["02-07", "04-18"],
    )
    formats = ["25.11.2021", "25.11.2021 00:00:00", "000000", "11-25"]
    expected = [
        [parse("2022-02-07"), parse("2021-04-18")],
        [parse("2022-02-07 01:02:03"), parse("2021-04-18 01:02:03")],
        [parse("01:02:03"), parse("01:02:03")],
        [parse("1900-02-07"), parse("1900-04-18")],
    ]

    # One string and one discrete variable per entry of `times`.
    string_vars = [StringVariable(f"s{i}") for i in range(len(times))]
    discrete_vars = [
        DiscreteVariable(f"d{i}", values=column)
        for i, column in enumerate(times)
    ]
    domain = Domain([], metas=string_vars + discrete_vars)

    # String columns hold the raw text; discrete columns hold value indices.
    metas = list(times) + [list(range(len(column))) for column in times]
    table = Table(
        domain,
        np.empty((len(times[0]), 0)),
        metas=np.array(metas).transpose(),
    )

    reinterpret = AsTime()
    converted = []
    # The same formats are applied to the string and the discrete variables.
    for var, fmt in zip(domain.metas, formats + formats):
        strp = StrpTime(fmt, *TimeVariable.ADDITIONAL_FORMATS[fmt])
        as_time = apply_reinterpret(
            var, reinterpret, table_column_data(table, var))
        converted.append(apply_transform_var(as_time, [strp]))

    ttable = table.transform(Domain([], metas=converted))
    expected_metas = np.array(expected + expected, dtype=float).transpose()
    assert_array_equal(ttable.metas, expected_metas)