def test_hierarchy_from_labels_e(self) -> None:
    '''
    Build a depth-two hierarchy whose inner level uses IndexDate, then select with partial date strings via HLoc.
    '''
    from static_frame.core.exception import ErrorInitIndex

    index_constructors = (Index, IndexDate)
    labels = (
        ('a', '2019-01-01'),
        ('a', '2019-02-01'),
        ('b', '2019-01-01'),
        ('b', '2019-02-01'),
    )

    # supplying one constructor for depth-two labels must fail
    with self.assertRaises(ErrorInitIndex):
        IndexHierarchy.from_labels(labels, index_constructors=(Index, ))

    ih = IndexHierarchy.from_labels(labels, index_constructors=index_constructors)

    jan_1 = datetime.date(2019, 1, 1)
    feb_1 = datetime.date(2019, 2, 1)

    # month, year, and full-day selections on the date level
    selections = (
        ('2019-02', [['a', feb_1], ['b', feb_1]]),
        ('2019', [['a', jan_1], ['a', feb_1], ['b', jan_1], ['b', feb_1]]),
        ('2019-02-01', [['a', feb_1], ['b', feb_1]]),
    )
    for date_key, expected in selections:
        self.assertEqual(ih.loc[HLoc[:, date_key]].values.tolist(), expected)
def test_index_many_concat_c(self) -> None:
    '''
    Concatenate hierarchies with aligned and unaligned index types and check the resulting per-depth types.
    '''
    from datetime import date

    def typed(labels):
        # hierarchy with an explicit IndexDate at depth one
        return IndexHierarchy.from_labels(labels, index_constructors=[Index, IndexDate])

    i1 = typed([[1, date(2019, 1, 1)], [2, date(2019, 1, 2)]])
    i2 = typed([[2, date(2019, 1, 3)], [3, date(2019, 1, 4)]])
    i3 = typed([[4, date(2019, 1, 5)], [5, date(2019, 1, 6)]])
    # same labels as i3, but built without index constructors
    i4 = IndexHierarchy.from_labels([[4, date(2019, 1, 5)], [5, date(2019, 1, 6)]])

    aligned = index_many_concat((i1, i2, i3), cls_default=Index)
    assert isinstance(aligned, IndexHierarchy)
    self.assertEqual(
            aligned.index_types.to_pairs(),
            ((0, Index), (1, IndexDate)),
            )
    self.assertEqual(
            aligned.values.tolist(),
            [
                [1, date(2019, 1, 1)],
                [2, date(2019, 1, 2)],
                [2, date(2019, 1, 3)],
                [3, date(2019, 1, 4)],
                [4, date(2019, 1, 5)],
                [5, date(2019, 1, 6)],
            ])

    # with unaligned index types the result falls back to Index
    unaligned = index_many_concat((i1, i2, i4), cls_default=Index)
    assert isinstance(unaligned, IndexHierarchy)
    self.assertEqual(
            unaligned.index_types.to_pairs(),
            ((0, Index), (1, Index)),
            )
def test_hierarchy_from_labels_a(self):
    '''
    Build depth-three hierarchies from label tuples and confirm each label maps to its own position.
    '''
    full_labels = (
        ('I', 'A', 1),
        ('I', 'A', 2),
        ('I', 'B', 1),
        ('I', 'B', 2),
        ('II', 'A', 1),
        ('II', 'A', 2),
        ('II', 'B', 1),
        ('II', 'B', 2),
    )
    ih_full = IndexHierarchy.from_labels(full_labels)
    self.assertEqual(len(ih_full), 8)
    self.assertEqual(ih_full.depth, 3)
    positions = [ih_full.loc_to_iloc(label) for label in full_labels]
    self.assertEqual(positions, [0, 1, 2, 3, 4, 5, 6, 7])

    # a shorter, unbalanced set of labels
    sparse_labels = (
        ('I', 'A', 1),
        ('I', 'A', 2),
        ('I', 'B', 1),
        ('II', 'B', 2),
    )
    ih_sparse = IndexHierarchy.from_labels(sparse_labels)
    self.assertEqual(len(ih_sparse), 4)
    self.assertEqual(ih_sparse.depth, 3)
    positions = [ih_sparse.loc_to_iloc(label) for label in sparse_labels]
    self.assertEqual(positions, [0, 1, 2, 3])
def test_hierarchy_intersection_a(self):
    '''
    Check intersection and union of two partially overlapping hierarchies.
    '''
    ih1 = IndexHierarchy.from_labels((
            ('I', 'A'),
            ('I', 'B'),
            ('II', 'A'),
            ('II', 'B'),
            ))
    ih2 = IndexHierarchy.from_labels((
            ('II', 'A'),
            ('II', 'B'),
            ('III', 'A'),
            ('III', 'B'),
            ))

    shared = ih1.intersection(ih2)
    self.assertEqual(shared.values.tolist(), [['II', 'A'], ['II', 'B']])

    combined = ih1.union(ih2)
    self.assertEqual(
            combined.values.tolist(),
            [
                ['I', 'A'],
                ['I', 'B'],
                ['II', 'A'],
                ['II', 'B'],
                ['III', 'A'],
                ['III', 'B'],
            ])
def test_index_display(self, ih: IndexHierarchy) -> None:
    '''
    Call each display method of the given hierarchy and check the result is non-empty.
    '''
    # resolve and call one method at a time, in the same order as before
    for method_name in ('display', 'display_tall', 'display_wide'):
        rendered = getattr(ih, method_name)()
        self.assertTrue(len(rendered) > 0)
def test_hierarhcy_init_a(self):
    '''
    Initialising an IndexHierarchy from another IndexHierarchy keeps the name.
    '''
    source = IndexHierarchy.from_labels((('I', 'A'), ('I', 'B')), name='foo')
    duplicate = IndexHierarchy(source)

    self.assertEqual(source.name, 'foo')
    self.assertEqual(duplicate.name, 'foo')
def test_index_levels_equals_b(self) -> None:
    '''
    Levels built from two independently constructed but identical products compare equal.
    '''
    def build_levels():
        # fresh Index instances on every call
        letters = Index(('a', 'b', 'c', 'd', 'e'))
        numbers = Index(range(10))
        return IndexHierarchy.from_product(letters, numbers)._levels

    levels1 = build_levels()
    levels2 = build_levels()
    self.assertTrue(levels1.equals(levels2))
def test_index_hierarchy_pickle_a(self) -> None:
    '''
    Round-trip IndexHierarchy instances through pickle; the restored index must keep read-only labels and return the same loc selections as the source.
    '''
    a = IndexHierarchy.from_product((10, 20), (3, 7))
    b = IndexHierarchy.from_product(('a', 'b'), ('x', 'y'))

    for index in (a, b):
        # force creating of ._labels; assertEqual is required here because
        # assertTrue would treat the second value as the failure message
        self.assertEqual(len(index.values), len(index))

        pbytes = pickle.dumps(index)
        index_new = pickle.loads(pbytes)

        for v in index:  # iter labels (arrays here)
            # checked on every iteration, i.e. also after loc selections
            self.assertFalse(index_new._labels.flags.writeable)
            self.assertEqual(index_new.loc[tuple(v)], index.loc[tuple(v)])
def from_pandas(cls, value, *, is_go: bool = False) -> 'IndexBase':
    '''
    Given a Pandas index, return the appropriate IndexBase derived class.

    Args:
        value: the Pandas index to convert.
        is_go: if True, return the grow-only variant of the selected class.

    Raises:
        NotImplementedError: if ``value`` is a ``pandas.DatetimeIndex`` and ``is_go`` is True.
    '''
    import pandas
    from static_frame import Index
    from static_frame import IndexGO
    from static_frame import IndexDate
    from static_frame import IndexHierarchy
    from static_frame import IndexHierarchyGO

    if isinstance(value, pandas.MultiIndex):
        # iterating over a hierarchical index will iterate over labels
        hierarchy_cls = IndexHierarchyGO if is_go else IndexHierarchy
        return hierarchy_cls.from_labels(value)

    if isinstance(value, pandas.DatetimeIndex):
        if is_go:
            raise NotImplementedError(
                    'No grow-only version of IndexDate yet exists')
        return IndexDate(value)

    flat_cls = IndexGO if is_go else Index
    return flat_cls(value)
def test_hierarchy_frame_a(self):
    '''
    Assign into a Frame whose index and columns share a depth-three hierarchy, using a single label pair and then a slice combined with an HLoc selection.
    '''
    tree = OrderedDict([
            ('I', OrderedDict([
                    ('A', (1,)),
                    ('B', (1, 2)),
                    ])),
            ('II', OrderedDict([
                    ('A', (1,)),
                    ('B', (1, 2)),
                    ])),
            ])
    ih = IndexHierarchy.from_tree(tree)

    data = np.arange(6*6).reshape(6, 6)
    frame = Frame(data, index=ih, columns=ih)

    # assign one cell: row ('I', 'B', 2), column ('II', 'A', 1)
    single_cell = frame.assign.loc[('I', 'B', 2), ('II', 'A', 1)](200)
    post = single_cell.to_pairs(0)
    expected_single = (
        (('I', 'A', 1), ((('I', 'A', 1), 0), (('I', 'B', 1), 6), (('I', 'B', 2), 12), (('II', 'A', 1), 18), (('II', 'B', 1), 24), (('II', 'B', 2), 30))),
        (('I', 'B', 1), ((('I', 'A', 1), 1), (('I', 'B', 1), 7), (('I', 'B', 2), 13), (('II', 'A', 1), 19), (('II', 'B', 1), 25), (('II', 'B', 2), 31))),
        (('I', 'B', 2), ((('I', 'A', 1), 2), (('I', 'B', 1), 8), (('I', 'B', 2), 14), (('II', 'A', 1), 20), (('II', 'B', 1), 26), (('II', 'B', 2), 32))),
        (('II', 'A', 1), ((('I', 'A', 1), 3), (('I', 'B', 1), 9), (('I', 'B', 2), 200), (('II', 'A', 1), 21), (('II', 'B', 1), 27), (('II', 'B', 2), 33))),
        (('II', 'B', 1), ((('I', 'A', 1), 4), (('I', 'B', 1), 10), (('I', 'B', 2), 16), (('II', 'A', 1), 22), (('II', 'B', 1), 28), (('II', 'B', 2), 34))),
        (('II', 'B', 2), ((('I', 'A', 1), 5), (('I', 'B', 1), 11), (('I', 'B', 2), 17), (('II', 'A', 1), 23), (('II', 'B', 1), 29), (('II', 'B', 2), 35))),
    )
    self.assertEqual(post, expected_single)

    # assign a region: rows from ('I', 'B', 2) onward, columns whose innermost label is 2
    region = frame.assign.loc[('I', 'B', 2):, HLoc[:, :, 2]](200)
    expected_region = (
        (('I', 'A', 1), ((('I', 'A', 1), 0), (('I', 'B', 1), 6), (('I', 'B', 2), 12), (('II', 'A', 1), 18), (('II', 'B', 1), 24), (('II', 'B', 2), 30))),
        (('I', 'B', 1), ((('I', 'A', 1), 1), (('I', 'B', 1), 7), (('I', 'B', 2), 13), (('II', 'A', 1), 19), (('II', 'B', 1), 25), (('II', 'B', 2), 31))),
        (('I', 'B', 2), ((('I', 'A', 1), 2), (('I', 'B', 1), 8), (('I', 'B', 2), 200), (('II', 'A', 1), 200), (('II', 'B', 1), 200), (('II', 'B', 2), 200))),
        (('II', 'A', 1), ((('I', 'A', 1), 3), (('I', 'B', 1), 9), (('I', 'B', 2), 15), (('II', 'A', 1), 21), (('II', 'B', 1), 27), (('II', 'B', 2), 33))),
        (('II', 'B', 1), ((('I', 'A', 1), 4), (('I', 'B', 1), 10), (('I', 'B', 2), 16), (('II', 'A', 1), 22), (('II', 'B', 1), 28), (('II', 'B', 2), 34))),
        (('II', 'B', 2), ((('I', 'A', 1), 5), (('I', 'B', 1), 11), (('I', 'B', 2), 200), (('II', 'A', 1), 200), (('II', 'B', 1), 200), (('II', 'B', 2), 200))),
    )
    self.assertEqual(region.to_pairs(0), expected_region)
def test_hierarchy_contains_a(self):
    '''
    A label tuple used to build the hierarchy is reported as contained in it.
    '''
    ih = IndexHierarchy.from_labels((('I', 'A'), ('I', 'B')))
    self.assertIn(('I', 'A'), ih)
def test_index_level_index_types_a(self) -> None:
    '''
    The levels of a product hierarchy report the index class used at each depth.
    '''
    outer = Index(('A', 'B'))
    dates = IndexDate.from_date_range('2019-01-05', '2019-01-08')
    inner = Index((1, 2))

    hidx = IndexHierarchy.from_product(outer, dates, inner)

    type_names = [index_type.__name__ for index_type in hidx._levels.index_types()]
    self.assertEqual(type_names, ['Index', 'IndexDate', 'Index'])
def from_pandas(cls, value: 'pandas.Index', ) -> 'IndexBase':
    '''
    Given a Pandas index, return the appropriate IndexBase derived class.

    Args:
        value: the Pandas index to convert; ``pandas.MultiIndex`` and ``pandas.DatetimeIndex`` are handled specially.

    Returns:
        A hierarchical index for a ``pandas.MultiIndex``, an instance of ``cls`` for a ``pandas.DatetimeIndex``, otherwise an ``Index`` (or ``IndexGO`` when ``cls.STATIC`` is false).

    Raises:
        ErrorInitIndex: if ``value`` is a ``pandas.DatetimeIndex`` and ``cls`` is not a subclass of ``IndexDatetime``.
    '''
    import pandas
    from static_frame.core.index_datetime import IndexDatetime

    from static_frame import Index
    from static_frame import IndexGO
    from static_frame import IndexHierarchy
    from static_frame import IndexHierarchyGO

    if isinstance(value, pandas.MultiIndex):
        # iterating over a hierarchical index will iterate over labels
        name = tuple(value.names)
        if not cls.STATIC:
            return IndexHierarchyGO.from_labels(value, name=name)
        return IndexHierarchy.from_labels(value, name=name)
    elif isinstance(value, pandas.DatetimeIndex):
        # a datetime index can only be built when called on an IndexDatetime subclass
        if not issubclass(cls, IndexDatetime):
            raise ErrorInitIndex(f'cannot create a datetime Index from {cls}')
        # cls is used directly whether static or grow-only, so no STATIC branch is needed
        return cls(value, name=value.name)

    if not cls.STATIC:
        return IndexGO(value, name=value.name)
    return Index(value, name=value.name)
def test_hierarchy_extract_a(self):
    '''
    Single-label extraction by iloc, loc, __getitem__ and HLoc returns the label as a tuple.
    '''
    idx = IndexHierarchy.from_product(['A', 'B'], [1, 2])

    by_position = idx.iloc[1]
    self.assertEqual(by_position, ('A', 2))

    by_label = idx.loc[('B', 1)]
    self.assertEqual(by_label, ('B', 1))

    by_getitem = idx[2]
    self.assertEqual(by_getitem, ('B', 1))

    by_hloc = idx.loc[HLoc['B', 1]]
    self.assertEqual(by_hloc, ('B', 1))
def test_hierarchy_name_a(self):
    '''
    The name given at construction is retained, and rename returns an index carrying the new name.
    '''
    original = IndexHierarchy.from_product(list('ab'), list('xy'), name='q')
    self.assertEqual(original.name, 'q')

    renamed = original.rename('w')
    self.assertEqual(renamed.name, 'w')
def test_hierarchy_relabel_a(self):
    '''
    Relabel a hierarchy with a mapping and with a function; a replacement label of a different depth raises.
    '''
    ih = IndexHierarchy.from_labels((
            ('I', 'A'),
            ('I', 'B'),
            ('II', 'A'),
            ('II', 'B'),
            ))
    mapping = {('I', 'B'): ('I', 'C')}

    # first call discards its result; the second call is the one inspected
    ih.relabel(mapping)
    remapped = ih.relabel(mapping)
    self.assertEqual(
            remapped.values.tolist(),
            [['I', 'A'], ['I', 'C'], ['II', 'A'], ['II', 'B']])

    # replacement label has three elements where the hierarchy has depth two
    with self.assertRaises(Exception):
        ih.relabel({('I', 'B'): ('I', 'C', 1)})

    lowered = ih.relabel(lambda label: tuple(part.lower() for part in label))
    self.assertEqual(
            lowered.values.tolist(),
            [['i', 'a'], ['i', 'b'], ['ii', 'a'], ['ii', 'b']])
def test_index_hierarchy_sort_a(self) -> None:
    '''
    Sorting a product hierarchy with ascending=False reverses the label order.
    '''
    ih1 = IndexHierarchy.from_product((1, 2), (30, 70))

    descending = ih1.sort(ascending=False)

    self.assertEqual(
            descending.values.tolist(),
            [[2, 70], [2, 30], [1, 70], [1, 30]],
            )
def test_hierarchy_extract_a(self) -> None:
    '''
    Single-label extraction by iloc, loc, __getitem__ and HLoc returns the label as a tuple.
    '''
    idx = IndexHierarchy.from_product(['A', 'B'], [1, 2])

    by_position = idx.iloc[1]
    self.assertEqual(by_position, ('A', 2))

    by_label = idx.loc[('B', 1)]
    self.assertEqual(by_label, ('B', 1))

    by_getitem = idx[2]  #pylint: disable=E1136
    self.assertEqual(by_getitem, ('B', 1))

    by_hloc = idx.loc[HLoc['B', 1]]
    self.assertEqual(by_hloc, ('B', 1))
def test_hierarchy_from_labels_delimited_a(self) -> None:
    '''
    Space-delimited, quoted label strings are split into a depth-two hierarchy.
    '''
    delimited = ("'I' 'A'", "'I' 'B'")

    ih = IndexHierarchy.from_labels_delimited(delimited)

    self.assertEqual(
            ih.values.tolist(),
            [['I', 'A'], ['I', 'B']],
            )
def test_hierarchy_from_product_a(self):
    '''
    Build a hierarchy as the product of two Index instances and an IndexDate, then check its depth and size.
    '''
    groups = Index(('A', 'B', 'C'))
    dates = IndexDate.from_date_range('2018-01-01', '2018-01-04')
    observations = Index(('x', 'y'))

    ih = IndexHierarchy.from_product(groups, dates, observations)

    # one depth per component index
    self.assertEqual(ih.depth, 3)
    # a product holds every combination of component labels; the expected
    # size is derived from the inputs rather than hard-coded
    self.assertEqual(len(ih), len(groups) * len(dates) * len(observations))
def test_frame_iter_element_c(self) -> None:
    '''
    Iterate and apply over the elements of a Frame with hierarchical columns; the result keeps the columns class.
    '''
    a2 = np.array([
            [None, None],
            [None, 1],
            [None, 5]
            ], dtype=object)
    a1 = np.array([True, False, True])
    a3 = np.array([['a'], ['b'], ['c']])

    blocks = TypeBlocks.from_blocks((a3, a1, a2))

    row_labels = self.get_letters(None, blocks.shape[0])
    column_labels = IndexHierarchy.from_product(('i', 'ii'), ('a', 'b'))
    f1 = Frame(blocks, index=row_labels, columns=column_labels)

    elements = list(f1.iter_element())
    self.assertEqual(
            elements,
            ['a', True, None, None, 'b', False, None, 1, 'c', True, None, 5])

    def lower_without_e(x):
        # stringify, lower-case, and drop every 'e'
        return str(x).lower().replace('e', '')

    f2 = f1.iter_element().apply(lower_without_e)

    self.assertEqual(f1.columns.__class__, f2.columns.__class__)

    expected_pairs = (
        (('i', 'a'), (('a', 'a'), ('b', 'b'), ('c', 'c'))),
        (('i', 'b'), (('a', 'tru'), ('b', 'fals'), ('c', 'tru'))),
        (('ii', 'a'), (('a', 'non'), ('b', 'non'), ('c', 'non'))),
        (('ii', 'b'), (('a', 'non'), ('b', '1'), ('c', '5'))),
    )
    self.assertEqual(f2.to_pairs(0), expected_pairs)
def test_build_key_indexers_from_key_a(self) -> None:
    '''
    build_key_indexers gives the same indexers with and without encoding_can_overflow set; only the result dtype differs.
    '''
    ih = IndexHierarchy.from_product(range(3), range(4, 7), tuple('ABC'))

    map_default = ih._map
    map_overflow = deepcopy(ih._map)
    map_overflow.encoding_can_overflow = True

    def check(
            key: tuple,  # type: ignore
            expected: tp.List[tp.List[int]],
            ) -> None:
        # default map first, then the overflow-flagged copy
        for mapping, dtype in ((map_default, np.uint64), (map_overflow, object)):
            result = mapping.build_key_indexers(key, indices=ih._indices)
            self.assertEqual(result.dtype, dtype)
            self.assertListEqual(result.tolist(), expected)

    check((0, 5, 'A'), [0, 1, 0])  # type: ignore
    check((0, 5, ['A']), [[0, 1, 0]])
    check(([0, 1], 5, ['B']), [[0, 1, 1], [1, 1, 1]])
    check(([0, 1], 5, 'A'), [[0, 1, 0], [1, 1, 0]])
    check(
        ([0, 1], [4, 5, 6], 'C'),
        [
            [0, 0, 2],
            [0, 1, 2],
            [0, 2, 2],
            [1, 0, 2],
            [1, 1, 2],
            [1, 2, 2],
        ],
    )
def test_index_level_label_widths_at_depth_a(self) -> None:
    '''
    label_widths_at_depth yields (label, width) pairs for each depth of levels built from a tree.
    '''
    tree = OrderedDict([
            ('I', OrderedDict([
                    ('A', (1, 2)),
                    ('B', (1, 2, 3)),
                    ('C', (2, 3)),
                    ])),
            ('II', OrderedDict([
                    ('A', (1,)),
                    ('B', (1,)),
                    ])),
            ('III', OrderedDict([
                    ('A', (1, 2, 3)),
                    ('B', (1,)),
                    ])),
            ])

    levels = IndexHierarchy._tree_to_index_level(tree)

    # collect all three depths before asserting on any of them
    widths = [tuple(levels.label_widths_at_depth(depth)) for depth in range(3)]

    self.assertEqual(widths[0], (('I', 7), ('II', 2), ('III', 4)))

    self.assertEqual(
            widths[1],
            (('A', 2), ('B', 3), ('C', 2), ('A', 1), ('B', 1), ('A', 3), ('B', 1)),
            )

    self.assertEqual(
            widths[2],
            (
                (1, 1), (2, 1), (1, 1), (2, 1), (3, 1), (2, 1), (3, 1),
                (1, 1), (1, 1), (1, 1), (2, 1), (3, 1), (1, 1),
            ))
def test_hierarchy_reversed(self):
    '''
    Reversed iteration over a hierarchy yields the source labels in reverse order.
    '''
    labels = (('a', 1), ('a', 2), ('b', 1), ('b', 2))
    hier_idx = IndexHierarchy.from_labels(labels)

    paired = zip(reversed(hier_idx), reversed(labels))
    matches = (tuple(found) == expected for found, expected in paired)
    self.assertTrue(all(matches))
def test_index_many_concat_d(self) -> None:
    '''
    Concatenating hierarchies with cls_default=IndexGO produces an IndexHierarchyGO holding all labels.
    '''
    from datetime import date

    first = IndexHierarchy.from_labels(
            [[1, date(2019, 1, 1)], [2, date(2019, 1, 2)]],
            index_constructors=[Index, IndexDate],
            )
    second = IndexHierarchy.from_labels(
            [[2, date(2019, 1, 3)], [3, date(2019, 1, 4)]],
            index_constructors=[Index, IndexDate],
            )

    post1 = index_many_concat((first, second), cls_default=IndexGO)

    self.assertEqual(post1.__class__, IndexHierarchyGO)
    assert isinstance(post1, IndexHierarchy)

    expected = [
        [1, date(2019, 1, 1)],
        [2, date(2019, 1, 2)],
        [2, date(2019, 1, 3)],
        [3, date(2019, 1, 4)],
    ]
    self.assertEqual(post1.values.tolist(), expected)
def from_pandas(
        cls,
        value: 'pandas.Index',
        ) -> 'IndexBase':
    '''
    Given a Pandas index, return the appropriate IndexBase derived class.

    Args:
        value: the Pandas index to convert; ``pandas.MultiIndex`` and ``pandas.DatetimeIndex`` are handled specially.

    Returns:
        A hierarchical index for a ``pandas.MultiIndex``; for a ``pandas.DatetimeIndex``, an instance of ``cls`` when ``cls`` is an ``IndexDatetime`` subclass, else a nanosecond index; otherwise an ``Index``. Grow-only variants are returned when ``cls.STATIC`` is false.
    '''
    import pandas

    from static_frame import Index
    from static_frame import IndexGO
    from static_frame import IndexHierarchy
    from static_frame import IndexHierarchyGO
    from static_frame import IndexNanosecond
    from static_frame import IndexNanosecondGO
    from static_frame.core.index_datetime import IndexDatetime

    if isinstance(value, pandas.MultiIndex):
        # iterating over a hierarchical index will iterate over labels
        name = tuple(value.names)
        if not cls.STATIC:
            return IndexHierarchyGO.from_labels(value, name=name)
        return IndexHierarchy.from_labels(value, name=name)

    if isinstance(value, pandas.DatetimeIndex):
        # if IndexDatetime, use cls, else use IndexNanosecond
        if issubclass(cls, IndexDatetime):
            return cls(value, name=value.name)
        if not cls.STATIC:
            return IndexNanosecondGO(value, name=value.name)
        return IndexNanosecond(value, name=value.name)

    if not cls.STATIC:
        return IndexGO(value, name=value.name)
    return Index(value, name=value.name)
def test_frame_iter_group_labels_b(self) -> None:
    '''
    Group a Frame with hierarchical columns by the labels at column depth one, and select a single column via HLoc.
    '''
    records = (
        (2, 2, 'a', 'q', False, False),
        (30, 34, 'b', 'c', True, False),
        (2, 95, 'c', 'd', False, False),
    )
    f1 = Frame.from_records(
            records,
            columns=IndexHierarchy.from_product((1, 2, 3), ('a', 'b')),
            index=('x', 'y', 'z'),
            )

    # with axis 1, we are grouping based on columns while maintaining the index
    groups = tuple(f1.iter_group_labels(1, axis=1))
    self.assertEqual(len(groups), 2)

    first_column_pairs = (('x', 2), ('y', 30), ('z', 2))

    # selection through __getitem__
    selected = f1[HLoc[f1.columns[0]]]
    self.assertEqual(selected.__class__, Series)
    self.assertEqual(selected.to_pairs(), first_column_pairs)

    # the same selection through loc
    selected = f1.loc[:, HLoc[f1.columns[0]]]
    self.assertEqual(selected.__class__, Series)
    self.assertEqual(selected.to_pairs(), first_column_pairs)

    def first_column_sum(group):
        # sum of the first column of each group
        return group.iloc[:, 0].sum()

    summed = f1.iter_group_labels(1, axis=1).apply(first_column_sum)
    self.assertEqual(summed.to_pairs(), (('a', 34), ('b', 131)))
def test_index_level_values_at_depth_a(self) -> None:
    '''
    values_at_depth returns the labels found at the requested depth, and depth agrees with the first value yielded by depths().
    '''
    labels = (
        ('a', 1, 'x'),
        ('a', 2, 'y'),
        ('b', 1, 'foo'),
        ('b', 1, 'bar'),
    )
    hidx = IndexHierarchy.from_labels(labels)
    levels = hidx._levels

    innermost = levels.values_at_depth(2).tolist()
    self.assertEqual(innermost, ['x', 'y', 'foo', 'bar'])

    depth = levels.depth
    first_depth = next(levels.depths())
    self.assertEqual(depth, first_depth)
def test_index_init_b(self) -> None:
    '''
    Building an Index from an IndexHierarchy yields tuple labels.
    '''
    hierarchy = IndexHierarchy.from_product(['A', 'B'], [1, 2])

    flat = Index(hierarchy)

    self.assertEqual(
            flat.values.tolist(),
            [('A', 1), ('A', 2), ('B', 1), ('B', 2)],
            )
def test_hierarchy_to_pandas_a(self):
    '''
    Converting a hierarchy to Pandas preserves the labels.
    '''
    idx1 = IndexHierarchy.from_product(list('ab'), list('xy'), name='q')
    pdidx = idx1.to_pandas()

    # NOTE: pandas .values on a hierarchical index returns an array of tuples
    pandas_labels = [list(label) for label in pdidx.values.tolist()]
    self.assertEqual(idx1.values.tolist(), pandas_labels)