def test_formatter() -> None:
    """Exercise ``object_formatter`` through a temporary ``Decimal`` override.

    The test stringifies the formatter, checks how a ``Decimal`` column is
    stored before and after registering an integer-casting formatter,
    iterates over and copies the formatter, removes the ``Decimal`` entry,
    and finally checks that assigning a formatter that returns a plain
    string raises ``ValueError``.
    """
    _ = str(object_formatter)

    t = Tafra({'x': Decimal(1.2345)})
    assert t._dtypes['x'] == 'float64'
    assert t['x'].dtype == np.dtype(float)

    # ``astype(int)`` produces NumPy's default integer, whose width depends
    # on the platform and the NumPy version. Taking the expected name from
    # NumPy itself keeps the check active everywhere; the former
    # Windows/Linux branches asserted nothing on other systems (e.g. macOS).
    expected_int_name = np.dtype(int).name

    object_formatter['Decimal'] = lambda x: x.astype(int)
    try:
        t = Tafra({'x': Decimal(1.2345)})
        assert t._dtypes['x'] == expected_int_name
        assert t['x'].dtype == np.dtype(int)

        _ = str(object_formatter)

        # Iteration and copying only need to complete without raising.
        for _fmt in object_formatter:
            pass

        _ = object_formatter.copy()
    finally:
        # Always drop the override so that a failed assertion above cannot
        # leak the integer-casting formatter into other tests.
        del object_formatter['Decimal']

    with pytest.raises(ValueError):
        object_formatter['Decimal'] = lambda x: 'int'  # type: ignore

    _ = str(object_formatter)
def test_cross_join() -> None:
    """Cross join one left table against several right tables.

    Each full join result is passed to ``check_tafra``. A ``select`` that
    names columns from only one side is expected to raise ``IndexError``.
    """

    def make_labels() -> np.ndarray:
        # Fresh object array per table so no two tables share a buffer.
        return np.array(
            ['one', 'two', 'one', 'two', 'one', 'two'], dtype='object')

    def make_right(a_values: list) -> Tafra:
        # Right tables differ only in their 'a' column.
        return Tafra({
            'a': np.array(a_values),
            'b': make_labels(),
            'c': np.array([0, 0, 0, 1, 1, 1]),
        })

    left = Tafra({
        'x': np.array([1, 2, 3, 4, 5, 6]),
        'y': make_labels(),
        'z': np.array([0, 0, 0, 1, 1, 1]),
    })

    a_variants = (
        [1, 2, 3, 4, 5, 6],
        [1, 1, 2, 2, 3, 3],
        [1, 1, 1, 2, 2, 2],
    )
    for a_values in a_variants:
        joined = left.cross_join(make_right(a_values))
        check_tafra(joined)

    right = make_right([1, 1, 1, 2, 2, 2])
    joined = left.cross_join(right, select=['x', 'z', 'a', 'c'])
    check_tafra(joined)

    # Selecting columns from only one of the two tables must fail.
    for one_sided in (['x', 'z'], ['a', 'c']):
        with pytest.raises(IndexError):
            left.cross_join(right, select=one_sided)
def test_coalesce() -> None:
    """Check ``coalesce`` and ``coalesce_inplace`` on object columns.

    Both variants are applied to an existing column ``'x'`` and to a
    column ``'y'`` that is not present in the starting table, followed by
    the same assertions on the result.
    """

    def fallbacks() -> list:
        # Fresh nested lists for every call.
        return [[1, 2, 3, None, 5], [None, None, None, None, 'five']]

    def starting_table() -> Tafra:
        return Tafra({'x': np.array([1, 2, None, 4, None])})

    def assert_coalesced(table: Tafra) -> None:
        assert np.all(table['x'] != np.array(None))
        assert table['y'][3] == np.array(None)
        check_tafra(table)

    # Non-mutating variant: the result is assigned back by the caller.
    table = starting_table()
    for column in ('x', 'y'):
        table[column] = table.coalesce(column, fallbacks())  # type: ignore
    assert_coalesced(table)

    # In-place variant.
    table = starting_table()
    for column in ('x', 'y'):
        table.coalesce_inplace(column, fallbacks())  # type: ignore
    assert_coalesced(table)

    # Single-row table; the returned value is intentionally discarded.
    single = Tafra({'x': np.array([None])})
    single.coalesce('x', [[1], [None]])  # type: ignore
    check_tafra(single)
def test_left_join_equi() -> None:
    """Left join a fixed left table against several right tables.

    Every case selects ``['x', 'y', 'a', 'b']`` and passes the result to
    ``check_tafra``. The final case joins with the ``'<'`` operator.
    """

    def make_labels() -> np.ndarray:
        return np.array(
            ['one', 'two', 'one', 'two', 'one', 'two'], dtype='object')

    def right_single_key() -> Tafra:
        return Tafra({
            'a': np.array([1, 2, 3, 4, 5, 6]),
            'b': make_labels(),
            'c': np.array([0, 0, 0, 1, 1, 1]),
        })

    def right_two_keys() -> Tafra:
        return Tafra({
            'a': np.array([1, 1, 1, 2, 2, 2]),
            'b': make_labels(),
            'c': np.array([2, 2, 2, 3, 3, 3]),
        })

    def right_repeated_left_key() -> Tafra:
        return Tafra({
            'a': np.array([1, 1, 1, 2, 2, 2]),
            '_a': np.array([1, 1, 2, 2, 3, 3]),
            'b': make_labels(),
            'c': np.array([0, 0, 0, 1, 1, 1]),
        })

    def right_less_than() -> Tafra:
        return Tafra({
            'a': np.array([1, 1, 2, 2, 3, 3]),
            'b': make_labels(),
            'c': np.array([0, 0, 0, 1, 1, 1]),
        })

    left = Tafra({
        'x': np.array([1, 2, 3, 4, 5, 6]),
        'y': make_labels(),
        'z': np.array([0, 0, 0, 1, 1, 1]),
    })

    # (right-table factory, join conditions), evaluated in this order.
    cases = (
        (right_single_key, [('x', 'a', '==')]),
        (right_two_keys, [('x', 'a', '=='), ('z', 'c', '==')]),
        (right_repeated_left_key, [('x', 'a', '=='), ('x', '_a', '==')]),
        (right_less_than, [('x', 'a', '<')]),
    )
    for make_right, conditions in cases:
        right = make_right()
        joined = left.left_join(right, conditions, ['x', 'y', 'a', 'b'])
        check_tafra(joined)
def test_left_join_invalid() -> None:
    """Check that invalid left joins raise ``TypeError``.

    Three situations are covered: the operator string ``'==='``, a right
    key column built with ``dtype='float'`` joined to an integer left key,
    and a left table whose recorded dtype for ``'x'`` has been overwritten
    with ``'float'``.
    """

    def make_labels() -> np.ndarray:
        return np.array(
            ['one', 'two', 'one', 'two', 'one', 'two'], dtype='object')

    def make_right(a_dtype: object = None) -> Tafra:
        # ``dtype=None`` lets NumPy infer the dtype, as when it is omitted.
        return Tafra({
            'a': np.array([1, 2, 3, 4, 5, 6], dtype=a_dtype),
            'b': make_labels(),
            'c': np.array([0, 0, 0, 1, 1, 1]),
        })

    left = Tafra({
        'x': np.array([1, 2, 3, 4, 5, 6]),
        'y': make_labels(),
        'z': np.array([0, 0, 0, 1, 1, 1]),
    })

    # Operator string '===' is rejected.
    right = make_right()
    with pytest.raises(TypeError):
        left.left_join(right, [('x', 'a', '===')], ['x', 'y', 'a', 'b'])

    # Right key built as float against an integer left key.
    right = make_right('float')
    with pytest.raises(TypeError):
        left.left_join(right, [('x', 'a', '==')], ['x', 'y', 'a', 'b'])

    # Only the left table's recorded dtype entry is changed here.
    right = make_right()
    left._dtypes['x'] = 'float'
    with pytest.raises(TypeError):
        left.left_join(right, [('x', 'a', '==')], ['x', 'y', 'a', 'b'])
def build_tafra() -> Tafra:
    """Return a fresh six-row ``Tafra`` with columns ``'x'``, ``'y'``, ``'z'``.

    ``'x'`` holds the integers 1 through 6, ``'y'`` alternates the strings
    ``'one'``/``'two'`` as an object array, and ``'z'`` holds three zeros
    followed by three ones.
    """
    x_values = np.array([1, 2, 3, 4, 5, 6])
    y_values = np.array(
        ['one', 'two', 'one', 'two', 'one', 'two'], dtype='object')
    z_values = np.array([0, 0, 0, 1, 1, 1])

    columns = {'x': x_values, 'y': y_values, 'z': z_values}
    return Tafra(columns)
def test_destructors() -> None:
    """Smoke-test the export methods of ``Tafra``.

    A table with a float ``'x'`` column containing one NaN is exported via
    ``to_records``, ``to_list``, ``to_tuple``, ``to_array``, ``to_pandas``
    and ``to_csv``, each with no column selection, a single column name, a
    one-element list and a two-element list. Results are discarded; the
    calls only need to complete without raising.
    """

    def gen_values() -> Iterator[Dict[str, np.ndarray]]:
        for name in ('x', 'y'):
            yield {name: np.arange(6)}

    def column_choices() -> Iterator[Dict[str, object]]:
        # Fresh keyword dicts (and lists) for every export call.
        yield {}
        yield {'columns': 'x'}
        yield {'columns': ['x']}
        yield {'columns': ['x', 'y']}

    table = Tafra(gen_values())
    check_tafra(table)

    table = build_tafra()
    table = table.update_dtypes({'x': 'float'})
    table.data['x'][2] = np.nan
    check_tafra(table)

    # to_records: default arguments first, then cast_null=False.
    for extra in ({}, {'cast_null': False}):
        for choice in column_choices():
            tuple(table.to_records(**choice, **extra))

    # to_list / to_tuple: default arguments first, then inner=True.
    for export in (table.to_list, table.to_tuple):
        for extra in ({}, {'inner': True}):
            for choice in column_choices():
                export(**choice, **extra)

    # to_array / to_pandas take only the column selection here.
    for export in (table.to_array, table.to_pandas):
        for choice in column_choices():
            export(**choice)

    # Each call writes to the same path, relative to the working directory.
    filepath = Path('test/test_to_csv.csv')
    for choice in column_choices():
        table.to_csv(filepath, **choice)
def test_map() -> None:
    """Smoke-test ``row_map``, ``tuple_map``, ``col_map`` and ``key_map``.

    Each mapper is driven with ``np.repeat`` and a repeat count of 6; the
    results are consumed and discarded.
    """
    # NOTE(review): another ``test_map`` is defined further down in this
    # file. If both live in the same module, the later definition rebinds
    # the name and this body is not the one pytest collects -- confirm.
    table = build_tafra()

    list(table.row_map(np.repeat, 6))
    list(table.tuple_map(np.repeat, 6))
    list(table.col_map(np.repeat, repeats=6))

    # The key_map output is fed straight back into the Tafra constructor.
    Tafra(table.key_map(np.repeat, repeats=6))
def test_constructions() -> None:
    """Build ``Tafra`` objects from many input forms and validate them.

    Covered inputs: dicts (with and without ``validate``/``check_rows``),
    scalar and list values, sequences of name/value pairs, sequences and
    iterators of dicts, generators, custom iterable classes, ``enumerate``,
    and pandas ``Series``/``DataFrame`` objects. Inputs expected to be
    rejected are checked with ``pytest.raises``.
    """
    # NOTE(review): another ``test_constructions`` is defined further down
    # in this file. If both live in the same module, the later definition
    # rebinds the name and this body is not the one pytest collects --
    # confirm.

    def make_labels() -> np.ndarray:
        return np.array(
            ['one', 'two', 'one', 'two', 'one', 'two'], dtype='object')

    table = build_tafra()
    check_tafra(table)

    table = Tafra(
        {
            'x': np.array([1, 2, 3, 4, 5, 6]),
            'y': make_labels(),
            'z': np.array([0, 0, 0, 1, 1, 1]),
        },
        validate=False)
    check_tafra(table)

    # 'z' has nine entries while 'x' and 'y' have six, so row checks are
    # disabled both at construction and in the validation helper.
    table = Tafra(
        {
            'x': np.array([1, 2, 3, 4, 5, 6]),
            'y': make_labels(),
            'z': np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
        },
        validate=False, check_rows=False)
    check_tafra(table, check_rows=False)

    with pytest.raises(TypeError):
        Tafra()  # type: ignore # noqa

    with pytest.raises(ValueError):
        Tafra({})

    table = Tafra({'x': None})
    with warnings.catch_warnings(record=True):
        check_tafra(table)

    # Single-column dicts with assorted value kinds.
    single_values = (
        Decimal('1.23456'),
        np.array(1),
        np.array([1]),
        [True, False],
        'test',
    )
    for value in single_values:
        check_tafra(Tafra({'x': value}))

    def pair_and_dict_inputs() -> Iterator[Any]:
        # Yielded lazily so each input is built right before it is used.
        yield (('x', np.arange(6)), )
        yield [('x', np.arange(6))]
        yield [['x', np.arange(6)]]
        yield [(np.array('x'), np.arange(6))]
        yield [(np.array(['x']), np.arange(6))]
        yield [('x', np.arange(6)), ('y', np.linspace(0, 1, 6))]
        yield [['x', np.arange(6)], ('y', np.linspace(0, 1, 6))]
        yield [('x', np.arange(6)), ['y', np.linspace(0, 1, 6)]]
        yield [['x', np.arange(6)], ['y', np.linspace(0, 1, 6)]]
        yield [{'x': np.arange(6)}, {'y': np.linspace(0, 1, 6)}]
        yield iter([{'x': np.arange(6)}, {'y': np.linspace(0, 1, 6)}])

    for data in pair_and_dict_inputs():
        check_tafra(Tafra(data))

    def iterator() -> Iterator[Dict[str, np.ndarray]]:
        yield {'x': np.array([1, 2, 3, 4, 5, 6])}
        yield {'y': make_labels()}
        yield {'z': np.array([0, 0, 0, 1, 1, 1])}

    check_tafra(Tafra(iterator()))

    class DictIterable:
        """Iterable whose iterator yields one single-column dict per step."""

        def __iter__(self) -> Iterator[Dict[str, np.ndarray]]:
            yield {'x': np.array([1, 2, 3, 4, 5, 6])}
            yield {'y': make_labels()}
            yield {'z': np.array([0, 0, 0, 1, 1, 1])}

    check_tafra(Tafra(DictIterable()))
    check_tafra(Tafra(iter(DictIterable())))

    class SequenceIterable:
        """Iterable yielding name/value pairs as a mix of tuples and lists."""

        def __iter__(self) -> Iterator[Any]:
            yield ('x', np.array([1, 2, 3, 4, 5, 6]))
            yield ['y', make_labels()]
            yield ('z', np.array([0, 0, 0, 1, 1, 1]))

    check_tafra(Tafra(SequenceIterable()))

    class SequenceIterable2:
        """Like ``SequenceIterable`` but names are one-element arrays."""

        def __iter__(self) -> Iterator[Any]:
            yield (np.array(['x']), np.array([1, 2, 3, 4, 5, 6]))
            yield [np.array(['y']), make_labels()]
            yield (np.array(['z']), np.array([0, 0, 0, 1, 1, 1]))

    check_tafra(Tafra(SequenceIterable2()))
    check_tafra(Tafra(iter(SequenceIterable2())))

    check_tafra(Tafra(enumerate(np.arange(6))))

    # pandas round trips and the generic ``as_tafra`` entry point.
    source = build_tafra()
    frame = pd.DataFrame(source.data)
    conversions = (
        lambda: Tafra.from_series(frame['x']),
        lambda: Tafra.from_dataframe(frame),
        lambda: Tafra.as_tafra(frame),
        lambda: Tafra.as_tafra(frame['x']),
        lambda: Tafra.as_tafra(source),
        lambda: Tafra.as_tafra({'x': np.array(1)}),
        lambda: Tafra.from_series(Series()),
        lambda: Tafra.as_tafra(Series()),
        lambda: Tafra.from_dataframe(DataFrame()),  # type: ignore
        lambda: Tafra.as_tafra(DataFrame()),
    )
    for convert in conversions:
        check_tafra(convert())

    # Inputs that must be rejected.
    with pytest.raises(TypeError):
        Tafra([{1, 2}])  # type: ignore

    class BadIterable:
        """Iterable yielding sets, which are not accepted as column data."""

        def __iter__(self) -> Iterator[Any]:
            yield {1, 2}
            yield {3.1412159, .5772156}

    with pytest.raises(TypeError):
        Tafra(BadIterable())

    with pytest.raises(TypeError):
        Tafra(iter(BadIterable()))

    with pytest.raises(TypeError):
        Tafra(np.arange(6))

    with pytest.raises(TypeError):
        Tafra.as_tafra(np.arange(6))

    # Columns of different lengths.
    with pytest.raises(ValueError):
        Tafra({'x': np.array([1, 2]), 'y': np.array([3., 4., 5.])})
def test_constructions() -> None:
    """Build ``Tafra`` objects from several inputs and export them.

    Covers rejected constructor inputs, single-column dicts with assorted
    value kinds, ``enumerate`` and generator inputs, the ``to_records`` /
    ``to_list`` / ``to_array`` exports, and conversions from pandas
    ``Series``/``DataFrame`` objects.
    """
    # NOTE(review): an earlier ``test_constructions`` is defined above in
    # this file. If both live in the same module, this later definition is
    # the one bound to the name -- confirm that is intended.

    with pytest.raises(TypeError):
        Tafra()  # type: ignore # noqa

    with pytest.raises(ValueError):
        Tafra({})  # type: ignore

    # Single-column dicts with assorted value kinds.
    single_values = (
        None,
        Decimal('1.23456'),
        np.array(1),
        np.array([1]),
        [True, False],
    )
    for value in single_values:
        check_tafra(Tafra({'x': value}))

    # The string-valued table is validated before and after its dtype is
    # updated in place to 'O'.
    table = Tafra({'x': 'test'})
    check_tafra(table)
    table.update_dtypes_inplace({'x': 'O'})
    check_tafra(table)

    check_tafra(Tafra(enumerate(np.arange(6))))

    # Columns of different lengths.
    with pytest.raises(ValueError):
        Tafra({'x': np.array([1, 2]), 'y': np.array([3., 4., 5.])})

    def gen_values() -> Iterator[Dict[str, np.ndarray]]:
        for name in ('x', 'y'):
            yield {name: np.arange(6)}

    check_tafra(Tafra(gen_values()))

    def column_choices() -> Iterator[Dict[str, object]]:
        # Fresh keyword dicts (and lists) for every export call.
        yield {}
        yield {'columns': 'x'}
        yield {'columns': ['x']}
        yield {'columns': ['x', 'y']}

    table = build_tafra()
    table = table.update_dtypes({'x': 'float'})
    table.data['x'][2] = np.nan
    check_tafra(table)

    # to_records: default arguments first, then cast_null=False.
    for extra in ({}, {'cast_null': False}):
        for choice in column_choices():
            tuple(table.to_records(**choice, **extra))

    # to_list: default arguments first, then inner=True.
    for extra in ({}, {'inner': True}):
        for choice in column_choices():
            table.to_list(**choice, **extra)

    for choice in column_choices():
        table.to_array(**choice)

    # pandas round trips and the generic ``as_tafra`` entry point.
    source = build_tafra()
    frame = pd.DataFrame(source.data)
    conversions = (
        lambda: Tafra.from_series(frame['x']),
        lambda: Tafra.from_dataframe(frame),
        lambda: Tafra.as_tafra(frame),
        lambda: Tafra.as_tafra(frame['x']),
        lambda: Tafra.as_tafra(source),
        lambda: Tafra.as_tafra({'x': np.array(1)}),
        lambda: Tafra.from_series(Series()),
        lambda: Tafra.as_tafra(Series()),
        lambda: Tafra.from_dataframe(DataFrame()),  # type: ignore
        lambda: Tafra.as_tafra(DataFrame()),
    )
    for convert in conversions:
        check_tafra(convert())

    # A bare ndarray is rejected by both entry points.
    with pytest.raises(TypeError):
        Tafra(np.arange(6))

    with pytest.raises(TypeError):
        Tafra.as_tafra(np.arange(6))
def test_map() -> None:
    """Smoke-test ``row_map`` and ``col_map`` with ``np.repeat``.

    ``col_map`` is called once with ``name=True``, whose output is passed
    to the ``Tafra`` constructor, and once with ``name=False``, whose
    output is consumed as a list. All results are discarded.
    """
    # NOTE(review): an earlier ``test_map`` is defined above in this file.
    # If both live in the same module, this later definition is the one
    # bound to the name -- confirm that is intended.
    table = build_tafra()

    list(table.row_map(np.repeat, 6))

    named_columns = table.col_map(np.repeat, name=True, repeats=6)
    Tafra(named_columns)

    unnamed_columns = table.col_map(np.repeat, name=False, repeats=6)
    list(unnamed_columns)