Ejemplo n.º 1
0
 def test_join_on_fails_with_different_left_index(self):
     """Merging on a two-level left index with one ``right_on`` key fails.

     ``df`` carries a two-level index while ``right_on`` names a single
     column, so ``merge`` is expected to raise ``ValueError``.
     """
     # The index must have as many entries as the data (3). A longer index
     # makes the DataFrame constructor itself raise ValueError, which would
     # satisfy the assertion without merge ever being called.
     df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
                     'b': np.random.randn(3)},
                    index=tm.makeCustomIndex(3, 2))
     df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
                      'b': np.random.randn(10)})
     # Only the call under test lives inside the raises block so that an
     # error during setup cannot make the test pass.
     with pytest.raises(ValueError):
         merge(df, df2, right_on='b', left_index=True)
Ejemplo n.º 2
0
 def test_join_on_fails_with_different_column_counts(self):
     """``merge`` must reject ``left_on``/``right_on`` of unequal length.

     One key is given for the right frame and two for the left frame, so
     ``merge`` is expected to raise ``ValueError``.
     """
     df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
                     'b': np.random.randn(3)})
     df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
                      'b': np.random.randn(10)},
                     index=tm.makeCustomIndex(10, 2))
     # Only the call under test lives inside the raises block so that an
     # error during setup cannot make the test pass.
     with tm.assertRaises(ValueError):
         merge(df, df2, right_on='a', left_on=['a', 'b'])
Ejemplo n.º 3
0
 def test_join_on_fails_with_different_column_counts(self):
     """``merge`` must reject ``left_on``/``right_on`` of unequal length.

     One key is given for the right frame and two for the left frame, so
     ``merge`` is expected to raise ``ValueError``.
     """
     df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
                     'b': np.random.randn(3)})
     df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
                      'b': np.random.randn(10)},
                     index=tm.makeCustomIndex(10, 2))
     # Only the call under test lives inside the raises block so that an
     # error during setup cannot make the test pass.
     with tm.assertRaises(ValueError):
         merge(df, df2, right_on='a', left_on=['a', 'b'])
Ejemplo n.º 4
0
 def test_join_on_fails_with_different_left_index(self):
     """Merging on a two-level left index with one ``right_on`` key fails.

     ``df`` carries a two-level index while ``right_on`` names a single
     column, so ``merge`` is expected to raise ``ValueError``.
     """
     # The index must have as many entries as the data (3). A longer index
     # makes the DataFrame constructor itself raise ValueError, which would
     # satisfy the assertion without merge ever being called.
     df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
                     'b': np.random.randn(3)},
                    index=tm.makeCustomIndex(3, 2))
     df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
                      'b': np.random.randn(10)})
     # Only the call under test lives inside the raises block so that an
     # error during setup cannot make the test pass.
     with pytest.raises(ValueError):
         merge(df, df2, right_on='b', left_index=True)
Ejemplo n.º 5
0
 def test_join_on_fails_with_different_column_counts(self):
     """One ``right_on`` key against two ``left_on`` keys must be rejected."""
     left_values = {'a': np.random.choice(['m', 'f'], size=3),
                    'b': np.random.randn(3)}
     left = DataFrame(left_values)

     right_values = {'a': np.random.choice(['m', 'f'], size=10),
                     'b': np.random.randn(10)}
     right_index = tm.makeCustomIndex(10, 2)
     right = DataFrame(right_values, index=right_index)

     # The error text is matched as a regex, hence the escaped parentheses.
     expected = r"len\(right_on\) must equal len\(left_on\)"
     with pytest.raises(ValueError, match=expected):
         merge(left, right, right_on='a', left_on=['a', 'b'])
Ejemplo n.º 6
0
 def test_join_on_fails_with_different_column_counts(self):
     """One ``right_on`` key against two ``left_on`` keys must be rejected."""
     left_values = {'a': np.random.choice(['m', 'f'], size=3),
                    'b': np.random.randn(3)}
     left = DataFrame(left_values)

     right_values = {'a': np.random.choice(['m', 'f'], size=10),
                     'b': np.random.randn(10)}
     right_index = tm.makeCustomIndex(10, 2)
     right = DataFrame(right_values, index=right_index)

     # The error text is matched as a regex, hence the escaped parentheses.
     expected = r"len\(right_on\) must equal len\(left_on\)"
     with pytest.raises(ValueError, match=expected):
         merge(left, right, right_on='a', left_on=['a', 'b'])
Ejemplo n.º 7
0
 def test_join_on_fails_with_different_left_index(self):
     """A single ``right_on`` key cannot be merged with a two-level left index."""
     left_values = {'a': np.random.choice(['m', 'f'], size=3),
                    'b': np.random.randn(3)}
     left_index = tm.makeCustomIndex(3, 2)
     left = DataFrame(left_values, index=left_index)

     right_values = {'a': np.random.choice(['m', 'f'], size=10),
                     'b': np.random.randn(10)}
     right = DataFrame(right_values)

     # The error text is matched as a regex, hence the escaped parentheses.
     expected = (
         r'len\(right_on\) must equal the number of levels '
         r'in the index of "left"'
     )
     with pytest.raises(ValueError, match=expected):
         merge(left, right, right_on='b', left_index=True)
Ejemplo n.º 8
0
 def test_join_on_fails_with_different_left_index(self):
     """A single ``right_on`` key cannot be merged with a two-level left index."""
     left_values = {'a': np.random.choice(['m', 'f'], size=3),
                    'b': np.random.randn(3)}
     left_index = tm.makeCustomIndex(3, 2)
     left = DataFrame(left_values, index=left_index)

     right_values = {'a': np.random.choice(['m', 'f'], size=10),
                     'b': np.random.randn(10)}
     right = DataFrame(right_values)

     # The error text is matched as a regex, hence the escaped parentheses.
     expected = (
         r'len\(right_on\) must equal the number of levels '
         r'in the index of "left"'
     )
     with pytest.raises(ValueError, match=expected):
         merge(left, right, right_on='b', left_index=True)
Ejemplo n.º 9
0
 def test_join_on_fails_with_different_column_counts(self):
     """One ``right_on`` key against two ``left_on`` keys must be rejected."""
     left_values = {
         "a": np.random.choice(["m", "f"], size=3),
         "b": np.random.randn(3),
     }
     left = DataFrame(left_values)

     right_values = {
         "a": np.random.choice(["m", "f"], size=10),
         "b": np.random.randn(10),
     }
     right_index = tm.makeCustomIndex(10, 2)
     right = DataFrame(right_values, index=right_index)

     # The error text is matched as a regex, hence the escaped parentheses.
     expected = r"len\(right_on\) must equal len\(left_on\)"
     with pytest.raises(ValueError, match=expected):
         merge(left, right, right_on="a", left_on=["a", "b"])
Ejemplo n.º 10
0
 def test_join_on_fails_with_different_right_index(self):
     """A single ``left_on`` key cannot be merged with a two-level right index."""
     left_values = {
         "a": np.random.choice(["m", "f"], size=3),
         "b": np.random.randn(3),
     }
     left = DataFrame(left_values)

     right_values = {
         "a": np.random.choice(["m", "f"], size=10),
         "b": np.random.randn(10),
     }
     right_index = tm.makeCustomIndex(10, 2)
     right = DataFrame(right_values, index=right_index)

     # The error text is matched as a regex, hence the escaped parentheses.
     expected = (
         r"len\(left_on\) must equal the number of levels "
         r'in the index of "right"'
     )
     with pytest.raises(ValueError, match=expected):
         merge(left, right, left_on="a", right_index=True)
Ejemplo n.º 11
0
def test_fails_on_no_datetime_index():
    """Grouping by ``TimeGrouper('D')`` must raise ``TypeError`` on these indexes.

    Each case pairs the index class name expected in the error message
    with the factory used to build an index of that kind.
    """
    size = 2
    cases = [
        ('Int64Index', tm.makeIntIndex),
        ('Index', tm.makeUnicodeIndex),
        ('Float64Index', tm.makeFloatIndex),
        ('MultiIndex', lambda m: tm.makeCustomIndex(m, 2)),
    ]
    for label, make_index in cases:
        idx = make_index(size)
        frame = DataFrame({'a': np.random.randn(size)}, index=idx)

        expected = ("Only valid with DatetimeIndex, TimedeltaIndex "
                    "or PeriodIndex, but got an instance of %r" % label)
        with pytest.raises(TypeError, match=expected):
            frame.groupby(TimeGrouper('D'))
Ejemplo n.º 12
0
def metadata_column_headers(request):
    """Make a list of metadata column headers.

    Args:
        request: Object whose ``param`` attribute gives the number of
            headers to create.

    Returns:
        list: A metadata column header list whose length is between 0
            and `MAX_METADATA_COLS`.
    """
    count = request.param
    # Compare by value: ``is`` tests object identity, which only happens to
    # work for small ints on CPython and triggers a SyntaxWarning on 3.8+.
    if count == 0:
        return []
    # pandas bug (?) in makeCustomIndex when nentries = 1
    if count == 1:
        return ['M_l0_g0']
    return list(makeCustomIndex(count, 1, prefix='M'))
Ejemplo n.º 13
0
 def test_fails_on_no_datetime_index(self):
     """Grouping by ``TimeGrouper('D')`` must raise ``TypeError`` on these indexes.

     Each case pairs the index class name expected in the error message
     with the factory used to build an index of that kind.
     """
     size = 2
     cases = (
         ('Int64Index', tm.makeIntIndex),
         ('PeriodIndex', tm.makePeriodIndex),
         ('Index', tm.makeUnicodeIndex),
         ('Float64Index', tm.makeFloatIndex),
         ('MultiIndex', lambda m: tm.makeCustomIndex(m, 2)),
     )
     for label, make_index in cases:
         idx = make_index(size)
         frame = DataFrame({'a': np.random.randn(size)}, index=idx)
         expected = ("axis must be a DatetimeIndex, "
                     "but got an instance of %r" % label)
         with tm.assertRaisesRegexp(TypeError, expected):
             frame.groupby(TimeGrouper('D'))
Ejemplo n.º 14
0
 def test_fails_on_no_datetime_index(self):
     """Grouping by ``TimeGrouper('D')`` must raise ``TypeError`` on these indexes.

     Each case pairs the index class name expected in the error message
     with the factory used to build an index of that kind.
     """
     size = 2
     cases = (
         ('Int64Index', tm.makeIntIndex),
         ('PeriodIndex', tm.makePeriodIndex),
         ('Index', tm.makeUnicodeIndex),
         ('Float64Index', tm.makeFloatIndex),
         ('MultiIndex', lambda m: tm.makeCustomIndex(m, 2)),
     )
     for label, make_index in cases:
         idx = make_index(size)
         frame = DataFrame({'a': np.random.randn(size)}, index=idx)
         expected = ("axis must be a DatetimeIndex, "
                     "but got an instance of %r" % label)
         with tm.assertRaisesRegexp(TypeError, expected):
             frame.groupby(TimeGrouper('D'))
Ejemplo n.º 15
0
		pd.Series(np.arange(4,9)) # Series built from a NumPy integer range (np.arange excludes the stop value)
		pd.Series(np.linspace(0,9,5)) # np.linspace(start, stop, num): num evenly spaced values between the two bounds

		# Series of 5 draws from np.random.normal, then 10 random integers from np.random.randint(50, 101)
		pd.Series(np.random.normal(size=5))
		np.random.randint(50,101,10)


		# Slice assignment on a NumPy array: start with sixteen 4s,
		# overwrite every element from index 1 onward with 42,
		# then set indices 1, 3, 5 and 7 to 16.
		a = np.array([4] * 16)
		a[1::] = [42] * 15
		a[1:8:2] = 16


		# Sample-data factories from pandas' testing module.
		# NOTE(review): tm.N / tm.K presumably set the default sizes used by the make* helpers; confirm.
		import pandas.util.testing as tm
		tm.N, tm.K = 5,3
		tm.makeFloatSeries(), tm.makeBoolIndex(), tm.makeCategoricalIndex()
		tm.makeCustomIndex(nentries=4,nlevels=2), tm.makeFloatIndex(), tm.makeIntIndex()
		tm.makeMultiIndex(), tm.makeRangeIndex(), tm.makeIntervalIndex()

		# All possible orderings (permutations) of a list
			from itertools import permutations 
			my_list = [1,2,3]
			perm = list(permutations(my_list))

				# Resulting tuples, in the order itertools yields them:
				#(1, 2, 3)
				#(1, 3, 2)
				#(2, 1, 3)
				#(2, 3, 1)
				#(3, 1, 2)
				#(3, 2, 1)

Ejemplo n.º 16
0
    # Errors
    grouped = df.groupby(grouper, group_keys=False)

    def f(df):
        """Return ``df['close']`` divided by ``df['open']``."""
        closing = df['close']
        opening = df['open']
        return closing / opening

    # it works!
    result = grouped.apply(f)
    tm.assert_index_equal(result.index, df.index)


@pytest.mark.parametrize(
    'name, func',
    [
        ('Int64Index', tm.makeIntIndex),
        ('Index', tm.makeUnicodeIndex),
        ('Float64Index', tm.makeFloatIndex),
        ('MultiIndex', lambda m: tm.makeCustomIndex(m, 2)),
    ],
)
def test_fails_on_no_datetime_index(name, func):
    """Grouping by ``TimeGrouper('D')`` must raise ``TypeError`` on this index.

    ``name`` is the index class name expected in the error message and
    ``func`` builds an index of that kind from a length.
    """
    size = 2
    idx = func(size)
    frame = DataFrame({'a': np.random.randn(size)}, index=idx)

    template = ("Only valid with DatetimeIndex, TimedeltaIndex "
                "or PeriodIndex, but got an instance of '{}'")
    expected = template.format(name)
    with pytest.raises(TypeError, match=expected):
        frame.groupby(TimeGrouper('D'))


def test_aaa_group_order():
    # GH 12840
    # check TimeGrouper perform stable sorts
    n = 20
Ejemplo n.º 17
0
 def test_raise_on_panel4d_with_multiindex(self, parser, engine):
     """``pd.eval`` must refuse a Panel4D whose items use a two-level index."""
     tm.skip_if_no_ne()
     # The local must stay named ``p4d``: the evaluated expression refers
     # to it by that name.
     p4d = tm.makePanel4D(7)
     item_count = len(p4d.items)
     p4d.items = tm.makeCustomIndex(item_count, nlevels=2)
     expression = 'p4d + 1'
     with pytest.raises(NotImplementedError):
         pd.eval(expression, parser=parser, engine=engine)
Ejemplo n.º 18
0
class TestResolvePath(object):
    """Tests for ``resolve_path`` driven by small generated dataframes.

    The class attributes define the archive root, a set of generated column
    headers, and several schemas mapping position keys to those headers.
    """

    archive = '.\\'
    headers = makeCustomIndex(7, 1, prefix='C')

    # Schemas whose headers are expected to resolve to a path.
    multi_schema = SortedDict({
        '0': headers[1],
        '1': headers[3],
    })
    multi_schema2 = SortedDict({
        '0': headers[0],
        '1': headers[2],
        '2': headers[3],
    })
    # Schema used by the IndexError test below.
    multi_schema3 = SortedDict({
        '0': headers[1],
        '1': headers[6],
    })

    single_schema = SortedDict({'0': headers[0]})
    # Schema used by the IndexError test below.
    single_schema2 = SortedDict({
        '0': headers[4],
    })

    @staticmethod
    def data_gen_invalid(row: int, col: int):
        """Return the cell for (row, col) from a table with conflicting values.

        Cells outside the table fall back to ``make_dataframe_value``.
        """
        table = [
            ['val', 'val', 'xxx', 'val'],
            ['val', nan, 'val', 'val'],
            ['val', nan, 'val', nan],
            [nan, nan, 'val', 'xxx'],  # noqa: E201
            ['xxx', nan, 'val', nan]
        ]

        if row < len(table) and col < len(table[row]):
            return table[row][col]
        return make_dataframe_value(row, col)

    @staticmethod
    def data_gen(row: int, col: int):
        """Return the cell for (row, col) from a table of 'val' / nan entries.

        Cells outside the table fall back to ``make_dataframe_value``.
        """
        table = [
            ['val', 'val', 'val', 'val'],
            ['val', nan, 'val', 'val'],
            ['val', nan, 'val', nan],
            [nan, nan, 'val', nan],  # noqa: E201
            [nan, nan, 'val', nan]  # noqa: E201
        ]

        if row < len(table) and col < len(table[row]):
            return table[row][col]
        return make_dataframe_value(row, col)

    @staticmethod
    def data_gen_normalizable(row: int, col: int):
        """Return the cell for (row, col) from a table of 'Value 1.' / nan entries.

        Cells outside the table fall back to ``make_dataframe_value``.
        """
        table = [
            ['Value 1.', 'Value 1.', 'Value 1.', 'Value 1.'],
            ['Value 1.', nan, 'Value 1.', 'Value 1.'],
            ['Value 1.', nan, 'Value 1.', nan],
            [nan, nan, 'Value 1.', nan],  # noqa: E201
            [nan, nan, 'Value 1.', nan]  # noqa: E201
        ]

        if row < len(table) and col < len(table[row]):
            return table[row][col]
        return make_dataframe_value(row, col)

    @pytest.mark.parametrize(
        'schema, expected',
        [
            (single_schema, join(archive, 'val')),
            (multi_schema, join(archive, 'val', 'val')),
            (multi_schema2, join(archive, 'val', 'val', 'val')),
        ],
    )
    def test_resolve_path(self, schema, expected):
        """``resolve_path`` returns the expected joined path for each schema."""
        frame = make_dataframe(5, 4, data_gen_f=TestResolvePath.data_gen)

        resolved = resolve_path(self.archive, schema, frame)

        assert resolved == expected

    @pytest.mark.parametrize(
        'schema, expected',
        [
            (single_schema, join(archive, 'value_1')),
            (multi_schema, join(archive, 'value_1', 'value_1')),
            (multi_schema2, join(archive, 'value_1', 'value_1', 'value_1')),
        ],
    )
    def test_resolve_path_normalized(self, schema, expected):
        """'Value 1.' cells are expected to appear as 'value_1' in the path."""
        generator = TestResolvePath.data_gen_normalizable
        frame = make_dataframe(5, 4, data_gen_f=generator)

        resolved = resolve_path(self.archive, schema, frame)

        assert resolved == expected

    @pytest.mark.parametrize('schema', [single_schema2, multi_schema3])
    def test_resolve_path_indexerror(self, schema):
        """These schemas are expected to make ``resolve_path`` raise IndexError."""
        frame = make_dataframe(5, 4, data_gen_f=TestResolvePath.data_gen)

        with pytest.raises(IndexError):
            resolve_path(self.archive, schema, frame)

    @pytest.mark.parametrize(
        'schema',
        [single_schema, multi_schema, multi_schema2],
    )
    def test_resolve_path_valueerror(self, schema):
        """Data from ``data_gen_invalid`` must make ``resolve_path`` raise ValueError."""
        generator = TestResolvePath.data_gen_invalid
        frame = make_dataframe(5, 4, data_gen_f=generator)

        with pytest.raises(ValueError):
            resolve_path(self.archive, schema, frame)
 def check_raise_on_panel_with_multiindex(self, parser, engine):
     """``pd.eval`` must refuse a Panel whose items use a two-level index."""
     tm.skip_if_no_ne()
     # The local must stay named ``p``: the evaluated expression refers to
     # it by that name.
     p = tm.makePanel(7)
     item_count = len(p.items)
     p.items = tm.makeCustomIndex(item_count, nlevels=2)
     expression = 'p + 1'
     with tm.assertRaises(NotImplementedError):
         pd.eval(expression, parser=parser, engine=engine)
 def test_raise_on_panel_with_multiindex(self, parser, engine):
     """``pd.eval`` must refuse a Panel whose items use a two-level index."""
     # The local must stay named ``p``: the evaluated expression refers to
     # it by that name.
     p = tm.makePanel(7)
     item_count = len(p.items)
     p.items = tm.makeCustomIndex(item_count, nlevels=2)
     expression = 'p + 1'
     with pytest.raises(NotImplementedError):
         pd.eval(expression, parser=parser, engine=engine)
Ejemplo n.º 21
0
 def test_raise_on_panel_with_multiindex(self, parser, engine):
     """``pd.eval`` must refuse a Panel whose items use a two-level index."""
     # The local must stay named ``p``: the evaluated expression refers to
     # it by that name.
     p = tm.makePanel(7)
     item_count = len(p.items)
     p.items = tm.makeCustomIndex(item_count, nlevels=2)
     expression = 'p + 1'
     with pytest.raises(NotImplementedError):
         pd.eval(expression, parser=parser, engine=engine)
Ejemplo n.º 22
0
    def f(df):
        """Return ``df["close"]`` divided by ``df["open"]``."""
        closing = df["close"]
        opening = df["open"]
        return closing / opening

    # it works!
    result = grouped.apply(f)
    tm.assert_index_equal(result.index, df.index)


@pytest.mark.parametrize(
    "name, func",
    [
        ("Int64Index", tm.makeIntIndex),
        ("Index", tm.makeUnicodeIndex),
        ("Float64Index", tm.makeFloatIndex),
        ("MultiIndex", lambda m: tm.makeCustomIndex(m, 2)),
    ],
)
def test_fails_on_no_datetime_index(name, func):
    """Grouping by ``Grouper(freq="D")`` must raise ``TypeError`` on this index.

    ``name`` is the index class name expected in the error message and
    ``func`` builds an index of that kind from a length.
    """
    size = 2
    idx = func(size)
    frame = DataFrame({"a": np.random.randn(size)}, index=idx)

    template = (
        "Only valid with DatetimeIndex, TimedeltaIndex "
        "or PeriodIndex, but got an instance of '{}'"
    )
    expected = template.format(name)
    with pytest.raises(TypeError, match=expected):
        frame.groupby(Grouper(freq="D"))


def test_aaa_group_order():
    # GH 12840
Ejemplo n.º 23
0
    # Errors
    grouped = df.groupby(grouper, group_keys=False)

    def f(df):
        """Return ``df['close']`` divided by ``df['open']``."""
        closing = df['close']
        opening = df['open']
        return closing / opening

    # it works!
    result = grouped.apply(f)
    tm.assert_index_equal(result.index, df.index)


@pytest.mark.parametrize(
    'name, func',
    [
        ('Int64Index', tm.makeIntIndex),
        ('Index', tm.makeUnicodeIndex),
        ('Float64Index', tm.makeFloatIndex),
        ('MultiIndex', lambda m: tm.makeCustomIndex(m, 2)),
    ],
)
def test_fails_on_no_datetime_index(name, func):
    """Grouping by ``TimeGrouper('D')`` must raise ``TypeError`` on this index.

    ``name`` is the index class name expected in the error message and
    ``func`` builds an index of that kind from a length.
    """
    size = 2
    idx = func(size)
    frame = DataFrame({'a': np.random.randn(size)}, index=idx)

    template = ("Only valid with DatetimeIndex, TimedeltaIndex "
                "or PeriodIndex, but got an instance of '{}'")
    expected = template.format(name)
    with pytest.raises(TypeError, match=expected):
        frame.groupby(TimeGrouper('D'))


def test_aaa_group_order():
    # GH 12840
    # check TimeGrouper perform stable sorts
Ejemplo n.º 24
0
 def check_raise_on_panel4d_with_multiindex(self, parser, engine):
     """``pd.eval`` must refuse a Panel4D whose items use a two-level index."""
     tm.skip_if_no_ne()
     # The local must stay named ``p4d``: the evaluated expression refers
     # to it by that name.
     p4d = tm.makePanel4D(7)
     item_count = len(p4d.items)
     p4d.items = tm.makeCustomIndex(item_count, nlevels=2)
     expression = 'p4d + 1'
     with tm.assertRaises(NotImplementedError):
         pd.eval(expression, parser=parser, engine=engine)