Ejemplo n.º 1
0
    def test_categorical_warnings_and_errors(self):
        # Exercises two categorical export paths of ``to_stata``:
        #   * labels that are too long must raise ValueError
        #   * non-string labels must produce exactly one warning
        original = pd.DataFrame.from_records(
            [['a' * 10000],
             ['b' * 10000],
             ['c' * 10000],
             ['d' * 10000]],
            columns=['Too_long'])

        original = pd.concat([original[col].astype('category')
                              for col in original], axis=1)
        with tm.ensure_clean() as path:
            tm.assertRaises(ValueError, original.to_stata, path)

        original = pd.DataFrame.from_records(
            [['a'],
             ['b'],
             ['c'],
             ['d'],
             [1]],
            columns=['Too_long'])
        original = pd.concat([original[col].astype('category')
                              for col in original], axis=1)

        # Acquire a fresh temporary path for the second export: the ``path``
        # bound above belongs to a context that has already exited, so
        # nothing would clean up a file written to it from here.
        with tm.ensure_clean() as path:
            with warnings.catch_warnings(record=True) as w:
                original.to_stata(path)
                # should get a warning for mixed content
                tm.assert_equal(len(w), 1)
Ejemplo n.º 2
0
def test_series_groupby_errors():
    # dask must reject invalid groupers with the same errors pandas raises
    pser = pd.Series([1, 2, 2, 1, 1])
    dser = dd.from_pandas(pser, npartitions=2)

    # (expected message, grouper) pairs; a fresh list is built for each call
    value_error_cases = (
        ("Grouper for '1' not 1-dimensional", (1, 2)),
        ("Grouper for '2' not 1-dimensional", (2,)),
        ("No group keys passed!", ()),
    )
    for msg, keys in value_error_cases:
        with tm.assertRaisesRegexp(ValueError, msg):
            pser.groupby(list(keys))  # pandas
        with tm.assertRaisesRegexp(ValueError, msg):
            dser.groupby(list(keys))  # dask should raise the same error

    # grouping by a dask series built with another partition count
    other = dd.from_pandas(pser, npartitions=3)
    assert raises(NotImplementedError, lambda: dser.groupby(other))

    # a label that is not present: pandas first, then dask
    for target in (pser, dser):
        with tm.assertRaises(KeyError):
            target.groupby('x')
Ejemplo n.º 3
0
def test_loc2d():
    # Two-dimensional ``.loc`` on the dask frame ``d`` must agree with the
    # pandas frame ``full``.
    indexing_error = pd.core.indexing.IndexingError

    # index indexer is always regarded as slice for duplicated values
    only_five = slice(5, 5)
    assert_eq(d.loc[5, 'a'], full.loc[only_five, 'a'])
    # assert_eq(d.loc[[5], 'a'], full.loc[[5], 'a'])
    assert_eq(d.loc[5, ['a']], full.loc[only_five, ['a']])
    # assert_eq(d.loc[[5], ['a']], full.loc[[5], ['a']])

    # 3:8, :8 and 3: respectively
    row_slices = (slice(3, 8), slice(None, 8), slice(3, None))

    # scalar column selection
    for rows in row_slices:
        assert_eq(d.loc[rows, 'a'], full.loc[rows, 'a'])
    assert_eq(d.loc[[8], 'a'], full.loc[[8], 'a'])

    # list column selection
    for rows in row_slices:
        assert_eq(d.loc[rows, ['a']], full.loc[rows, ['a']])
    assert_eq(d.loc[[3, 4, 3], ['a']], full.loc[[3, 4, 3], ['a']])

    # 3d
    with tm.assertRaises(indexing_error):
        d.loc[3, 3, 3]

    # Series should raise
    with tm.assertRaises(indexing_error):
        d.a.loc[3, 3]

    with tm.assertRaises(indexing_error):
        d.a.loc[3:, 3]

    with tm.assertRaises(indexing_error):
        d.a.loc[d.a % 2 == 0, 3]
Ejemplo n.º 4
0
    def test_errorbar_plot(self):
        # Series.plot must accept error bars given as Series, ndarray or
        # list, and reject error values of the wrong length or type.

        values = Series(np.arange(10))
        errors = np.random.randn(10)

        # test line and bar plots
        for kind in ('line', 'bar'):
            for yerr in (Series(errors), errors, errors.tolist()):
                _check_plot_works(values.plot, yerr=yerr, kind=kind)

        _check_plot_works(values.plot, xerr=errors)

        # test time series plotting
        months = date_range('1/1/2000', '1/1/2001', freq='M')
        ts = Series(np.arange(12), index=months)
        ts_errors = Series(np.random.randn(12), index=months)

        _check_plot_works(ts.plot, yerr=ts_errors)

        # check incorrect lengths and types
        with tm.assertRaises(ValueError):
            values.plot(yerr=np.arange(11))

        with tm.assertRaises(TypeError):
            values.plot(yerr=['zzz']*10)
Ejemplo n.º 5
0
    def test_errorbar_plot(self):
        # DataFrame.plot must accept error bars given as DataFrame, Series,
        # dict, raw ndarray or column name, and reject bad lengths and types.

        df = DataFrame({'x': np.arange(12), 'y': np.arange(12, 0, -1)})
        err_dict = {'x': np.ones(12)*0.2, 'y': np.ones(12)*0.4}
        df_err = DataFrame(err_dict)

        # check line plots
        _check_plot_works(df.plot, yerr=df_err, logy=True)
        _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True)

        for kind in ('line', 'bar', 'barh'):
            # every supported way of passing the errors, for this plot kind
            error_kwargs = (
                dict(yerr=df_err['x']),
                dict(yerr=err_dict),
                dict(yerr=df_err, xerr=df_err),
                dict(yerr=df_err['x'], xerr=df_err['x']),
                dict(yerr=df_err, xerr=df_err, subplots=True),
            )
            for kwargs in error_kwargs:
                _check_plot_works(df.plot, kind=kind, **kwargs)

        shifted = df+1
        _check_plot_works(shifted.plot, yerr=df_err, xerr=df_err, kind='bar',
                          log=True)

        # yerr is raw error values
        _check_plot_works(df['y'].plot, yerr=np.ones(12)*0.4)
        _check_plot_works(df.plot, yerr=np.ones((2, 12))*0.4)

        # yerr is column name
        df['yerr'] = np.ones(12)*0.2
        _check_plot_works(df.plot, y='y', x='x', yerr='yerr')

        # wrong number of error values
        with tm.assertRaises(ValueError):
            df.plot(yerr=np.random.randn(11))

        # non-numeric error values
        text_err = DataFrame({'x': ['zzz']*12, 'y': ['zzz']*12})
        with tm.assertRaises(TypeError):
            df.plot(yerr=text_err)
Ejemplo n.º 6
0
    def test_drop_column(self):
        # ``read_stata(columns=...)`` must return only the requested columns,
        # in the requested order, and reject invalid column selections.
        expected = self.read_csv(self.csv15)
        casts = (
            ("byte_", np.int8),
            ("int_", np.int16),
            ("long_", np.int32),
            ("float_", np.float32),
            ("double_", np.float64),
        )
        for name, dtype in casts:
            expected[name] = expected[name].astype(dtype)
        expected["date_td"] = expected["date_td"].apply(
            datetime.strptime, args=("%Y-%m-%d",))

        subset = ["byte_", "int_", "long_"]
        expected = expected[subset]
        dropped = read_stata(self.dta15_117, convert_dates=True,
                             columns=subset)
        tm.assert_frame_equal(expected, dropped)

        # See PR 10757
        shuffled = ["int_", "long_", "byte_"]
        expected = expected[shuffled]
        reordered = read_stata(self.dta15_117, convert_dates=True,
                               columns=shuffled)
        tm.assert_frame_equal(expected, reordered)

        # a repeated name, then a name expected to be missing
        invalid_selections = (
            ["byte_", "byte_"],
            ["byte_", "int_", "long_", "not_found"],
        )
        for columns in invalid_selections:
            with tm.assertRaises(ValueError):
                read_stata(self.dta15_117, convert_dates=True,
                           columns=columns)
Ejemplo n.º 7
0
 def test_invalid_encoding(self):
     # 'ascii' is expected to be rejected on both the write side
     # (ValueError) and the read side (NotImplementedError)
     frame = self.data['string']
     tm.assertRaises(ValueError,
                     lambda: frame.to_clipboard(encoding='ascii'))
     tm.assertRaises(NotImplementedError,
                     lambda: pd.read_clipboard(encoding='ascii'))
Ejemplo n.º 8
0
    def test_drop_column(self):
        # Reading a subset of columns from the Stata file must match the
        # same subset of the CSV reference, including column order.
        expected = self.read_csv(self.csv15)

        # bring the CSV reference to the dtypes used for the comparison
        names = ['byte_', 'int_', 'long_', 'float_', 'double_']
        dtypes = [np.int8, np.int16, np.int32, np.float32, np.float64]
        for name, dtype in zip(names, dtypes):
            expected[name] = expected[name].astype(dtype)
        parsed_dates = expected['date_td'].apply(datetime.strptime,
                                                 args=('%Y-%m-%d',))
        expected['date_td'] = parsed_dates

        wanted = ['byte_', 'int_', 'long_']
        expected = expected[wanted]
        dropped = read_stata(self.dta15_117, convert_dates=True,
                             columns=wanted)
        tm.assert_frame_equal(expected, dropped)

        # See PR 10757
        wanted = ['int_', 'long_', 'byte_']
        expected = expected[wanted]
        reordered = read_stata(self.dta15_117, convert_dates=True,
                               columns=wanted)
        tm.assert_frame_equal(expected, reordered)

        # repeated column name
        repeated = ['byte_', 'byte_']
        tm.assertRaises(ValueError, read_stata, self.dta15_117,
                        convert_dates=True, columns=repeated)

        # column name expected to be missing from the file
        unknown = ['byte_', 'int_', 'long_', 'not_found']
        tm.assertRaises(ValueError, read_stata, self.dta15_117,
                        convert_dates=True, columns=unknown)
Ejemplo n.º 9
0
    def test_nested_scope(self):
        # Checks how ``pd.eval`` and ``DataFrame.query`` handle names that
        # live in this test's own scope, using the engine/parser configured
        # on the test instance.
        #
        # NOTE(review): the expression strings below refer to the locals
        # ``x``, ``df`` and ``df2`` by name -- presumably they are looked up
        # in the calling frame, so confirm before renaming them or moving
        # the calls into a helper or lambda.
        from pandas.computation.ops import UndefinedVariableError
        engine = self.engine
        parser = self.parser
        # smoke test
        # ``x`` is only referenced from inside the expression string, hence
        # the noqa marker for linters
        x = 1  # noqa
        result = pd.eval('x + 1', engine=engine, parser=parser)
        self.assertEqual(result, 2)

        df = DataFrame(np.random.randn(5, 3))
        df2 = DataFrame(np.random.randn(5, 3))

        # don't have the pandas parser
        with tm.assertRaises(SyntaxError):
            df.query('(@df>0) & (@df2>0)', engine=engine, parser=parser)

        # without the ``@`` prefix the frame names are expected to be
        # undefined inside ``query``
        with tm.assertRaises(UndefinedVariableError):
            df.query('(df>0) & (df2>0)', engine=engine, parser=parser)

        # the same names do work through ``pd.eval``: the evaluated
        # expression must match plain boolean indexing
        expected = df[(df > 0) & (df2 > 0)]
        result = pd.eval('df[(df > 0) & (df2 > 0)]', engine=engine,
                         parser=parser)
        assert_frame_equal(expected, result)

        # same check with an indexing expression nested inside the mask
        expected = df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]
        result = pd.eval('df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]',
                         engine=engine, parser=parser)
        assert_frame_equal(expected, result)
Ejemplo n.º 10
0
    def test_getitem_day(self):
        # GH 6716
        # Confirm DatetimeIndex and PeriodIndex works identically
        daily_dt = DatetimeIndex(start='2013/01/01', freq='D', periods=400)
        daily_period = PeriodIndex(start='2013/01/01', freq='D', periods=400)

        labels = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H',
                  '2013/02/01 09:00']
        sub_daily = ['2013/02/01 9H', '2013/02/01 09:00']

        for idx in (daily_dt, daily_period):
            # getitem against index should raise ValueError
            if _np_version_under1p9:
                for label in labels:
                    with tm.assertRaises(ValueError):
                        idx[label]
            # otherwise nothing is checked on the index itself:
            # GH7116
            # these show deprecations as we are trying
            # to slice with non-integer indexers
            # with tm.assertRaises(IndexError):
            #    idx[v]

            ser = Series(np.random.rand(len(idx)), index=idx)
            # partial string labels select the matching positional ranges
            tm.assert_series_equal(ser['2013/01'], ser[0:31])
            tm.assert_series_equal(ser['2013/02'], ser[31:59])
            tm.assert_series_equal(ser['2014'], ser[365:])

            # labels finer than the daily frequency are not valid keys
            for label in sub_daily:
                with tm.assertRaises(KeyError):
                    ser[label]
Ejemplo n.º 11
0
    def test_implementation_limits(self):
        # Behaviour of Timedelta at and just beyond its min/max values
        lower = Timedelta(Timedelta.min)
        upper = Timedelta(Timedelta.max)
        int64 = np.iinfo(np.int64)
        nat_type = pd.tslib.NaTType

        # GH 12727
        # timedelta limits correspond to int64 boundaries
        self.assertTrue(lower.value == int64.min + 1)
        self.assertTrue(upper.value == int64.max)

        # Beyond lower limit, a NAT before the Overflow
        just_below = lower - Timedelta(1, 'ns')
        self.assertIsInstance(just_below, nat_type)

        with tm.assertRaises(OverflowError):
            lower - Timedelta(2, 'ns')

        with tm.assertRaises(OverflowError):
            upper + Timedelta(1, 'ns')

        # Same tests using the internal nanosecond values
        from_value = Timedelta(lower.value - 1, 'ns')
        self.assertIsInstance(from_value, nat_type)

        for nanoseconds in (lower.value - 2, upper.value + 1):
            with tm.assertRaises(OverflowError):
                Timedelta(nanoseconds, 'ns')
Ejemplo n.º 12
0
    def test_hist_layout(self):
        # DataFrame.hist must honour ``layout`` for a 3-column frame
        df = DataFrame(randn(100, 3))

        # (requested layout, expected grid shape)
        cases = (
            (None, (2, 2)),  # default is 2x2
            ((2, 2), (2, 2)),
            ((4, 1), (4, 1)),
            ((1, 4), (1, 4)),
            ((3, 3), (3, 3)),
            # a -1 entry is expected to be filled in from the other one
            ((-1, 4), (1, 4)),
            ((4, -1), (4, 1)),
            ((-1, 2), (2, 2)),
            ((2, -1), (2, 2)),
        )

        for layout, expected in cases:
            axes = df.hist(layout=layout)
            self._check_axes_shape(axes, axes_num=3, layout=expected)

        # layout too small for the 3 plots
        with tm.assertRaises(ValueError):
            df.hist(layout=(1, 1))

        # invalid format for layout
        for malformed in ((1,), (-1, -1)):
            with tm.assertRaises(ValueError):
                df.hist(layout=malformed)
Ejemplo n.º 13
0
 def test_no_order(self):
     # 'polynomial' and 'spline' interpolation are expected to raise
     # when no ``order`` is supplied
     _skip_if_no_scipy()
     s = Series([0, 1, np.nan, 3])
     for method in ('polynomial', 'spline'):
         with tm.assertRaises(ValueError):
             s.interpolate(method=method)
Ejemplo n.º 14
0
    def test_hist_layout(self):
        # Series.hist is expected to reject these ``layout`` values,
        # whether passed as a tuple or as a list
        df = self.hist_df
        for layout in ((1, 1), [1, 1]):
            with tm.assertRaises(ValueError):
                df.height.hist(layout=layout)
Ejemplo n.º 15
0
    def test_take_filling_fill_value(self):
        # same tests as GH 12631
        sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)

        # plain take: -1 addresses the last element
        taken = sparse.take(np.array([1, 0, -1]))
        wanted = SparseArray([0, np.nan, 4], fill_value=0)
        tm.assert_sp_array_equal(taken, wanted)

        # fill_value: -1 now marks a position to be filled
        taken = sparse.take(np.array([1, 0, -1]), fill_value=True)
        wanted = SparseArray([0, np.nan, 0], fill_value=0)
        tm.assert_sp_array_equal(taken, wanted)

        # allow_fill=False: back to positional meaning of -1
        taken = sparse.take(np.array([1, 0, -1]),
                            allow_fill=False, fill_value=True)
        wanted = SparseArray([0, np.nan, 4], fill_value=0)
        tm.assert_sp_array_equal(taken, wanted)

        # with filling enabled, indices below -1 are rejected
        msg = ('When allow_fill=True and fill_value is not None, '
               'all indices must be >= -1')
        for positions in ([1, 0, -2], [1, 0, -5]):
            with tm.assertRaisesRegexp(ValueError, msg):
                sparse.take(np.array(positions), fill_value=True)

        # out-of-bounds positions, with and without filling
        out_of_bounds = (
            ([1, -6], {}),
            ([1, 5], {}),
            ([1, 5], {'fill_value': True}),
        )
        for positions, kwargs in out_of_bounds:
            with tm.assertRaises(IndexError):
                sparse.take(np.array(positions), **kwargs)
Ejemplo n.º 16
0
    def test_nonunique_raises(self):
        # a frame with duplicated column labels is expected to be rejected
        # by the styler
        frame = pd.DataFrame([[1, 2]], columns=['A', 'A'])

        # through the ``style`` attribute
        tm.assertRaises(ValueError, lambda: frame.style)

        # through the constructor
        tm.assertRaises(ValueError, lambda: Styler(frame))
Ejemplo n.º 17
0
def test_get_division():
    # ``get_division(i)`` must return the i-th partition as a dask object
    # and reject division numbers outside the valid range.
    pdf = pd.DataFrame(np.random.randn(10, 5), columns=list("abcde"))
    ddf = dd.from_pandas(pdf, 3)
    assert ddf.divisions == (0, 4, 8, 9)

    # inclusive label bounds of each of the three partitions
    bounds = ((0, 3), (4, 7), (8, 9))

    # DataFrame
    frame_parts = [ddf.get_division(number) for number in range(3)]
    assert isinstance(frame_parts[0], dd.DataFrame)
    for part, (first, last) in zip(frame_parts, bounds):
        assert eq(part, pdf.loc[first:last])
    assert sum(len(part) for part in frame_parts) == len(pdf)

    # Series
    series_parts = [ddf.a.get_division(number) for number in range(3)]
    assert isinstance(series_parts[0], dd.Series)
    for part, (first, last) in zip(series_parts, bounds):
        assert eq(part, pdf.a.loc[first:last])
    assert sum(len(part) for part in series_parts) == len(pdf.a)

    # one below and one above the valid division numbers
    for invalid in (-1, 3):
        with tm.assertRaises(ValueError):
            ddf.get_division(invalid)
Ejemplo n.º 18
0
    def check_str_query_method(self, parser, engine):
        # String comparisons in ``DataFrame.query``: they must work with the
        # 'pandas' parser and raise NotImplementedError with any other one.
        tm.skip_if_no_ne(engine)
        df = DataFrame(randn(10, 1), columns=['b'])
        df['strings'] = Series(list('aabbccddee'))
        expect = df[df.strings == 'a']

        if parser == 'pandas':
            # equality, literal on either side
            for expr in ('"a" == strings', 'strings == "a"'):
                res = df.query(expr, engine=engine, parser=parser)
                assert_frame_equal(res, expect)
            assert_frame_equal(res, df[df.strings.isin(['a'])])

            # inequality, literal on either side
            expect = df[df.strings != 'a']
            for expr in ('strings != "a"', '"a" != strings'):
                res = df.query(expr, engine=engine, parser=parser)
                assert_frame_equal(res, expect)
            assert_frame_equal(res, df[~df.strings.isin(['a'])])
        else:
            column, literal = 'strings', '"a"'

            # column-vs-literal twice, then literal-vs-column twice, each
            # pair once with == and once with !=
            left_operands = [column, column, literal, literal]
            right_operands = left_operands[::-1]
            operators = ['==', '!=', '==', '!=']

            for left, op, right in zip(left_operands, operators,
                                       right_operands):
                ex = '{lhs} {op} {rhs}'.format(lhs=left, op=op, rhs=right)
                assertRaises(NotImplementedError, df.query, ex, engine=engine,
                             parser=parser, local_dict={'strings': df.strings})
Ejemplo n.º 19
0
    def test_hist_layout(self):
        # DataFrame.hist must honour ``layout`` for a 4-column frame
        # (pyplot is imported here but not otherwise used in this test)
        import matplotlib.pyplot as plt

        df = DataFrame(randn(100, 4))

        # (requested layout, expected (rows, columns) of the axes grid)
        cases = (
            (None, (2, 2)),  # default is 2x2
            ((2, 2), (2, 2)),
            ((4, 1), (4, 1)),
            ((1, 4), (1, 4)),
            ((3, 3), (3, 3)),
        )

        for layout, (n_rows, n_cols) in cases:
            ax = df.hist(layout=layout)
            self.assertEqual(len(ax), n_rows)
            self.assertEqual(len(ax[0]), n_cols)

        # first: layout too small for all 4 plots
        # second: invalid format for layout
        for rejected in ((1, 1), (1,)):
            with tm.assertRaises(ValueError):
                df.hist(layout=rejected)
Ejemplo n.º 20
0
    def test_nat_arithmetic(self):
        # GH 6873
        nat = tslib.NaT
        stamp = Timestamp('2014-01-01')
        pydt = datetime.datetime(2014, 1, 1)
        delta = datetime.timedelta(3600)

        # Timestamp / datetime
        stamp_like_pairs = ((nat, nat), (nat, stamp), (pydt, nat))
        for lhs, rhs in stamp_like_pairs:
            # NaT + Timestamp-like should raise TypeError
            with tm.assertRaises(TypeError):
                lhs + rhs
            with tm.assertRaises(TypeError):
                rhs + lhs

            # NaT - Timestamp-like (or inverse) returns NaT
            self.assertTrue((lhs - rhs) is tslib.NaT)
            self.assertTrue((rhs - lhs) is tslib.NaT)

        # timedelta-like
        # offsets are tested in test_offsets.py

        # NaT + timedelta-like returns NaT
        self.assertTrue((nat + delta) is tslib.NaT)
        # timedelta-like + NaT should raise TypeError
        with tm.assertRaises(TypeError):
            delta + nat

        # the same asymmetry holds for subtraction
        self.assertTrue((nat - delta) is tslib.NaT)
        with tm.assertRaises(TypeError):
            delta - nat
Ejemplo n.º 21
0
    def test_unstack_non_unique_index_names(self):
        # stack/unstack by a level name shared by two levels must raise
        idx = MultiIndex.from_tuples([("a", "b"), ("c", "d")],
                                     names=["c1", "c1"])
        df = DataFrame([1, 2], index=idx)

        tm.assertRaises(ValueError, lambda: df.unstack("c1"))
        tm.assertRaises(ValueError, lambda: df.T.stack("c1"))
Ejemplo n.º 22
0
    def test_s3_fails(self):
        # Both S3 reads below are expected to surface as IOError.
        # For the second URL: receive a permission error when trying to read
        # a private bucket. It's irrelevant here that this isn't actually a
        # table.
        for url in ('s3://nyqpug/asdf.csv', 's3://cant_get_it/'):
            with tm.assertRaises(IOError):
                read_csv(url)
Ejemplo n.º 23
0
 def test_no_index(self):
     # with write_index=False the index name must not come back as a column
     columns = ["x", "y"]
     values = np.arange(10.0).reshape((5, 2))
     original = DataFrame(values, columns=columns)
     original.index.name = "index_not_written"
     with tm.ensure_clean() as path:
         original.to_stata(path, write_index=False)
         written_and_read_again = self.read_dta(path)
         with tm.assertRaises(KeyError):
             written_and_read_again["index_not_written"]
Ejemplo n.º 24
0
    def test_catch_too_many_names(self):
        # see gh-5156
        # four names are supplied for three columns of data
        data = ("1,2,3\n"
                "4,,6\n"
                "7,8,9\n"
                "10,11,12\n")
        too_many = ["a", "b", "c", "d"]
        with tm.assertRaises(ValueError):
            self.read_csv(StringIO(data), header=0, names=too_many)
Ejemplo n.º 25
0
    def test_construction_dti_with_mixed_timezones(self):
        # GH 11488 (not changed, added explicit tests)

        # no tz results in DatetimeIndex
        built = DatetimeIndex(
            [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
        wanted = DatetimeIndex(
            [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
        self.assert_index_equal(built, wanted, exact=True)
        self.assertTrue(isinstance(built, DatetimeIndex))

        # same tz results in DatetimeIndex
        tokyo_stamps = [Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                        Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')]
        built = DatetimeIndex(tokyo_stamps, name='idx')
        wanted = DatetimeIndex(
            [Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00')],
            tz='Asia/Tokyo', name='idx')
        self.assert_index_equal(built, wanted, exact=True)
        self.assertTrue(isinstance(built, DatetimeIndex))

        # same tz results in DatetimeIndex (DST)
        eastern_stamps = [Timestamp('2011-01-01 10:00', tz='US/Eastern'),
                          Timestamp('2011-08-01 10:00', tz='US/Eastern')]
        built = DatetimeIndex(eastern_stamps, name='idx')
        wanted = DatetimeIndex(
            [Timestamp('2011-01-01 10:00'), Timestamp('2011-08-01 10:00')],
            tz='US/Eastern', name='idx')
        self.assert_index_equal(built, wanted, exact=True)
        self.assertTrue(isinstance(built, DatetimeIndex))

        # a tz-naive value mixed with a tz-aware one is coerced to the
        # aware timezone; the result is still a DatetimeIndex
        half_naive = [Timestamp('2011-01-01 10:00'),
                      Timestamp('2011-01-02 10:00', tz='US/Eastern')]
        built = DatetimeIndex(half_naive, name='idx')
        wanted = DatetimeIndex(
            [Timestamp('2011-01-01 05:00'), Timestamp('2011-01-02 10:00')],
            tz='US/Eastern', name='idx')
        self.assert_index_equal(built, wanted, exact=True)
        self.assertTrue(isinstance(built, DatetimeIndex))

        # mismatching timezones among tz-aware values, or against an
        # explicit ``tz``, raise TypeError/ValueError
        with tm.assertRaises(ValueError):
            two_zones = [Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                         Timestamp('2011-01-02 10:00', tz='US/Eastern')]
            DatetimeIndex(two_zones, name='idx')

        with tm.assertRaises(TypeError):
            half_naive = [Timestamp('2011-01-01 10:00'),
                          Timestamp('2011-01-02 10:00', tz='US/Eastern')]
            DatetimeIndex(half_naive, tz='Asia/Tokyo', name='idx')

        with tm.assertRaises(ValueError):
            two_zones = [Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                         Timestamp('2011-01-02 10:00', tz='US/Eastern')]
            DatetimeIndex(two_zones, tz='US/Eastern', name='idx')
Ejemplo n.º 26
0
 def test_unsortable(self):
     # GH 13714
     # sorting an object array that mixes ints with a datetime must
     # raise TypeError
     arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object)
     if not compat.PY2 or pd._np_version_under1p10:
         with tm.assertRaises(TypeError):
             algos.safe_sort(arr)
     else:
         # RuntimeWarning: tp_compare didn't return -1 or -2 for exception
         with tm.assert_produces_warning(RuntimeWarning):
             with tm.assertRaises(TypeError):
                 algos.safe_sort(arr)
Ejemplo n.º 27
0
    def test_unstack_non_unique_index_names(self):
        # both index levels carry the same name, so reshaping by that name
        # is expected to raise
        duplicated_names = ['c1', 'c1']
        idx = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')],
                                     names=duplicated_names)
        df = DataFrame([1, 2], index=idx)

        reshapes = (
            lambda: df.unstack('c1'),
            lambda: df.T.stack('c1'),
        )
        for reshape in reshapes:
            with tm.assertRaises(ValueError):
                reshape()
Ejemplo n.º 28
0
    def test_data_fail(self):
        # both sample documents are expected to fail with XMLSyntaxError
        from lxml.etree import XMLSyntaxError

        for file_name in ('spam.html', 'banklist.html'):
            document = os.path.join(DATA_PATH, file_name)
            with tm.assertRaises(XMLSyntaxError):
                self.read_html(document)
Ejemplo n.º 29
0
    def test_spline_error(self):
        # spline interpolation is expected to raise both without an order
        # and with order=0
        tm._skip_if_no_scipy()

        s = pd.Series(np.arange(10) ** 2)
        s[np.random.randint(0, 9, 3)] = np.nan

        for extra in ({}, {'order': 0}):
            with tm.assertRaises(ValueError):
                s.interpolate(method='spline', **extra)
Ejemplo n.º 30
0
    def test_hist_layout(self):
        # Series.hist is expected to reject these ``layout`` values,
        # whether passed as a tuple or as a list
        n = 10
        gender = tm.choice(['Male', 'Female'], size=n)
        height = random.normal(66, 4, size=n)
        weight = random.normal(161, 32, size=n)
        df = DataFrame({'gender': gender, 'height': height, 'weight': weight})

        for layout in ((1, 1), [1, 1]):
            with tm.assertRaises(ValueError):
                df.height.hist(layout=layout)
Ejemplo n.º 31
0
 def test_interp_nan_idx(self):
     # interpolating by index values is expected to raise when the index
     # itself contains NaN
     frame = DataFrame({'A': [1, 2, np.nan, 4], 'B': [np.nan, 2, 3, 4]})
     indexed = frame.set_index('A')
     tm.assertRaises(NotImplementedError,
                     lambda: indexed.interpolate(method='values'))
Ejemplo n.º 32
0
 def test_to_gbq_with_no_project_id_given_should_fail(self):
     # calling to_gbq without a project id must raise TypeError
     empty = DataFrame()
     tm.assertRaises(TypeError,
                     lambda: gbq.to_gbq(empty, 'dataset.tablename'))
Ejemplo n.º 33
0
 def test_describe_quantiles_both(self):
     # passing ``percentile_width`` together with ``percentiles`` must raise
     tm.assertRaises(
         ValueError,
         lambda: tm.makeDataFrame().describe(percentile_width=50,
                                             percentiles=[25, 75]))
Ejemplo n.º 34
0
 def test_describe_percentiles_percent_or_raw(self):
     # this percentile list is expected to be rejected with ValueError
     frame = tm.makeDataFrame()
     rejected = [10, 50, 100]
     tm.assertRaises(ValueError,
                     lambda: frame.describe(percentiles=rejected))
Ejemplo n.º 35
0
 def test_describe_raises(self):
     # ``describe`` on a Panel is expected to raise NotImplementedError
     tm.assertRaises(NotImplementedError,
                     lambda: tm.makePanel().describe())
Ejemplo n.º 36
0
 def test_interpolate_non_ts(self):
     # method='time' must raise for a series built without a time index
     values = Series([1, 3, np.nan, np.nan, np.nan, 11])
     tm.assertRaises(ValueError,
                     lambda: values.interpolate(method='time'))
Ejemplo n.º 37
0
 def test_display_format_raises(self):
     # an int or a bool is not accepted as a formatter
     df = pd.DataFrame(np.random.randn(2, 2))
     for bad_formatter in (5, True):
         with tm.assertRaises(TypeError):
             df.style.format(bad_formatter)
Ejemplo n.º 38
0
 def test_bad_project_id(self):
     # a query issued against project id '001' is expected to fail
     query = "SELECT 1"
     with tm.assertRaises(gbq.GenericGBQException):
         gbq.read_gbq(query, project_id='001',
                      private_key=_get_private_key_path())
Ejemplo n.º 39
0
 def test_bad_table_name(self):
     # selecting from the table ``samples.nope`` is expected to fail
     query = "SELECT * FROM [publicdata:samples.nope]"
     with tm.assertRaises(gbq.GenericGBQException):
         gbq.read_gbq(query, project_id=_get_project_id(),
                      private_key=_get_private_key_path())
Ejemplo n.º 40
0
 def test_plot_fails_with_dupe_color_and_style(self):
     # the style string 'k--' and the ``color`` keyword both name a
     # colour, which must raise
     values = Series(randn(2))
     tm.assertRaises(ValueError,
                     lambda: values.plot(style='k--', color='k'))
Ejemplo n.º 41
0
 def test_bad_url_protocol(self):
     # a ``git://`` URL is expected to surface as URLError
     tm.assertRaises(
         URLError,
         lambda: self.read_html('git://github.com', match='.*Water.*'))
Ejemplo n.º 42
0
def test_invalid_flavor():
    """``read_html`` must raise ValueError for the bogus flavor string."""
    bogus_flavor = 'not a* valid**++ flaver'
    with tm.assertRaises(ValueError):
        read_html('google.com', 'google', flavor=bogus_flavor)
Ejemplo n.º 43
0
    def test_fred_multi_bad_series(self):
        """A FRED request containing bogus series names must raise HTTPError."""
        requested = ['NOTAREALSERIES', 'CPIAUCSL', "ALSO FAKE"]
        source = "fred"
        with tm.assertRaises(HTTPError):
            DataReader(requested, data_source=source)
Ejemplo n.º 44
0
 def test_bool_header_arg(self):
     """``read_html`` must raise TypeError for a boolean ``header``."""
     # GH 6114
     for flag in (True, False):
         with tm.assertRaises(TypeError):
             read_html(self.spam_data, header=flag)
Ejemplo n.º 45
0
 def test_interp_nonmono_raise(self):
     """Krogh interpolation over index ``[0, 2, 1]`` must raise ValueError."""
     tm._skip_if_no_scipy()
     unordered = Series([1, np.nan, 3], index=[0, 2, 1])
     with tm.assertRaises(ValueError):
         unordered.interpolate(method='krogh')
Ejemplo n.º 46
0
 def test_malformed_query(self):
     """A misspelled SQL statement must raise GenericGBQException."""
     broken_sql = "SELCET * FORM [publicdata:samples.shakespeare]"
     with tm.assertRaises(gbq.GenericGBQException):
         # Credentials are looked up inside the guarded block, as before.
         gbq.read_gbq(broken_sql, project_id=_get_project_id(),
                      private_key=_get_private_key_path())
Ejemplo n.º 47
0
 def test_init_non_pandas(self):
     """Constructing a ``Styler`` from a plain list must raise TypeError."""
     plain_list = [1, 2, 3]
     with tm.assertRaises(TypeError):
         Styler(plain_list)
Ejemplo n.º 48
0
 def test_malformed_query(self):
     """A misspelled SQL statement must raise GenericGBQException."""
     broken_sql = "SELCET * FORM [publicdata:samples.shakespeare]"
     with tm.assertRaises(gbq.GenericGBQException):
         gbq.read_gbq(broken_sql, project_id=PROJECT_ID)
Ejemplo n.º 49
0
 def test_bad_table_name(self):
     """Selecting from ``samples.nope`` must raise GenericGBQException."""
     query = "SELECT * FROM [publicdata:samples.nope]"
     with tm.assertRaises(gbq.GenericGBQException):
         gbq.read_gbq(query, project_id=PROJECT_ID)
Ejemplo n.º 50
0
 def test_read_gbq_with_empty_private_key_file_should_fail(self):
     """A key file from ``ensure_clean`` must raise InvalidPrivateKeyFormat.

     NOTE(review): presumably the temporary file is empty -- confirm
     against ``tm.ensure_clean``.
     """
     with tm.ensure_clean() as empty_file_path, \
             tm.assertRaises(gbq.InvalidPrivateKeyFormat):
         gbq.read_gbq('SELECT 1', project_id='x',
                      private_key=empty_file_path)
Ejemplo n.º 51
0
    def test_tz_convert_and_localize(self):
        """Check ``tz_localize``/``tz_convert`` on flat and MultiIndex frames.

        For each method, the frame-level call is compared against the
        same method applied directly to the index: first on a frame with
        a plain datetime index, then on individual levels of a
        MultiIndex.  Invalid inputs (a non-datetime index or level, an
        out-of-range level) must raise.
        """
        l0 = date_range('20140701', periods=5, freq='D')

        # TODO: l1 should be a PeriodIndex for testing
        #       after GH2106 is addressed
        with tm.assertRaises(NotImplementedError):
            period_range('20140701', periods=1).tz_convert('UTC')
        with tm.assertRaises(NotImplementedError):
            period_range('20140701', periods=1).tz_localize('UTC')
        # l1 = period_range('20140701', periods=5, freq='D')
        l1 = date_range('20140701', periods=5, freq='D')

        int_idx = Index(range(5))

        for fn in ['tz_localize', 'tz_convert']:

            if fn == 'tz_convert':
                # tz_convert needs tz-aware input, so localize first.
                l0 = l0.tz_localize('UTC')
                l1 = l1.tz_localize('UTC')

            for idx in [l0, l1]:

                l0_expected = getattr(idx, fn)('US/Pacific')
                l1_expected = getattr(idx, fn)('US/Pacific')

                df1 = DataFrame(np.ones(5), index=l0)
                df1 = getattr(df1, fn)('US/Pacific')
                self.assertTrue(df1.index.equals(l0_expected))

                # MultiIndex
                # GH7846
                df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1]))

                # Only the requested level may change.
                df3 = getattr(df2, fn)('US/Pacific', level=0)
                self.assertFalse(df3.index.levels[0].equals(l0))
                self.assertTrue(df3.index.levels[0].equals(l0_expected))
                self.assertTrue(df3.index.levels[1].equals(l1))
                self.assertFalse(df3.index.levels[1].equals(l1_expected))

                df3 = getattr(df2, fn)('US/Pacific', level=1)
                self.assertTrue(df3.index.levels[0].equals(l0))
                self.assertFalse(df3.index.levels[0].equals(l0_expected))
                self.assertTrue(df3.index.levels[1].equals(l1_expected))
                self.assertFalse(df3.index.levels[1].equals(l1))

                df4 = DataFrame(np.ones(5),
                                MultiIndex.from_arrays([int_idx, l0]))

                # Mixed (integer, datetime) MultiIndex: the result must
                # actually be inspected.  Previously df5 was computed and
                # then the df3 assertions were repeated, leaving this
                # case unchecked.
                df5 = getattr(df4, fn)('US/Pacific', level=1)
                self.assertTrue(df5.index.levels[0].equals(int_idx))
                self.assertTrue(df5.index.levels[1].equals(l0_expected))
                self.assertFalse(df5.index.levels[1].equals(l0))

        # Bad Inputs
        for fn in ['tz_localize', 'tz_convert']:
            # Not DatetimeIndex / PeriodIndex
            with tm.assertRaisesRegexp(TypeError, 'DatetimeIndex'):
                df = DataFrame(index=int_idx)
                df = getattr(df, fn)('US/Pacific')

            # Not DatetimeIndex / PeriodIndex
            with tm.assertRaisesRegexp(TypeError, 'DatetimeIndex'):
                df = DataFrame(np.ones(5),
                               MultiIndex.from_arrays([int_idx, l0]))
                df = getattr(df, fn)('US/Pacific', level=0)

            # Invalid level
            with tm.assertRaisesRegexp(ValueError, 'not valid'):
                df = DataFrame(index=l0)
                df = getattr(df, fn)('US/Pacific', level=1)
Ejemplo n.º 52
0
 def test_read_gbq_with_private_key_json_wrong_types_should_fail(self):
     """A key JSON with wrongly typed fields must be rejected."""
     malformed_key = '{ "client_email" : 1, "private_key" : True }'
     with tm.assertRaises(gbq.InvalidPrivateKeyFormat):
         gbq.read_gbq('SELECT 1', project_id='x',
                      private_key=malformed_key)
Ejemplo n.º 53
0
 def test_bad_project_id(self):
     """Querying with project id ``'001'`` must raise GenericGBQException."""
     query = "SELECT 1"
     with tm.assertRaises(gbq.GenericGBQException):
         gbq.read_gbq(query, project_id='001')
Ejemplo n.º 54
0
 def test_read_gbq_with_empty_private_key_json_should_fail(self):
     """An empty JSON object as private key must be rejected."""
     empty_json = '{}'
     with tm.assertRaises(gbq.InvalidPrivateKeyFormat):
         gbq.read_gbq('SELECT 1', project_id='x', private_key=empty_json)
Ejemplo n.º 55
0
 def test_read_gbq_with_no_project_id_given_should_fail(self):
     """``read_gbq`` called without a project id must raise ``TypeError``."""
     query = 'SELECT "1" as NUMBER_1'
     with tm.assertRaises(TypeError):
         gbq.read_gbq(query)
Ejemplo n.º 56
0
 def test_invalid_kind(self):
     """Plotting with the plot kind ``'aasdf'`` must raise ValueError."""
     series = Series([1, 2])
     bogus_kind = 'aasdf'
     with tm.assertRaises(ValueError):
         series.plot(kind=bogus_kind)
Ejemplo n.º 57
0
    def test_to_csv_multiindex(self):
        """Round-trip frames with MultiIndex rows and/or columns through CSV.

        Covers three scenarios, each with its own temporary file:

        1. MultiIndex rows on ``self.frame`` and ``self.tsframe`` (the
           fixtures' indexes are swapped in and restored afterwards).
        2. MultiIndex columns, with and without ``tupleize_cols``, plus
           the parser errors raised for invalid ``header`` arguments.
        3. Writing and re-reading an empty slice of the time-series frame.
        """

        pname = '__tmp_to_csv_multiindex__'
        frame = self.frame
        old_index = frame.index
        arrays = np.arange(len(old_index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays, names=['first', 'second'])
        frame.index = new_index

        with ensure_clean(pname) as path:

            frame.to_csv(path, header=False)
            frame.to_csv(path, columns=['A', 'B'])

            # round trip
            frame.to_csv(path)
            df = DataFrame.from_csv(path, index_col=[0, 1], parse_dates=False)

            # TODO to_csv drops column name
            assert_frame_equal(frame, df, check_names=False)
            self.assertEqual(frame.index.names, df.index.names)

            # needed if setUP becomes a classmethod
            self.frame.index = old_index

            # try multiindex with dates
            tsframe = self.tsframe
            old_index = tsframe.index
            new_index = [old_index, np.arange(len(old_index))]
            tsframe.index = MultiIndex.from_arrays(new_index)

            tsframe.to_csv(path, index_label=['time', 'foo'])
            recons = DataFrame.from_csv(path, index_col=[0, 1])
            # TODO to_csv drops column name
            assert_frame_equal(tsframe, recons, check_names=False)

            # do not load index
            tsframe.to_csv(path)
            recons = DataFrame.from_csv(path, index_col=None)
            np.testing.assert_equal(
                len(recons.columns), len(tsframe.columns) + 2)

            # no index
            tsframe.to_csv(path, index=False)
            recons = DataFrame.from_csv(path, index_col=None)
            assert_almost_equal(recons.values, self.tsframe.values)

            # needed if setUP becomes classmethod
            self.tsframe.index = old_index

        with ensure_clean(pname) as path:
            # GH3571, GH1651, GH3141

            def _make_frame(names=None):
                # 3x3 integer frame with two-level columns; ``names=True``
                # is shorthand for the default level names.
                if names is True:
                    names = ['first', 'second']
                return DataFrame(np.random.randint(0, 10, size=(3, 3)),
                                 columns=MultiIndex.from_tuples(
                                     [('bah', 'foo'),
                                      ('bah', 'bar'),
                                      ('ban', 'baz')], names=names),
                                 dtype='int64')

            # column & index are multi-index
            df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
            df.to_csv(path, tupleize_cols=False)
            result = read_csv(path, header=[0, 1, 2, 3], index_col=[
                              0, 1], tupleize_cols=False)
            assert_frame_equal(df, result)

            # column is mi
            df = mkdf(5, 3, r_idx_nlevels=1, c_idx_nlevels=4)
            df.to_csv(path, tupleize_cols=False)
            result = read_csv(
                path, header=[0, 1, 2, 3], index_col=0, tupleize_cols=False)
            assert_frame_equal(df, result)

            # dup column names?
            df = mkdf(5, 3, r_idx_nlevels=3, c_idx_nlevels=4)
            df.to_csv(path, tupleize_cols=False)
            result = read_csv(path, header=[0, 1, 2, 3], index_col=[
                              0, 1, 2], tupleize_cols=False)
            assert_frame_equal(df, result)

            # writing with no index
            df = _make_frame()
            df.to_csv(path, tupleize_cols=False, index=False)
            result = read_csv(path, header=[0, 1], tupleize_cols=False)
            assert_frame_equal(df, result)

            # we lose the names here
            df = _make_frame(True)
            df.to_csv(path, tupleize_cols=False, index=False)
            result = read_csv(path, header=[0, 1], tupleize_cols=False)
            self.assertTrue(all([x is None for x in result.columns.names]))
            result.columns.names = df.columns.names
            assert_frame_equal(df, result)

            # tupleize_cols=True and index=False
            df = _make_frame(True)
            df.to_csv(path, tupleize_cols=True, index=False)
            result = read_csv(
                path, header=0, tupleize_cols=True, index_col=None)
            result.columns = df.columns
            assert_frame_equal(df, result)

            # whatsnew example
            df = _make_frame()
            df.to_csv(path, tupleize_cols=False)
            result = read_csv(path, header=[0, 1], index_col=[
                              0], tupleize_cols=False)
            assert_frame_equal(df, result)

            df = _make_frame(True)
            df.to_csv(path, tupleize_cols=False)
            result = read_csv(path, header=[0, 1], index_col=[
                              0], tupleize_cols=False)
            assert_frame_equal(df, result)

            # column & index are multi-index (compatibility)
            df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
            df.to_csv(path, tupleize_cols=True)
            result = read_csv(path, header=0, index_col=[
                              0, 1], tupleize_cols=True)
            result.columns = df.columns
            assert_frame_equal(df, result)

            # invalid options
            df = _make_frame(True)
            df.to_csv(path, tupleize_cols=False)

            # catch invalid headers
            # Raw strings: the patterns escape brackets, and ``\[`` in a
            # plain literal is an invalid escape sequence.
            with assertRaisesRegexp(CParserError,
                                    r'Passed header=\[0,1,2\] are too many '
                                    r'rows for this multi_index of columns'):
                read_csv(path, tupleize_cols=False,
                         header=lrange(3), index_col=0)

            with assertRaisesRegexp(CParserError,
                                    r'Passed header=\[0,1,2,3,4,5,6\], len of '
                                    r'7, but only 6 lines in file'):
                read_csv(path, tupleize_cols=False,
                         header=lrange(7), index_col=0)

            for i in [4, 5, 6]:
                with tm.assertRaises(CParserError):
                    read_csv(path, tupleize_cols=False,
                             header=lrange(i), index_col=0)

            # write with cols
            with assertRaisesRegexp(TypeError, 'cannot specify cols with a '
                                    'MultiIndex'):
                df.to_csv(path, tupleize_cols=False, columns=['foo', 'bar'])

        with ensure_clean(pname) as path:
            # empty
            tsframe[:0].to_csv(path)
            recons = DataFrame.from_csv(path)
            exp = tsframe[:0]
            exp.index = []

            self.assertTrue(recons.columns.equals(exp.columns))
            self.assertEqual(len(recons), 0)
Ejemplo n.º 58
0
 def test_bad_deprecate_kwarg(self):
     """``deprecate_kwarg`` with ``0`` as third argument must raise TypeError."""
     def f4(new=None):
         pass

     with tm.assertRaises(TypeError):
         # Explicit form of applying ``@deprecate_kwarg('old', 'new', 0)``.
         deprecate_kwarg('old', 'new', 0)(f4)
Ejemplo n.º 59
0
 def test_to_gbq_should_fail_if_invalid_table_name_passed(self):
     """A destination of ``'invalid_table_name'`` must raise NotFoundException."""
     empty_frame = DataFrame()
     bad_destination = 'invalid_table_name'
     with tm.assertRaises(gbq.NotFoundException):
         gbq.to_gbq(empty_frame, bad_destination, project_id="1234")
Ejemplo n.º 60
0
    def test_get_loc(self):
        """Exercise ``get_loc`` on indexes built from dates.

        Checks exact lookups by timestamp, datetime and string key, the
        ``method``/``tolerance`` arguments, partial-string keys, error
        cases, and ``datetime.time`` keys on an hourly index.
        """
        idx = pd.date_range('2000-01-01', periods=3)

        # An exact key resolves to the same position for every fill
        # method, however the key is spelled.
        for method in [None, 'pad', 'backfill', 'nearest']:
            self.assertEqual(idx.get_loc(idx[1], method), 1)
            self.assertEqual(idx.get_loc(idx[1].to_pydatetime(), method), 1)
            self.assertEqual(idx.get_loc(str(idx[1]), method), 1)
            if method is not None:
                self.assertEqual(
                    idx.get_loc(idx[1],
                                method,
                                tolerance=pd.Timedelta('0 days')), 1)

        self.assertEqual(idx.get_loc('2000-01-01', method='nearest'), 0)
        self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest'), 1)

        # The same one-day tolerance given as a string, a Timedelta, a
        # numpy timedelta64 and a datetime.timedelta.
        self.assertEqual(
            idx.get_loc('2000-01-01T12', method='nearest', tolerance='1 day'),
            1)
        self.assertEqual(
            idx.get_loc('2000-01-01T12',
                        method='nearest',
                        tolerance=pd.Timedelta('1D')), 1)
        self.assertEqual(
            idx.get_loc('2000-01-01T12',
                        method='nearest',
                        tolerance=np.timedelta64(1, 'D')), 1)
        self.assertEqual(
            idx.get_loc('2000-01-01T12',
                        method='nearest',
                        tolerance=timedelta(1)), 1)
        # An unparseable tolerance, and a key with no match within the
        # tolerance, must both raise.
        with tm.assertRaisesRegexp(ValueError, 'must be convertible'):
            idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo')
        with tm.assertRaises(KeyError):
            idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours')

        # Partial-string keys that cover the index return slices.
        self.assertEqual(idx.get_loc('2000', method='nearest'), slice(0, 3))
        self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 3))

        # Keys outside the index range resolve to the first/last position
        # with 'nearest' ...
        self.assertEqual(idx.get_loc('1999', method='nearest'), 0)
        self.assertEqual(idx.get_loc('2001', method='nearest'), 2)

        # ... but raise with 'pad' / 'backfill' in these two cases.
        with tm.assertRaises(KeyError):
            idx.get_loc('1999', method='pad')
        with tm.assertRaises(KeyError):
            idx.get_loc('2001', method='backfill')

        # Invalid keys.
        with tm.assertRaises(KeyError):
            idx.get_loc('foobar')
        with tm.assertRaises(TypeError):
            idx.get_loc(slice(2))

        # Index with a gap between its two dates.
        idx = pd.to_datetime(['2000-01-01', '2000-01-04'])
        self.assertEqual(idx.get_loc('2000-01-02', method='nearest'), 0)
        self.assertEqual(idx.get_loc('2000-01-03', method='nearest'), 1)
        self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 2))

        # time indexing
        idx = pd.date_range('2000-01-01', periods=24, freq='H')
        tm.assert_numpy_array_equal(idx.get_loc(time(12)),
                                    np.array([12]),
                                    check_dtype=False)
        tm.assert_numpy_array_equal(idx.get_loc(time(12, 30)),
                                    np.array([]),
                                    check_dtype=False)
        # A ``method`` combined with a time key must raise.
        with tm.assertRaises(NotImplementedError):
            idx.get_loc(time(12, 30), method='pad')