Example #1
def test_read_zipped_json():
    uncompressed_path = tm.get_data_path("tsframe_v012.json")
    uncompressed_df = pd.read_json(uncompressed_path)

    compressed_path = tm.get_data_path("tsframe_v012.json.zip")
    compressed_df = pd.read_json(compressed_path, compression='zip')

    assert_frame_equal(uncompressed_df, compressed_df)
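Every example here calls tm.get_data_path from pandas.util.testing, which resolves a file name against the test suite's data directory. A minimal sketch of an equivalent helper, assuming the data files live in a data/ folder next to the test module (that layout is an assumption for illustration, not necessarily pandas' own):

import os

def get_data_path(relpath=''):
    # Resolve relpath against a data/ directory beside this module.
    base = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    return os.path.join(base, relpath)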
Example #2
    def test_parse_public_s3_bucket(self):
        for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
            df = read_csv('s3://pandas-test/tips.csv' +
                          ext, compression=comp)
            self.assertTrue(isinstance(df, DataFrame))
            self.assertFalse(df.empty)
            tm.assert_frame_equal(read_csv(
                tm.get_data_path('tips.csv')), df)

        # Read public file from bucket with not-public contents
        df = read_csv('s3://cant_get_it/tips.csv')
        self.assertTrue(isinstance(df, DataFrame))
        self.assertFalse(df.empty)
        tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df)
Example #3
    def test_categorical_dtype_encoding(self):
        # GH 10153
        pth = tm.get_data_path("unicode_series.csv")
        encoding = "latin-1"
        expected = self.read_csv(pth, header=None, encoding=encoding)
        expected[1] = Categorical(expected[1])
        actual = self.read_csv(pth, header=None, encoding=encoding,
                               dtype={1: "category"})
        tm.assert_frame_equal(actual, expected)

        pth = tm.get_data_path("utf16_ex.txt")
        encoding = "utf-16"
        expected = self.read_table(pth, encoding=encoding)
        expected = expected.apply(Categorical)
        actual = self.read_table(pth, encoding=encoding, dtype="category")
        tm.assert_frame_equal(actual, expected)
Example #4
 def test_parse_public_s3a_bucket(self):
     # Read from AWS s3 as "s3a" URL
     df = read_csv('s3a://pandas-test/tips.csv', nrows=10)
     self.assertTrue(isinstance(df, DataFrame))
     self.assertFalse(df.empty)
     tm.assert_frame_equal(read_csv(
         tm.get_data_path('tips.csv')).iloc[:10], df)
Example #5
def test_pickles(current_pickle_data, version, f):
    if not is_platform_little_endian():
        pytest.skip("known failure on non-little endian")

    vf = tm.get_data_path('legacy_pickle/{}/{}'.format(version, f))
    with catch_warnings(record=True):
        compare(current_pickle_data, vf, version)
Example #6
    def setUp(self):

        if sys.version_info < (2, 7, 0):
            import nose
            raise nose.SkipTest("Doesn't support Python 2.6 because of ElementTree incompat")

        self.dirpath = tm.get_data_path()
Example #7
 def test_parse_public_s3n_bucket(self):
     # Read from AWS s3 as "s3n" URL
     df = read_csv('s3n://pandas-test/tips.csv', nrows=10)
     assert isinstance(df, DataFrame)
     assert not df.empty
     tm.assert_frame_equal(read_csv(
         tm.get_data_path('tips.csv')).iloc[:10], df)
Example #8
def legacy_packers_versions():
    # yield the packers versions
    path = tm.get_data_path('legacy_msgpack')
    for v in os.listdir(path):
        p = os.path.join(path, v)
        if os.path.isdir(p):
            yield v
Example #9
def legacy_pickle_versions():
    # yield the pickle versions
    path = tm.get_data_path('legacy_pickle')
    for v in os.listdir(path):
        p = os.path.join(path, v)
        if os.path.isdir(p):
            yield v
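Generators like these lend themselves to pytest parametrization. A minimal sketch of how the discovered versions could drive one test per directory, reusing legacy_pickle_versions and tm from above and assuming pytest; the assertion is a placeholder, not the real compare logic:

import os
import pytest

# Materialize the generator once so pytest builds one test per version.
@pytest.mark.parametrize('version', list(legacy_pickle_versions()))
def test_legacy_pickle_dir_is_nonempty(version):
    path = tm.get_data_path('legacy_pickle/{0}'.format(version))
    assert os.listdir(path), 'no legacy pickle files for ' + version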
Example #10
    def test_categorical_dtype_encoding(self):
        # GH 10153
        pth = tm.get_data_path('unicode_series.csv')
        encoding = 'latin-1'
        expected = self.read_csv(pth, header=None, encoding=encoding)
        expected[1] = Categorical(expected[1])
        actual = self.read_csv(pth, header=None, encoding=encoding,
                               dtype={1: 'category'})
        tm.assert_frame_equal(actual, expected)

        pth = tm.get_data_path('utf16_ex.txt')
        encoding = 'utf-16'
        expected = self.read_table(pth, encoding=encoding)
        expected = expected.apply(Categorical)
        actual = self.read_table(pth, encoding=encoding, dtype='category')
        tm.assert_frame_equal(actual, expected)
Example #11
 def test_parse_public_s3_bucket_chunked(self):
     # Read with a chunksize
     chunksize = 5
     local_tips = read_csv(tm.get_data_path('tips.csv'))
     for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
         if comp == 'bz2' and compat.PY2:
             # The Python 2 C parser can't read bz2 from S3.
             self.assertRaises(ValueError, read_csv,
                               's3://pandas-test/tips.csv' + ext,
                               compression=comp)
         else:
             df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
                                  chunksize=chunksize, compression=comp)
             self.assertEqual(df_reader.chunksize, chunksize)
             for i_chunk in [0, 1, 2]:
                 # Read a couple of chunks and make sure we see them
                 # properly.
                 df = df_reader.get_chunk()
                 self.assertTrue(isinstance(df, DataFrame))
                 self.assertFalse(df.empty)
                 true_df = local_tips.iloc[
                     chunksize * i_chunk: chunksize * (i_chunk + 1)]
                 # Chunking doesn't preserve row numbering
                 true_df = true_df.reset_index().drop('index', axis=1)
                 tm.assert_frame_equal(true_df, df)
Example #12
def test_12659():
    dirpath = tm.get_data_path()
    fname = os.path.join(dirpath, "test_12659.sas7bdat")
    df = pd.read_sas(fname)
    fname = os.path.join(dirpath, "test_12659.csv")
    df0 = pd.read_csv(fname)
    df0 = df0.astype(np.float64)
    tm.assert_frame_equal(df, df0)
Example #13
    def test_read_pickles_0_11_0(self):
        if not is_little_endian():
            raise nose.SkipTest("known failure of test_read_pickles_0_11_0 on non-little endian")

        pth = tm.get_data_path('legacy_pickle/0.11.0')
        for f in os.listdir(pth):
            vf = os.path.join(pth, f)
            self.compare(vf)
Example #14
 def test_parse_public_s3_bucket_nrows_python(self):
     for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
         df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
                       nrows=10, compression=comp)
         assert isinstance(df, DataFrame)
         assert not df.empty
         tm.assert_frame_equal(read_csv(
             tm.get_data_path('tips.csv')).iloc[:10], df)
Example #15
    def read_pickles(self, version):
        if not is_little_endian():
            raise nose.SkipTest("known failure on non-little endian")

        pth = tm.get_data_path('legacy_pickle/{0}'.format(str(version)))
        for f in os.listdir(pth):
            vf = os.path.join(pth, f)
            self.compare(vf)
Example #16
 def test_url(self):
     # HTTP(S)
     url = ('https://raw.github.com/pydata/pandas/master/'
            'pandas/io/tests/parser/data/salary.table.csv')
     url_table = self.read_table(url)
     dirpath = tm.get_data_path()
     localtable = os.path.join(dirpath, "salary.table.csv")
     local_table = self.read_table(localtable)
     tm.assert_frame_equal(url_table, local_table)
Example #17
def test_airline():
    dirpath = tm.get_data_path()
    fname = os.path.join(dirpath, "airline.sas7bdat")
    df = pd.read_sas(fname)
    fname = os.path.join(dirpath, "airline.csv")
    df0 = pd.read_csv(fname)
    df0 = df0.astype(np.float64)
    tm.assert_frame_equal(df, df0, check_exact=False)
Example #18
 def test_parse_public_s3_bucket_nrows(self):
     for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
         df = read_csv('s3://pandas-test/tips.csv' +
                       ext, nrows=10, compression=comp)
         self.assertTrue(isinstance(df, DataFrame))
         self.assertFalse(df.empty)
         tm.assert_frame_equal(read_csv(
             tm.get_data_path('tips.csv')).iloc[:10], df)
Example #19
 def test_infer_s3_compression(self):
     for ext in ['', '.gz', '.bz2']:
         df = read_csv('s3://pandas-test/tips.csv' + ext,
                       engine='python', compression='infer')
         self.assertTrue(isinstance(df, DataFrame))
         self.assertFalse(df.empty)
         tm.assert_frame_equal(read_csv(
             tm.get_data_path('tips.csv')), df)
Example #20
 def test_infer_s3_compression(self, s3_resource):
     for ext in ['', '.gz', '.bz2']:
         df = read_csv('s3://pandas-test/tips.csv' + ext,
                       engine='python', compression='infer')
         assert isinstance(df, DataFrame)
         assert not df.empty
         tm.assert_frame_equal(read_csv(
             tm.get_data_path('tips.csv')), df)
Example #21
    def test_parse_public_s3_bucket(self):
        pytest.importorskip('s3fs')
        # more of an integration test due to the not-public contents portion
        # can probably mock this though.
        for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
            df = read_csv('s3://pandas-test/tips.csv' +
                          ext, compression=comp)
            assert isinstance(df, DataFrame)
            assert not df.empty
            tm.assert_frame_equal(read_csv(
                tm.get_data_path('tips.csv')), df)

        # Read public file from bucket with not-public contents
        df = read_csv('s3://cant_get_it/tips.csv')
        assert isinstance(df, DataFrame)
        assert not df.empty
        tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df)
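The comment above suggests mocking the bucket; one way is moto's in-memory S3, sketched here with read_csv and tm as in the examples above. This assumes moto's mock_s3 decorator (moto 5.x renamed it mock_aws), and reuses the test's bucket and file names purely for illustration:

import boto3
from moto import mock_s3

@mock_s3
def test_parse_mocked_s3_bucket():
    # Stand up a fake in-memory bucket so no network access is needed.
    conn = boto3.resource('s3', region_name='us-east-1')
    conn.create_bucket(Bucket='pandas-test')
    with open(tm.get_data_path('tips.csv'), 'rb') as f:
        conn.Bucket('pandas-test').put_object(Key='tips.csv', Body=f)

    df = read_csv('s3://pandas-test/tips.csv')
    tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df)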
Example #22
def test_date_time():
    # Support of different SAS date/datetime formats (PR #15871)
    dirpath = tm.get_data_path()
    fname = os.path.join(dirpath, "datetime.sas7bdat")
    df = pd.read_sas(fname)
    fname = os.path.join(dirpath, "datetime.csv")
    df0 = pd.read_csv(fname, parse_dates=['Date1', 'Date2', 'DateTime',
                                          'DateTimeHi', 'Taiw'])
    tm.assert_frame_equal(df, df0)
Example #23
 def test_msgpack(self):
     msgpack_path = tm.get_data_path('legacy_msgpack')
     n = 0
     for v in os.listdir(msgpack_path):
         pth = os.path.join(msgpack_path, v)
         if os.path.isdir(pth):
             yield self.read_msgpacks, v
         n += 1
     assert n > 0, 'Msgpack files are not tested'
Example #24
    def read_msgpacks(self, version):

        pth = tm.get_data_path('legacy_msgpack/{0}'.format(str(version)))
        n = 0
        for f in os.listdir(pth):
            vf = os.path.join(pth, f)
            self.compare(vf, version)
            n += 1
        assert n > 0, 'Msgpack files are not tested'
Example #25
def test_productsales():
    dirpath = tm.get_data_path()
    fname = os.path.join(dirpath, "productsales.sas7bdat")
    df = pd.read_sas(fname, encoding='utf-8')
    fname = os.path.join(dirpath, "productsales.csv")
    df0 = pd.read_csv(fname, parse_dates=['MONTH'])
    vn = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"]
    df0[vn] = df0[vn].astype(np.float64)
    tm.assert_frame_equal(df, df0)
Example #26
 def setUp(self):
     self.dirpath = tm.get_data_path()
     self.csv1 = os.path.join(self.dirpath, "test1.csv")
     self.csv2 = os.path.join(self.dirpath, "test2.csv")
     self.xls1 = os.path.join(self.dirpath, "test.xls")
     self.frame = _frame.copy()
     self.frame2 = _frame2.copy()
     self.tsframe = _tsframe.copy()
     self.mixed_frame = _mixed_frame.copy()
Example #27
 def test_pickles(self):
     pickle_path = tm.get_data_path('legacy_pickle')
     n = 0
     for v in os.listdir(pickle_path):
         pth = os.path.join(pickle_path, v)
         if os.path.isdir(pth):
             yield self.read_pickles, v
         n += 1
     assert n > 0, 'Pickle files are not tested'
Example #28
 def read_data(self, name, dedupe=False):
     path = os.path.join(tm.get_data_path(), name)
     x = read_csv(path)
     if dedupe:
         x = (x.drop_duplicates(['time', 'ticker'], keep='last')
               .reset_index(drop=True)
              )
     x.time = to_datetime(x.time)
     return x
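A caller would use the helper along these lines; the file name is illustrative, not taken from this listing:

trades = self.read_data('trades.csv', dedupe=True)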
Example #29
    def setUp(self):
        # Unit test datasets for dta7 - dta9 (old stata formats 104, 105 and 107) can be downloaded from:
        # http://stata-press.com/data/glmext.html
        self.dirpath = tm.get_data_path()
        self.dta1_114 = os.path.join(self.dirpath, 'stata1_114.dta')
        self.dta1_117 = os.path.join(self.dirpath, 'stata1_117.dta')

        self.dta2_113 = os.path.join(self.dirpath, 'stata2_113.dta')
        self.dta2_114 = os.path.join(self.dirpath, 'stata2_114.dta')
        self.dta2_115 = os.path.join(self.dirpath, 'stata2_115.dta')
        self.dta2_117 = os.path.join(self.dirpath, 'stata2_117.dta')

        self.dta3_113 = os.path.join(self.dirpath, 'stata3_113.dta')
        self.dta3_114 = os.path.join(self.dirpath, 'stata3_114.dta')
        self.dta3_115 = os.path.join(self.dirpath, 'stata3_115.dta')
        self.dta3_117 = os.path.join(self.dirpath, 'stata3_117.dta')
        self.csv3 = os.path.join(self.dirpath, 'stata3.csv')

        self.dta4_113 = os.path.join(self.dirpath, 'stata4_113.dta')
        self.dta4_114 = os.path.join(self.dirpath, 'stata4_114.dta')
        self.dta4_115 = os.path.join(self.dirpath, 'stata4_115.dta')
        self.dta4_117 = os.path.join(self.dirpath, 'stata4_117.dta')

        self.dta7 = os.path.join(self.dirpath, 'cancer.dta')
        self.csv7 = os.path.join(self.dirpath, 'cancer.csv')

        self.dta8 = os.path.join(self.dirpath, 'tbl19-3.dta')

        self.csv8 = os.path.join(self.dirpath, 'tbl19-3.csv')

        self.dta9 = os.path.join(self.dirpath, 'lbw.dta')
        self.csv9 = os.path.join(self.dirpath, 'lbw.csv')

        self.dta_encoding = os.path.join(self.dirpath, 'stata1_encoding.dta')

        self.csv14 = os.path.join(self.dirpath, 'stata5.csv')
        self.dta14_113 = os.path.join(self.dirpath, 'stata5_113.dta')
        self.dta14_114 = os.path.join(self.dirpath, 'stata5_114.dta')
        self.dta14_115 = os.path.join(self.dirpath, 'stata5_115.dta')
        self.dta14_117 = os.path.join(self.dirpath, 'stata5_117.dta')

        self.csv15 = os.path.join(self.dirpath, 'stata6.csv')
        self.dta15_113 = os.path.join(self.dirpath, 'stata6_113.dta')
        self.dta15_114 = os.path.join(self.dirpath, 'stata6_114.dta')
        self.dta15_115 = os.path.join(self.dirpath, 'stata6_115.dta')
        self.dta15_117 = os.path.join(self.dirpath, 'stata6_117.dta')

        self.dta16_115 = os.path.join(self.dirpath, 'stata7_115.dta')
        self.dta16_117 = os.path.join(self.dirpath, 'stata7_117.dta')

        self.dta17_113 = os.path.join(self.dirpath, 'stata8_113.dta')
        self.dta17_115 = os.path.join(self.dirpath, 'stata8_115.dta')
        self.dta17_117 = os.path.join(self.dirpath, 'stata8_117.dta')

        self.dta18_115 = os.path.join(self.dirpath, 'stata9_115.dta')
        self.dta18_117 = os.path.join(self.dirpath, 'stata9_117.dta')
Example #30
    def test_read_from_http_url(self):
        _skip_if_no_xlrd()

        url = ('https://raw.github.com/pydata/pandas/master/'
               'pandas/io/tests/data/test.xlsx')
        url_table = read_excel(url)
        dirpath = tm.get_data_path()
        localtable = os.path.join(dirpath, 'test.xlsx')
        local_table = read_excel(localtable)
        tm.assert_frame_equal(url_table, local_table)
Example #31
def test_pickles(current_pickle_data, version):
    if not is_platform_little_endian():
        pytest.skip("known failure on non-little endian")

    pth = tm.get_data_path('legacy_pickle/{0}'.format(version))
    n = 0
    for f in os.listdir(pth):
        vf = os.path.join(pth, f)
        data = compare(current_pickle_data, vf, version)

        if data is None:
            continue
        n += 1
    assert n > 0, 'Pickle files are not tested'
Example #32
    def read_pickles(self, version):
        if not is_platform_little_endian():
            raise nose.SkipTest("known failure on non-little endian")

        pth = tm.get_data_path('legacy_pickle/{0}'.format(str(version)))
        n = 0
        for f in os.listdir(pth):
            vf = os.path.join(pth, f)
            data = self.compare(vf, version)

            if data is None:
                continue
            n += 1
        assert n > 0, 'Pickle files are not tested'
Example #33
    def setUp(self):
        # Unit test datasets for dta7 - dta9 (old stata formats 104, 105 and 107) can be downloaded from:
        # http://stata-press.com/data/glmext.html
        self.dirpath = tm.get_data_path()
        self.dta1_114 = os.path.join(self.dirpath, 'stata1_114.dta')
        self.dta1_117 = os.path.join(self.dirpath, 'stata1_117.dta')

        self.dta2_113 = os.path.join(self.dirpath, 'stata2_113.dta')
        self.dta2_114 = os.path.join(self.dirpath, 'stata2_114.dta')
        self.dta2_115 = os.path.join(self.dirpath, 'stata2_115.dta')
        self.dta2_117 = os.path.join(self.dirpath, 'stata2_117.dta')

        self.dta3_113 = os.path.join(self.dirpath, 'stata3_113.dta')
        self.dta3_114 = os.path.join(self.dirpath, 'stata3_114.dta')
        self.dta3_115 = os.path.join(self.dirpath, 'stata3_115.dta')
        self.dta3_117 = os.path.join(self.dirpath, 'stata3_117.dta')
        self.csv3 = os.path.join(self.dirpath, 'stata3.csv')

        self.dta4_113 = os.path.join(self.dirpath, 'stata4_113.dta')
        self.dta4_114 = os.path.join(self.dirpath, 'stata4_114.dta')
        self.dta4_115 = os.path.join(self.dirpath, 'stata4_115.dta')
        self.dta4_117 = os.path.join(self.dirpath, 'stata4_117.dta')

        self.dta7 = os.path.join(self.dirpath, 'cancer.dta')
        self.csv7 = os.path.join(self.dirpath, 'cancer.csv')

        self.dta8 = os.path.join(self.dirpath, 'tbl19-3.dta')

        self.csv8 = os.path.join(self.dirpath, 'tbl19-3.csv')

        self.dta9 = os.path.join(self.dirpath, 'lbw.dta')
        self.csv9 = os.path.join(self.dirpath, 'lbw.csv')

        self.dta_encoding = os.path.join(self.dirpath, 'stata1_encoding.dta')

        self.csv14 = os.path.join(self.dirpath, 'stata5.csv')
        self.dta14_113 = os.path.join(self.dirpath, 'stata5_113.dta')
        self.dta14_114 = os.path.join(self.dirpath, 'stata5_114.dta')
        self.dta14_115 = os.path.join(self.dirpath, 'stata5_115.dta')
        self.dta14_117 = os.path.join(self.dirpath, 'stata5_117.dta')

        self.csv15 = os.path.join(self.dirpath, 'stata6.csv')
        self.dta15_113 = os.path.join(self.dirpath, 'stata6_113.dta')
        self.dta15_114 = os.path.join(self.dirpath, 'stata6_114.dta')
        self.dta15_115 = os.path.join(self.dirpath, 'stata6_115.dta')
        self.dta15_117 = os.path.join(self.dirpath, 'stata6_117.dta')

        self.dta16_115 = os.path.join(self.dirpath, 'stata7_115.dta')
        self.dta16_117 = os.path.join(self.dirpath, 'stata7_117.dta')
Example #34
    def setUpClass(self):
        # Integration tests require a valid bigquery token
        # be present in the user's home directory. This
        # can be generated with 'bq init' in the command line
        self.dirpath = tm.get_data_path()
        home = os.path.expanduser("~")
        self.bq_token = os.path.join(home, '.bigquery.v2.token')
        self.fake_job_path = os.path.join(self.dirpath, 'gbq_fake_job.txt')

        # If we're using a valid token, make a test dataset
        # Note, dataset functionality is beyond the scope
        # of the module under test, so we rely on the command
        # line utility for this.
        if os.path.exists(self.bq_token):
            subprocess.call(['bq', 'mk', '-d', 'pandas_testing_dataset'])
Example #35
def test_pickle_v0_15_2():
    # ordered -> _ordered
    # GH 9347

    cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False,
                         categories=['a', 'b', 'c', 'd'])
    pickle_path = os.path.join(tm.get_data_path(),
                               'categorical_0_15_2.pickle')
    # This code was executed once on v0.15.2 to generate the pickle:
    #
    # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'],
    #                   name='foobar')
    # with open(pickle_path, 'wb') as f: pickle.dump(cat, f)
    #
    tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path))
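For contrast with the checked-in legacy file, the same round trip written purely against one pandas version might look like this sketch:

import os
import tempfile
import pandas as pd
import pandas.util.testing as tm  # pandas._testing in newer releases

def test_categorical_pickle_roundtrip():
    cat = pd.Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c', 'd'],
                         ordered=False)
    with tempfile.TemporaryDirectory() as d:
        path = os.path.join(d, 'cat.pickle')
        pd.to_pickle(cat, path)
        tm.assert_categorical_equal(cat, pd.read_pickle(path))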
Example #36
 def setUp(self):
     # Unit test datasets for dta7 - dta9 (old stata formats 104, 105 and 107) can be downloaded from:
     # http://stata-press.com/data/glmext.html
     self.dirpath = tm.get_data_path()
     self.dta1 = os.path.join(self.dirpath, 'stata1.dta')
     self.dta2 = os.path.join(self.dirpath, 'stata2.dta')
     self.dta3 = os.path.join(self.dirpath, 'stata3.dta')
     self.csv3 = os.path.join(self.dirpath, 'stata3.csv')
     self.dta4 = os.path.join(self.dirpath, 'stata4.dta')
     self.dta7 = os.path.join(self.dirpath, 'cancer.dta')
     self.csv7 = os.path.join(self.dirpath, 'cancer.csv')
     self.dta8 = os.path.join(self.dirpath, 'tbl19-3.dta')
     self.csv8 = os.path.join(self.dirpath, 'tbl19-3.csv')
     self.dta9 = os.path.join(self.dirpath, 'lbw.dta')
     self.csv9 = os.path.join(self.dirpath, 'lbw.csv')
Example #37
    def read_pickles(self, version):
        if not is_little_endian():
            raise nose.SkipTest("known failure on non-little endian")

        pth = tm.get_data_path('legacy_pickle/{0}'.format(str(version)))
        for f in os.listdir(pth):
            vf = os.path.join(pth, f)
            data = self.compare(vf)

            if data is None:
                continue

            if 'series' in data:
                if 'ts' in data['series']:
                    self._validate_timeseries(data['series']['ts'],
                                              self.data['series']['ts'])
Example #38
    def setUpClass(cls):
        super(TestYahooOptions, cls).setUpClass()

        # aapl has monthlies
        cls.aapl = web.Options('aapl', 'yahoo')
        today = datetime.today()
        cls.year = today.year
        cls.month = today.month + 1
        if cls.month > 12:  # pragma: no cover
            cls.month = 1
            cls.year = cls.year + 1
        cls.expiry = datetime(cls.year, cls.month, 1)
        cls.dirpath = tm.get_data_path()
        cls.json1 = 'file://' + os.path.join(cls.dirpath, 'yahoo_options1.json')
        cls.json2 = 'file://' + os.path.join(cls.dirpath, 'yahoo_options2.json')  # Empty table GH#22
        cls.data1 = cls.aapl._process_data(cls.aapl._parse_url(cls.json1))
Example #39
 def test_parse_public_s3_bucket_nrows(self):
     for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
         if comp == 'bz2' and compat.PY2:
             # The Python 2 C parser can't read bz2 from S3.
             self.assertRaises(ValueError,
                               read_csv,
                               's3://pandas-test/tips.csv' + ext,
                               compression=comp)
         else:
             df = read_csv('s3://pandas-test/tips.csv' + ext,
                           nrows=10,
                           compression=comp)
             self.assertTrue(isinstance(df, DataFrame))
             self.assertFalse(df.empty)
             tm.assert_frame_equal(
                 read_csv(tm.get_data_path('tips.csv')).iloc[:10], df)
Example #40
    def test_file(self):

        # FILE
        if sys.version_info[:2] < (2, 6):
            raise nose.SkipTest("file:// not supported with Python < 2.6")
        dirpath = tm.get_data_path()
        localtable = os.path.join(dirpath, 'salary.table.csv')
        local_table = self.read_table(localtable)

        try:
            url_table = self.read_table('file://localhost/' + localtable)
        except URLError:
            # fails on some systems
            raise nose.SkipTest("failing on %s" %
                                ' '.join(platform.uname()).strip())

        tm.assert_frame_equal(url_table, local_table)
Example #41
    def boto3_client_s3(self):
        # see gh-16135

        # boto3 is a dependency of s3fs
        import boto3
        client = boto3.client("s3")

        key = "/tips.csv"
        bucket = "pandas-test"
        s3_object = client.get_object(Bucket=bucket, Key=key)

        result = read_csv(s3_object["Body"])
        assert isinstance(result, DataFrame)
        assert not result.empty

        expected = read_csv(tm.get_data_path('tips.csv'))
        tm.assert_frame_equal(result, expected)
Example #42
def test_pickle_v0_14_1():

    # we have the name warning
    # 10482
    with tm.assert_produces_warning(UserWarning):
        cat = pd.Categorical(values=['a', 'b', 'c'],
                             categories=['a', 'b', 'c', 'd'],
                             name='foobar',
                             ordered=False)
    pickle_path = os.path.join(tm.get_data_path(), 'categorical_0_14_1.pickle')
    # This code was executed once on v0.14.1 to generate the pickle:
    #
    # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'],
    #                   name='foobar')
    # with open(pickle_path, 'wb') as f: pickle.dump(cat, f)
    #
    tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path))
Example #43
def test_encoding_options():
    dirpath = tm.get_data_path()
    fname = os.path.join(dirpath, "test1.sas7bdat")
    df1 = pd.read_sas(fname)
    df2 = pd.read_sas(fname, encoding='utf-8')
    for col in df1.columns:
        try:
            df1[col] = df1[col].str.decode('utf-8')
        except AttributeError:
            pass
    tm.assert_frame_equal(df1, df2)

    from pandas.io.sas.sas7bdat import SAS7BDATReader
    rdr = SAS7BDATReader(fname, convert_header_text=False)
    df3 = rdr.read()
    for x, y in zip(df1.columns, df3.columns):
        assert x == y.decode()
Example #44
 def test_parse_public_s3_bucket_chunked_python(self):
     # Read with a chunksize using the Python parser
     chunksize = 5
     local_tips = read_csv(tm.get_data_path('tips.csv'))
     for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
         df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
                              chunksize=chunksize, compression=comp,
                              engine='python')
         self.assertEqual(df_reader.chunksize, chunksize)
         for i_chunk in [0, 1, 2]:
             # Read a couple of chunks and make sure we see them properly.
             df = df_reader.get_chunk()
             self.assertTrue(isinstance(df, DataFrame))
             self.assertFalse(df.empty)
             true_df = local_tips.iloc[
                 chunksize * i_chunk: chunksize * (i_chunk + 1)]
             tm.assert_frame_equal(true_df, df)
Example #45
 def test_parse_public_s3_bucket_chunked(self):
     # Read with a chunksize
     chunksize = 5
     local_tips = read_csv(tm.get_data_path('tips.csv'))
     for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
         df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
                              chunksize=chunksize, compression=comp)
         assert df_reader.chunksize == chunksize
         for i_chunk in [0, 1, 2]:
             # Read a couple of chunks and make sure we see them
             # properly.
             df = df_reader.get_chunk()
             assert isinstance(df, DataFrame)
             assert not df.empty
             true_df = local_tips.iloc[
                 chunksize * i_chunk: chunksize * (i_chunk + 1)]
             tm.assert_frame_equal(true_df, df)
Example #46
    def read_msgpacks(self, version):

        pth = tm.get_data_path('legacy_msgpack/{0}'.format(str(version)))
        n = 0
        for f in os.listdir(pth):
            # GH12142 0.17 files packed in P2 can't be read in P3
            if (compat.PY3 and version.startswith('0.17.')
                    and f.split('.')[-4][-1] == '2'):
                continue
            vf = os.path.join(pth, f)
            try:
                self.compare(vf, version)
            except ImportError:
                # blosc not installed
                continue
            n += 1
        assert n > 0, 'Msgpack files are not tested'
Example #47
    def setUpClass(cls):
        super(TestYahooOptions, cls).setUpClass()
        _skip_if_no_lxml()

        # aapl has monthlies
        cls.aapl = web.Options('aapl', 'yahoo')
        today = datetime.today()
        cls.year = today.year
        cls.month = today.month + 1
        if cls.month > 12:
            cls.year = cls.year + 1
            cls.month = 1
        cls.expiry = datetime(cls.year, cls.month, 1)
        cls.dirpath = tm.get_data_path()
        cls.html1 = os.path.join(cls.dirpath, 'yahoo_options1.html')
        cls.html2 = os.path.join(cls.dirpath, 'yahoo_options2.html')
        cls.data1 = cls.aapl._option_frames_from_url(cls.html1)['puts']
Example #48
    def setUpClass(cls):
        super(TestYahooOptions, cls).setUpClass()
        _skip_if_no_lxml()

        # aapl has monthlies
        cls.aapl = web.Options('aapl', 'yahoo')
        d = (Timestamp.today() + pd.offsets.MonthBegin(1)).normalize()
        cls.year = d.year
        cls.month = d.month
        cls.expiry = d
        cls.expiry2 = d + pd.offsets.MonthBegin(1)
        cls.dirpath = tm.get_data_path()
        cls.html1 = os.path.join(cls.dirpath, 'yahoo_options1.html')
        cls.html2 = os.path.join(cls.dirpath, 'yahoo_options2.html')
        cls.html3 = os.path.join(cls.dirpath,
                                 'yahoo_options3.html')  # Empty table GH#22
        cls.data1 = cls.aapl._option_frames_from_url(cls.html1)['puts']
Example #49
    def setUpClass(cls):
        super(TestYahooOptions, cls).setUpClass()
        _skip_if_no_lxml()

        # aapl has monthlies
        cls.aapl = web.Options('aapl', 'yahoo')
        today = datetime.today()
        year = today.year
        month = today.month + 1
        if month > 12:
            year = year + 1
            month = 1
        cls.expiry = datetime(year, month, 1)
        cls.dirpath = tm.get_data_path()
        cls.html1 = os.path.join(cls.dirpath, 'yahoo_options1.html')
        cls.html2 = os.path.join(cls.dirpath, 'yahoo_options2.html')
        cls.root1 = cls.aapl._parse_url(cls.html1)
        cls.root2 = cls.aapl._parse_url(cls.html2)
Example #50
    def test_read_from_file_url(self):
        _skip_if_no_xlrd()

        # FILE
        if sys.version_info[:2] < (2, 6):
            raise nose.SkipTest("file:// not supported with Python < 2.6")
        dirpath = tm.get_data_path()
        localtable = os.path.join(dirpath, 'test.xlsx')
        local_table = read_excel(localtable)

        try:
            url_table = read_excel('file://localhost/' + localtable)
        except URLError:
            # fails on some systems
            raise nose.SkipTest("failing on %s" %
                                ' '.join(platform.uname()).strip())

        tm.assert_frame_equal(url_table, local_table)
Example #51
    def test_msgpacks_legacy(self, current_packers_data, all_packers_data,
                             version):

        pth = tm.get_data_path("legacy_msgpack/{0}".format(version))
        n = 0
        for f in os.listdir(pth):
            # GH12142 0.17 files packed in P2 can't be read in P3
            if (compat.PY3 and version.startswith("0.17.")
                    and f.split(".")[-4][-1] == "2"):
                continue
            vf = os.path.join(pth, f)
            try:
                self.compare(current_packers_data, all_packers_data, vf,
                             version)
            except ImportError:
                # blosc not installed
                continue
            n += 1
        assert n > 0, "Msgpack files are not tested"
Example #52
    def test_msgpacks_legacy(self, current_packers_data, all_packers_data,
                             version):

        pth = tm.get_data_path('legacy_msgpack/{0}'.format(version))
        n = 0
        for f in os.listdir(pth):
            # GH12142 0.17 files packed in P2 can't be read in P3
            if (compat.PY3 and version.startswith('0.17.')
                    and f.split('.')[-4][-1] == '2'):
                continue
            vf = os.path.join(pth, f)
            try:
                with catch_warnings(record=True):
                    self.compare(current_packers_data, all_packers_data, vf,
                                 version)
            except ImportError:
                # blosc not installed
                continue
            n += 1
        assert n > 0, 'Msgpack files are not tested'
Example #53
    def setup_class(cls):
        # AAPL has monthlies
        cls.aapl = web.Options('aapl', 'yahoo')
        today = datetime.today()
        cls.year = today.year
        cls.month = today.month + 1

        if cls.month > 12:  # pragma: no cover
            cls.month = 1
            cls.year = cls.year + 1

        cls.expiry = datetime(cls.year, cls.month, 1)
        cls.dirpath = tm.get_data_path()
        cls.json1 = 'file://' + os.path.join(
            cls.dirpath, 'yahoo_options1.json')

        # see gh-22: empty table
        cls.json2 = 'file://' + os.path.join(
            cls.dirpath, 'yahoo_options2.json')
        cls.data1 = cls.aapl._process_data(cls.aapl._parse_url(cls.json1))
Example #54
    def setUp(self):
        self.dirpath = tm.get_data_path()

        self.ts = tm.makeTimeSeries()
        self.ts.name = 'ts'

        self.series = tm.makeStringSeries()
        self.series.name = 'series'

        self.objSeries = tm.makeObjectSeries()
        self.objSeries.name = 'objects'

        self.empty_series = Series([], index=[])
        self.empty_frame = DataFrame({})

        self.frame = _frame.copy()
        self.frame2 = _frame2.copy()
        self.intframe = _intframe.copy()
        self.tsframe = _tsframe.copy()
        self.mixed_frame = _mixed_frame.copy()
Example #55
    def test_qcut_binning_issues(self):
        # #1978, 1979
        path = os.path.join(tm.get_data_path(), 'cut_data.csv')
        arr = np.loadtxt(path)

        result = qcut(arr, 20)

        starts = []
        ends = []
        for lev in result.categories:
            s, e = lev[1:-1].split(',')

            self.assertTrue(s != e)

            starts.append(float(s))
            ends.append(float(e))

        for (sp, sn), (ep, en) in zip(zip(starts[:-1], starts[1:]),
                                      zip(ends[:-1], ends[1:])):
            self.assertTrue(sp < sn)
            self.assertTrue(ep < en)
            self.assertTrue(ep <= sn)
Example #56
    def setUpClass(cls):
        super(TestYahooOptions, cls).setUpClass()
        _skip_if_no_lxml()

        # aapl has monthlies
        cls.aapl = web.Options('aapl', 'yahoo')
        today = datetime.today()
        year = today.year
        month = today.month + 1
        if month > 12:
            year = year + 1
            month = 1
        cls.expiry = datetime(year, month, 1)
        cls.dirpath = tm.get_data_path()
        cls.html1 = os.path.join(cls.dirpath, 'yahoo_options1.html')
        cls.html2 = os.path.join(cls.dirpath, 'yahoo_options2.html')
        cls.root1 = cls.aapl._parse_url(cls.html1)
        cls.root2 = cls.aapl._parse_url(cls.html2)
        cls.tables1 = cls.aapl._parse_option_page_from_yahoo(cls.root1)
        cls.unprocessed_data1 = web._parse_options_data(
            cls.tables1[cls.aapl._TABLE_LOC['puts']])
        cls.data1 = cls.aapl._process_data(cls.unprocessed_data1, 'put')
Example #57
    def test_qcut_binning_issues(self):
        # #1978, 1979
        path = os.path.join(tm.get_data_path(), 'cut_data.csv')
        arr = np.loadtxt(path)

        result = qcut(arr, 20)

        starts = []
        ends = []
        for lev in np.unique(result):
            s = lev.left
            e = lev.right
            assert s != e

            starts.append(float(s))
            ends.append(float(e))

        for (sp, sn), (ep, en) in zip(zip(starts[:-1], starts[1:]),
                                      zip(ends[:-1], ends[1:])):
            assert sp < sn
            assert ep < en
            assert ep <= sn
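The property asserted by both versions of this test also holds on synthetic data; a standalone illustration, assuming a modern pandas where the qcut categories form an IntervalIndex:

import numpy as np
from pandas import qcut

arr = np.random.RandomState(0).randn(1000)
intervals = qcut(arr, 20).categories  # 20 quantile-based bins

for prev, cur in zip(intervals[:-1], intervals[1:]):
    assert prev.left < prev.right   # each bin is non-degenerate
    assert prev.right <= cur.left   # consecutive bins do not overlap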
Example #58
 def setUp(self):
     self.dirpath = tm.get_data_path()
     self.data = []
     self.test_ix = [list(range(1, 16)), [16]]
     for j in 1, 2:
         fname = os.path.join(self.dirpath, "test_sas7bdat_%d.csv" % j)
         df = pd.read_csv(fname)
         epoch = pd.datetime(1960, 1, 1)
         t1 = pd.to_timedelta(df["Column4"], unit='d')
         df["Column4"] = epoch + t1
         t2 = pd.to_timedelta(df["Column12"], unit='d')
         df["Column12"] = epoch + t2
         for k in range(df.shape[1]):
             col = df.iloc[:, k]
             if col.dtype == np.int64:
                 df.iloc[:, k] = df.iloc[:, k].astype(np.float64)
             elif col.dtype == np.dtype('O'):
                 if PY2:
                     f = lambda x: (x.decode('utf-8')
                                    if isinstance(x, str) else x)
                     df.iloc[:, k] = df.iloc[:, k].apply(f)
         self.data.append(df)
Example #59
 def test_parse_public_s3_bucket_chunked(self):
     # Read with a chunksize
     chunksize = 5
     local_tips = read_csv(tm.get_data_path('tips.csv'))
     for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
         if comp == 'bz2' and compat.PY2:
             # The Python 2 C parser can't read bz2 from S3.
             self.assertRaises(ValueError, read_csv,
                               's3://pandas-test/tips.csv' + ext,
                               compression=comp)
         else:
             df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
                                  chunksize=chunksize, compression=comp)
             self.assertEqual(df_reader.chunksize, chunksize)
             for i_chunk in [0, 1, 2]:
                 # Read a couple of chunks and make sure we see them
                 # properly.
                 df = df_reader.get_chunk()
                 self.assertTrue(isinstance(df, DataFrame))
                 self.assertFalse(df.empty)
                 true_df = local_tips.iloc[
                     chunksize * i_chunk: chunksize * (i_chunk + 1)]
                 tm.assert_frame_equal(true_df, df)
Example #60
    def read_pickles(self, version):
        if not is_little_endian():
            raise nose.SkipTest("known failure on non-little endian")

        pth = tm.get_data_path('legacy_pickle/{0}'.format(str(version)))
        n = 0
        for f in os.listdir(pth):
            vf = os.path.join(pth, f)
            data = self.compare(vf)

            if data is None:
                continue

            if 'series' in data:
                if 'ts' in data['series']:
                    self._validate_timeseries(data['series']['ts'],
                                              self.data['series']['ts'])
                    self._validate_frequency(data['series']['ts'])
            if 'index' in data:
                if 'period' in data['index']:
                    self._validate_periodindex(data['index']['period'],
                                               self.data['index']['period'])
            n += 1
        assert n > 0, 'Pickle files are not tested'