コード例 #1
0
ファイル: test_parsers.py プロジェクト: dzhou/pandas
 def test_url(self):
     """Reading salary.table over HTTPS must match reading the local copy."""
     # HTTP(S)
     url = 'https://raw.github.com/pydata/pandas/master/pandas/io/tests/salary.table'
     url_table = read_table(url)
     dirpath = curpath()
     localtable = os.path.join(dirpath, 'salary.table')
     local_table = read_table(localtable)
     assert_frame_equal(url_table, local_table)
コード例 #2
0
 def test_url(self):
     """read_table over HTTPS should yield the same frame as the local file."""
     # HTTP(S)
     url = 'https://raw.github.com/pydata/pandas/master/pandas/io/tests/salary.table'
     url_table = read_table(url)
     dirpath = curpath()
     localtable = os.path.join(dirpath, 'salary.table')
     local_table = read_table(localtable)
     assert_frame_equal(url_table, local_table)
コード例 #3
0
    def test_file(self):
        """read_table via a file:// URL should match reading the path directly."""
        # FILE
        if sys.version_info[:2] < (2, 6):
            raise nose.SkipTest("file:// not supported with Python < 2.6")
        dirpath = curpath()
        localtable = os.path.join(dirpath, 'salary.table')
        local_table = read_table(localtable)

        url_table = read_table('file://localhost/' + localtable)
        assert_frame_equal(url_table, local_table)
コード例 #4
0
ファイル: test_parsers.py プロジェクト: MikeLindenau/pandas
    def test_file(self):
        """Reading through a file:// URL must equal reading the plain path."""
        # FILE
        if sys.version_info[:2] < (2, 6):
            raise nose.SkipTest("file:// not supported with Python < 2.6")
        dirpath = curpath()
        localtable = os.path.join(dirpath, 'salary.table')
        local_table = read_table(localtable)

        url_table = read_table('file://localhost/'+localtable)
        assert_frame_equal(url_table, local_table)
コード例 #5
0
ファイル: test_parsers.py プロジェクト: smc77/pandas
 def test_url(self):
     """HTTPS and file:// reads of salary.table must both equal the local read."""
     # HTTP(S)
     url = "https://raw.github.com/pydata/pandas/master/pandas/io/tests/salary.table"
     url_table = read_table(url)
     dirpath = curpath()
     localtable = os.path.join(dirpath, "salary.table")
     local_table = read_table(localtable)
     assert_frame_equal(url_table, local_table)
     # FILE
     url_table = read_table("file://localhost/" + localtable)
     assert_frame_equal(url_table, local_table)
コード例 #6
0
ファイル: test_parsers.py プロジェクト: smc77/pandas
    def test_no_header(self):
        """With header=None the parser assigns default names X.1..X.5;
        explicit ``names`` replace them without changing the parsed values."""
        data = """1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
        df = read_table(StringIO(data), sep=",", header=None)
        names = ["foo", "bar", "baz", "quux", "panda"]
        df2 = read_table(StringIO(data), sep=",", header=None, names=names)
        expected = [[1, 2, 3, 4, 5.0], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]
        assert_almost_equal(df.values, expected)
        assert_almost_equal(df.values, df2.values)
        self.assert_(np.array_equal(df.columns, ["X.1", "X.2", "X.3", "X.4", "X.5"]))
        self.assert_(np.array_equal(df2.columns, names))
コード例 #7
0
    def test_no_header(self):
        """header=None yields auto-generated X.* column names; passing
        ``names`` substitutes them while values stay identical."""
        data = """1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
        df = read_table(StringIO(data), sep=',', header=None)
        names = ['foo', 'bar', 'baz', 'quux', 'panda']
        df2 = read_table(StringIO(data), sep=',', header=None, names=names)
        expected = [[1, 2, 3, 4, 5.], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]
        assert_almost_equal(df.values, expected)
        assert_almost_equal(df.values, df2.values)
        self.assert_(
            np.array_equal(df.columns, ['X.1', 'X.2', 'X.3', 'X.4', 'X.5']))
        self.assert_(np.array_equal(df2.columns, names))
コード例 #8
0
ファイル: test_parsers.py プロジェクト: ara818/pandas
    def test_no_header(self):
        """Headerless parse gets default X.* names; explicit names override.

        Note: unlike sibling versions, this variant does not compare
        df.values against df2.values.
        """
        data = """1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
        df = read_table(StringIO(data), sep=',', header=None)
        names = ['foo', 'bar', 'baz', 'quux', 'panda']
        df2 = read_table(StringIO(data), sep=',', header=None, names=names)
        expected = [[1,2,3,4,5.],
                    [6,7,8,9,10],
                    [11,12,13,14,15]]
        assert_almost_equal(df.values, expected)
        self.assert_(np.array_equal(df.columns,
                                    ['X.1', 'X.2', 'X.3', 'X.4', 'X.5']))
        self.assert_(np.array_equal(df2.columns, names))
コード例 #9
0
ファイル: test_parsers.py プロジェクト: smc77/pandas
 def test_read_csv_no_index_name(self):
     """read_csv and read_table(sep=',') must parse csv2 identically."""
     via_csv = read_csv(self.csv2, index_col=0, parse_dates=True)
     via_table = read_table(self.csv2, sep=",", index_col=0, parse_dates=True)
     expected_cols = ["A", "B", "C", "D", "E"]
     self.assert_(np.array_equal(via_csv.columns, expected_cols))
     # index values come back as parsed dates
     self.assert_(isinstance(via_csv.index[0], datetime))
     numeric_part = via_csv.ix[:, ["A", "B", "C", "D"]].values
     self.assert_(numeric_part.dtype == np.float64)
     assert_frame_equal(via_csv, via_table)
コード例 #10
0
def read_clipboard(**kwargs):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list.

    If neither `sep` nor `delim_whitespace` is supplied, `sep` defaults to
    the whitespace regex '\s+'.

    Returns
    -------
    parsed : DataFrame
    """
    # Default to whitespace splitting unless the caller chose a delimiter.
    if kwargs.get('sep') is None and kwargs.get('delim_whitespace') is None:
        kwargs['sep'] = r'\s+'  # raw string avoids invalid-escape warnings
    from pandas.util.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()

    # try to decode (if needed on PY3); decoding is best-effort, so a
    # failure falls back to the raw clipboard payload
    if compat.PY3:
        try:
            text = compat.bytes_to_str(
                text,
                encoding=(kwargs.get('encoding')
                          or get_option('display.encoding')))
        except Exception:  # was a bare except; don't swallow KeyboardInterrupt
            pass
    return read_table(StringIO(text), **kwargs)
コード例 #11
0
ファイル: test_parsers.py プロジェクト: bshanks/pandas
    def test_iterator(self):
        """Iterator/chunked reads must slice exactly like a full read."""
        reader = read_csv(StringIO(self.data1), index_col=0, iterator=True)

        df = read_csv(StringIO(self.data1), index_col=0)

        chunk = reader.get_chunk(3)
        assert_frame_equal(chunk, df[:3])

        # asking for more rows than remain returns just the remainder
        last_chunk = reader.get_chunk(5)
        assert_frame_equal(last_chunk, df[3:])

        # pass list
        lines = list(csv.reader(StringIO(self.data1)))
        parser = TextParser(lines, index_col=0, chunksize=2)

        df = read_csv(StringIO(self.data1), index_col=0)

        chunks = list(parser)
        assert_frame_equal(chunks[0], df[:2])
        assert_frame_equal(chunks[1], df[2:4])
        assert_frame_equal(chunks[2], df[4:])

        treader = read_table(StringIO(self.data1), sep=',', index_col=0,
                             iterator=True)
        self.assert_(isinstance(treader, TextParser))
コード例 #12
0
ファイル: test_parsers.py プロジェクト: MikeLindenau/pandas
    def test_iterator(self):
        """Chunked reads slice like a full read; skiprows shifts chunks;
        skip_footer with iterator is rejected."""
        reader = read_csv(StringIO(self.data1), index_col=0, iterator=True)
        df = read_csv(StringIO(self.data1), index_col=0)

        chunk = reader.get_chunk(3)
        assert_frame_equal(chunk, df[:3])

        # over-asking returns only the remaining rows
        last_chunk = reader.get_chunk(5)
        assert_frame_equal(last_chunk, df[3:])

        # pass list
        lines = list(csv.reader(StringIO(self.data1)))
        parser = TextParser(lines, index_col=0, chunksize=2)

        df = read_csv(StringIO(self.data1), index_col=0)

        chunks = list(parser)
        assert_frame_equal(chunks[0], df[:2])
        assert_frame_equal(chunks[1], df[2:4])
        assert_frame_equal(chunks[2], df[4:])

        # pass skiprows
        parser = TextParser(lines, index_col=0, chunksize=2, skiprows=[1])
        chunks = list(parser)
        assert_frame_equal(chunks[0], df[1:3])

        # test bad parameter (skip_footer)
        reader = read_csv(StringIO(self.data1), index_col=0, iterator=True,
                          skip_footer=True)
        self.assertRaises(ValueError, reader.get_chunk, 3)

        treader = read_table(StringIO(self.data1), sep=',', index_col=0,
                             iterator=True)
        self.assert_(isinstance(treader, TextParser))
コード例 #13
0
ファイル: test_parsers.py プロジェクト: MikeLindenau/pandas
 def test_read_csv_no_index_name(self):
     """read_csv and read_table(sep=',') must agree on csv2; index entries
     may be datetime, np.datetime64 or Timestamp depending on version."""
     df = read_csv(self.csv2, index_col=0, parse_dates=True)
     df2 = read_table(self.csv2, sep=',', index_col=0, parse_dates=True)
     self.assert_(np.array_equal(df.columns, ['A', 'B', 'C', 'D', 'E']))
     self.assert_(isinstance(df.index[0], (datetime, np.datetime64, Timestamp)))
     self.assert_(df.ix[:, ['A', 'B', 'C', 'D']].values.dtype == np.float64)
     assert_frame_equal(df, df2)
コード例 #14
0
ファイル: clipboard.py プロジェクト: alephu5/Soundbyte
def read_clipboard(**kwargs):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list.

    If unspecified, `sep` defaults to '\s+'.

    Returns
    -------
    parsed : DataFrame
    """
    # Default to whitespace splitting unless a delimiter was requested.
    if kwargs.get('sep') is None and kwargs.get('delim_whitespace') is None:
        kwargs['sep'] = r'\s+'  # raw string avoids invalid-escape warnings
    from pandas.util.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()

    # try to decode (if needed on PY3); best-effort, keep raw text on failure
    if compat.PY3:
        try:
            text = compat.bytes_to_str(
                text, encoding=(kwargs.get('encoding') or
                                get_option('display.encoding'))
            )
        except Exception:  # was a bare except; don't swallow KeyboardInterrupt
            pass
    return read_table(StringIO(text), **kwargs)
コード例 #15
0
ファイル: test_date_converters.py プロジェクト: mindw/pandas
    def test_parse_date_time(self):
        """conv.parse_date_time should combine date+time columns, both
        directly and when used as a read_table/read_csv date_parser."""
        result = conv.parse_date_time(self.dates, self.times)
        self.assert_((result == self.expected).all())

        data = """\
date, time, a, b
2001-01-05, 10:00:00, 0.0, 10.
2001-01-05, 00:00:00, 1., 11.
"""
        # merge columns 0 and 1 into a single parsed 'date_time' column
        datecols = {'date_time': [0, 1]}
        df = read_table(StringIO(data),
                        sep=',',
                        header=0,
                        parse_dates=datecols,
                        date_parser=conv.parse_date_time)
        self.assert_('date_time' in df)
        self.assertEqual(df.date_time.ix[0], datetime(2001, 1, 5, 10, 0, 0))

        data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n"
                "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n"
                "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n"
                "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n"
                "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n"
                "KORD,19990127, 23:00:00, 22:56:00, -0.5900")

        # column 1 feeds both combined columns; only parse success is checked
        date_spec = {'nominal': [1, 2], 'actual': [1, 3]}
        df = read_csv(StringIO(data),
                      header=None,
                      parse_dates=date_spec,
                      date_parser=conv.parse_date_time)
コード例 #16
0
    def test_parse_date_time(self):
        """conv.parse_date_time merges date and time columns; exercised both
        standalone and as a date_parser for read_table/read_csv."""
        result = conv.parse_date_time(self.dates, self.times)
        self.assert_((result == self.expected).all())

        data = """\
date, time, a, b
2001-01-05, 10:00:00, 0.0, 10.
2001-01-05, 00:00:00, 1., 11.
"""
        datecols = {'date_time': [0, 1]}
        df = read_table(StringIO(data), sep=',', header=0,
                        parse_dates=datecols, date_parser=conv.parse_date_time)
        self.assert_('date_time' in df)
        self.assert_(df.date_time.ix[0] == datetime(2001, 1, 5, 10, 0, 0))

        data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n"
                "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n"
                "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n"
                "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n"
                "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n"
                "KORD,19990127, 23:00:00, 22:56:00, -0.5900")

        # column 1 participates in both combined columns
        date_spec = {'nominal': [1, 2], 'actual': [1, 3]}
        df = read_csv(StringIO(data), header=None, parse_dates=date_spec,
                      date_parser=conv.parse_date_time)
コード例 #17
0
ファイル: test_parsers.py プロジェクト: GunioRobot/pandas
 def test_read_csv_no_index_name(self):
     """Oldest variant: csv2 read with defaults must match read_table(sep=',')."""
     df = read_csv(self.csv2)
     df2 = read_table(self.csv2, sep=',')
     self.assert_(np.array_equal(df.columns, ['A', 'B', 'C', 'D', 'E']))
     self.assert_(isinstance(df.index[0], datetime))
     self.assert_(df.ix[:, ['A', 'B', 'C', 'D']].values.dtype == np.float64)
     assert_frame_equal(df, df2)
コード例 #18
0
    def test_iterator(self):
        """get_chunk and TextParser chunking must reproduce slices of the
        fully-materialized frame."""
        reader = read_csv(StringIO(self.data1), index_col=0, iterator=True)

        df = read_csv(StringIO(self.data1), index_col=0)

        chunk = reader.get_chunk(3)
        assert_frame_equal(chunk, df[:3])

        # asking past the end returns only the remaining rows
        last_chunk = reader.get_chunk(5)
        assert_frame_equal(last_chunk, df[3:])

        # pass list
        lines = list(csv.reader(StringIO(self.data1)))
        parser = TextParser(lines, index_col=0, chunksize=2)

        df = read_csv(StringIO(self.data1), index_col=0)

        chunks = list(parser)
        assert_frame_equal(chunks[0], df[:2])
        assert_frame_equal(chunks[1], df[2:4])
        assert_frame_equal(chunks[2], df[4:])

        treader = read_table(StringIO(self.data1),
                             sep=',',
                             index_col=0,
                             iterator=True)
        self.assert_(isinstance(treader, TextParser))
コード例 #19
0
 def test_generic(self):
     """A user-supplied date_parser should build the combined 'ym' column."""
     raw = "year, month, day, a\n 2001, 01, 10, 10.\n 2001, 02, 1, 11."
     combine_spec = {"ym": [0, 1]}

     def first_of_month(y, m):
         # collapse (year, month) onto the first day of that month
         return date(year=int(y), month=int(m), day=1)

     frame = read_table(StringIO(raw), sep=",", header=0,
                        parse_dates=combine_spec, date_parser=first_of_month)
     self.assertIn("ym", frame)
     self.assertEqual(frame.ym.ix[0], date(2001, 1, 1))
コード例 #20
0
ファイル: test_parsers.py プロジェクト: kfatyas/pandas
    def test_iterator(self):
        """Chunked reads slice like a full read; skiprows shifts chunk
        boundaries; skip_footer combined with iterator raises."""
        reader = read_csv(StringIO(self.data1), index_col=0, iterator=True)
        df = read_csv(StringIO(self.data1), index_col=0)

        chunk = reader.get_chunk(3)
        assert_frame_equal(chunk, df[:3])

        # over-asking yields just the remainder
        last_chunk = reader.get_chunk(5)
        assert_frame_equal(last_chunk, df[3:])

        # pass list
        lines = list(csv.reader(StringIO(self.data1)))
        parser = TextParser(lines, index_col=0, chunksize=2)

        df = read_csv(StringIO(self.data1), index_col=0)

        chunks = list(parser)
        assert_frame_equal(chunks[0], df[:2])
        assert_frame_equal(chunks[1], df[2:4])
        assert_frame_equal(chunks[2], df[4:])

        # pass skiprows
        parser = TextParser(lines, index_col=0, chunksize=2, skiprows=[1])
        chunks = list(parser)
        assert_frame_equal(chunks[0], df[1:3])

        # test bad parameter (skip_footer)
        reader = read_csv(StringIO(self.data1), index_col=0, iterator=True,
                          skip_footer=True)
        self.assertRaises(ValueError, reader.get_chunk, 3)

        treader = read_table(StringIO(self.data1), sep=',', index_col=0,
                             iterator=True)
        self.assert_(isinstance(treader, TextParser))
コード例 #21
0
ファイル: test_parsers.py プロジェクト: kfatyas/pandas
 def test_read_csv_no_index_name(self):
     """read_csv and read_table(sep=',') must parse csv2 the same way."""
     df = read_csv(self.csv2, index_col=0, parse_dates=True)
     df2 = read_table(self.csv2, sep=',', index_col=0, parse_dates=True)
     self.assert_(np.array_equal(df.columns, ['A', 'B', 'C', 'D', 'E']))
     self.assert_(isinstance(df.index[0], datetime))
     self.assert_(df.ix[:, ['A', 'B', 'C', 'D']].values.dtype == np.float64)
     assert_frame_equal(df, df2)
コード例 #22
0
ファイル: test_parsers.py プロジェクト: kfatyas/pandas
    def test_no_unnamed_index(self):
        data = """ id c0 c1 c2
0 1 0 a b
1 2 0 c d
2 2 2 e f
"""
        df = read_table(StringIO(data), sep=' ')
        self.assert_(df.index.name is None)
コード例 #23
0
ファイル: test_parsers.py プロジェクト: MikeLindenau/pandas
    def test_no_unnamed_index(self):
        data = """ id c0 c1 c2
0 1 0 a b
1 2 0 c d
2 2 2 e f
"""
        df = read_table(StringIO(data), sep=' ')
        self.assert_(df.index.name is None)
コード例 #24
0
ファイル: test_parsers.py プロジェクト: ogrisel/pandas
 def test_read_csv_dataframe(self):
     """csv1 parses to a float frame with a named, date-typed index, and
     read_table(sep=',') must agree."""
     df = read_csv(self.csv1, index_col=0, parse_dates=True)
     df2 = read_table(self.csv1, sep=',', index_col=0, parse_dates=True)
     self.assert_(np.array_equal(df.columns, ['A', 'B', 'C', 'D']))
     self.assert_(df.index.name == 'index')
     self.assert_(isinstance(df.index[0], datetime))
     self.assert_(df.values.dtype == np.float64)
     assert_frame_equal(df, df2)
コード例 #25
0
ファイル: test_parsers.py プロジェクト: smc77/pandas
    def test_duplicate_columns(self):
        data = """A,A,B,B,B
1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
        df = read_table(StringIO(data), sep=",")
        self.assert_(np.array_equal(df.columns, ["A", "A.1", "B", "B.1", "B.2"]))
コード例 #26
0
ファイル: clipboards.py プロジェクト: BobMcFry/pandas
def read_clipboard(sep=r'\s+', **kwargs):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list

    Parameters
    ----------
    sep : str, default '\s+'.
        A string or regex delimiter. The default of '\s+' denotes
        one or more whitespace characters.

    Returns
    -------
    parsed : DataFrame

    Raises
    ------
    NotImplementedError
        If a non-UTF-8 ``encoding`` is requested.
    """
    encoding = kwargs.pop('encoding', 'utf-8')

    # only utf-8 is valid for passed value because that's what clipboard
    # supports
    if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
        raise NotImplementedError(
            'reading from clipboard only supports utf-8 encoding')

    from pandas.io.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()

    # try to decode (if needed on PY3)
    # Strange. linux py33 doesn't complain, win py33 does
    if compat.PY3:
        try:
            # NOTE(review): 'encoding' was popped from kwargs above, so
            # kwargs.get('encoding') is always None here and the display
            # option is what actually gets used — confirm intent upstream.
            text = compat.bytes_to_str(
                text, encoding=(kwargs.get('encoding') or
                                get_option('display.encoding'))
            )
        except Exception:  # was a bare except; don't swallow KeyboardInterrupt
            pass

    # Excel copies into clipboard with \t separation
    # inspect no more then the 10 first lines, if they
    # all contain an equal number (>0) of tabs, infer
    # that this came from excel and set 'sep' accordingly
    lines = text[:10000].split('\n')[:-1][:10]

    # Need to remove leading white space, since read_table
    # accepts:
    #    a  b
    # 0  1  2
    # 1  3  4

    counts = {x.lstrip().count('\t') for x in lines}
    if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
        sep = r'\t'

    if sep is None and kwargs.get('delim_whitespace') is None:
        sep = r'\s+'

    return read_table(StringIO(text), sep=sep, **kwargs)
コード例 #27
0
ファイル: clipboards.py プロジェクト: Goutham2591/OMK_PART2
def read_clipboard(sep=r'\s+', **kwargs):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list

    Parameters
    ----------
    sep : str, default '\s+'.
        A string or regex delimiter. The default of '\s+' denotes
        one or more whitespace characters.

    Returns
    -------
    parsed : DataFrame

    Raises
    ------
    NotImplementedError
        If a non-UTF-8 ``encoding`` is requested.
    """
    encoding = kwargs.pop('encoding', 'utf-8')

    # only utf-8 is valid for passed value because that's what clipboard
    # supports
    if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
        raise NotImplementedError(
            'reading from clipboard only supports utf-8 encoding')

    from pandas.io.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()

    # try to decode (if needed on PY3)
    # Strange. linux py33 doesn't complain, win py33 does
    if compat.PY3:
        try:
            # NOTE(review): 'encoding' was popped from kwargs above, so
            # kwargs.get('encoding') is always None here — confirm intent.
            text = compat.bytes_to_str(
                text,
                encoding=(kwargs.get('encoding')
                          or get_option('display.encoding')))
        except Exception:  # was a bare except; don't swallow KeyboardInterrupt
            pass

    # Excel copies into clipboard with \t separation
    # inspect no more then the 10 first lines, if they
    # all contain an equal number (>0) of tabs, infer
    # that this came from excel and set 'sep' accordingly
    lines = text[:10000].split('\n')[:-1][:10]

    # Need to remove leading white space, since read_table
    # accepts:
    #    a  b
    # 0  1  2
    # 1  3  4

    # set comprehension replaces set([...]) — same result, idiomatic
    counts = {x.lstrip().count('\t') for x in lines}
    if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
        sep = '\t'

    if sep is None and kwargs.get('delim_whitespace') is None:
        sep = r'\s+'

    return read_table(StringIO(text), sep=sep, **kwargs)
コード例 #28
0
 def test_read_csv_dataframe(self):
     """csv1 must parse to a float frame with named, date-typed index; the
     index entry may be datetime, np.datetime64 or Timestamp."""
     df = read_csv(self.csv1, index_col=0, parse_dates=True)
     df2 = read_table(self.csv1, sep=',', index_col=0, parse_dates=True)
     self.assert_(np.array_equal(df.columns, ['A', 'B', 'C', 'D']))
     self.assert_(df.index.name == 'index')
     self.assert_(
         isinstance(df.index[0], (datetime, np.datetime64, Timestamp)))
     self.assert_(df.values.dtype == np.float64)
     assert_frame_equal(df, df2)
コード例 #29
0
ファイル: test_parsers.py プロジェクト: kfatyas/pandas
    def test_duplicate_columns(self):
        data = """A,A,B,B,B
1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
        df = read_table(StringIO(data), sep=',')
        self.assert_(np.array_equal(df.columns,
                                    ['A', 'A.1', 'B', 'B.1', 'B.2']))
コード例 #30
0
ファイル: test_parsers.py プロジェクト: MikeLindenau/pandas
    def test_duplicate_columns(self):
        data = """A,A,B,B,B
1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
        df = read_table(StringIO(data), sep=',')
        self.assert_(np.array_equal(df.columns,
                                    ['A', 'A.1', 'B', 'B.1', 'B.2']))
コード例 #31
0
ファイル: test_date_converters.py プロジェクト: t1c1/pandas
 def test_generic(self):
     """A custom date_parser building date objects should populate 'ym'."""
     data = "year, month, day, a\n 2001, 01, 10, 10.\n 2001, 02, 1, 11."
     # combine columns 0 and 1 into one parsed column named 'ym'
     datecols = {'ym': [0, 1]}
     dateconverter = lambda y, m: date(year=int(y), month=int(m), day=1)
     df = read_table(StringIO(data), sep=',', header=0,
                     parse_dates=datecols,
                     date_parser=dateconverter)
     self.assert_('ym' in df)
     self.assert_(df.ym.ix[0] == date(2001, 1, 1))
コード例 #32
0
    def test_regex_separator(self):
        data = """   A   B   C   D
a   1   2   3   4
b   1   2   3   4
c   1   2   3   4
"""
        df = read_table(StringIO(data), sep='\s+')
        expected = read_csv(StringIO(re.sub('[ ]+', ',', data)), index_col=0)
        self.assert_(expected.index.name is None)
        assert_frame_equal(df, expected)
コード例 #33
0
    def test_parse_date_fields(self):
        """conv.parse_date_fields combines year/month/day arrays, and works
        as a date_parser building the 'ymd' column."""
        result = conv.parse_date_fields(self.years, self.months, self.days)
        expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)])
        self.assertTrue((result == expected).all())

        data = "year, month, day, a\n 2001 , 01 , 10 , 10.\n 2001 , 02 , 1 , 11."
        datecols = {"ymd": [0, 1, 2]}
        df = read_table(StringIO(data), sep=",", header=0, parse_dates=datecols, date_parser=conv.parse_date_fields)
        self.assertIn("ymd", df)
        self.assertEqual(df.ymd.ix[0], datetime(2001, 1, 10))
コード例 #34
0
ファイル: test_parsers.py プロジェクト: MikeLindenau/pandas
    def test_read_table_buglet_4x_multiindex(self):
        """Regression check: a whitespace table whose second header row names
        four index levels must parse into a 4-level MultiIndex."""
        text = """                      A       B       C       D        E
one two three   four
a   b   10.0032 5    -0.5109 -2.3358 -0.4645  0.05076  0.3640
a   q   20      4     0.4473  1.4152  0.2834  1.00661  0.1744
x   q   30      3    -0.6662 -0.5243 -0.3580  0.89145  2.5838"""

        # it works!
        df = read_table(StringIO(text), sep='\s+')
        self.assertEquals(df.index.names, ['one', 'two', 'three', 'four'])
コード例 #35
0
 def _get_probe_mapping(self, agilent_file):
     """
     Given an agilent file that maps probe ids to gene symbols
     return dataframe with ProbeID and GeneSymbol columns,
     indexed by ProbeID with null GeneSymbol rows dropped.
     """
     agl = parsers.read_table( join( self.working_dir, agilent_file ) )
     # BUG FIX: the original called agl.set_index('ProbeID') here and
     # discarded the result — set_index is not in-place, so that call
     # had no effect and has been removed.
     # Keep only rows that actually map to a gene symbol.
     agl2 = agl[agl['GeneSymbol'].notnull()]
     agl2 = agl2.set_index('ProbeID')
     return agl2
コード例 #36
0
 def _get_data( self, data_file, annotations_file):
     """
     Given data file and annotations, make dataframe indexed by
     ProbeName with control probes dropped
     """
     # Load the raw data table from the working directory.
     data_orig = parsers.read_table( join(self.working_dir,data_file) )
     annot = self._get_annotations( annotations_file)
     # set data index: align rows to the annotation table's ProbeName column
     data_orig.index = annot['ProbeName']
     return self._drop_controls( data_orig, annotations_file )
コード例 #37
0
    def test_read_table_buglet_4x_multiindex(self):
        """Regression check: four index-level names on the second header row
        must yield a 4-level MultiIndex."""
        text = """                      A       B       C       D        E
one two three   four
a   b   10.0032 5    -0.5109 -2.3358 -0.4645  0.05076  0.3640
a   q   20      4     0.4473  1.4152  0.2834  1.00661  0.1744
x   q   30      3    -0.6662 -0.5243 -0.3580  0.89145  2.5838"""

        # it works!
        df = read_table(StringIO(text), sep='\s+')
        self.assertEquals(df.index.names, ['one', 'two', 'three', 'four'])
コード例 #38
0
ファイル: test_parsers.py プロジェクト: smc77/pandas
    def test_unnamed_columns(self):
        """Blank header cells become 'Unnamed: N' columns; values still parse."""
        data = """A,B,C,,
1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
        expected = [[1, 2, 3, 4, 5.0], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]
        df = read_table(StringIO(data), sep=",")
        assert_almost_equal(df.values, expected)
        self.assert_(np.array_equal(df.columns, ["A", "B", "C", "Unnamed: 3", "Unnamed: 4"]))
コード例 #39
0
ファイル: test_parsers.py プロジェクト: smc77/pandas
    def test_regex_separator(self):
        data = """   A   B   C   D
a   1   2   3   4
b   1   2   3   4
c   1   2   3   4
"""
        df = read_table(StringIO(data), sep="\s+")
        expected = read_csv(StringIO(re.sub("[ ]+", ",", data)), index_col=0)
        self.assert_(expected.index.name is None)
        assert_frame_equal(df, expected)
コード例 #40
0
ファイル: test_parsers.py プロジェクト: MikeLindenau/pandas
    def test_squeeze(self):
        data = """\
a,1
b,2
c,3
"""
        expected = Series([1,2,3], ['a', 'b', 'c'])
        result = read_table(StringIO(data), sep=',', index_col=0,
                            header=None, squeeze=True)
        self.assert_(isinstance(result, Series))
        assert_series_equal(result, expected)
コード例 #41
0
ファイル: analyze_data.py プロジェクト: dkorenci/feedsucker
def readTable(fileName):
    """Read a feed-article table from ``fileName``.

    Returns the parsed DataFrame with NaN ``text`` entries replaced by ''
    and ``date_published`` coerced to Timestamp (None for 'null' entries).
    """
    # BUG FIX: the original built a columnTypes dict here but never passed
    # it to read_table, so it had no effect (the author's own comment said
    # as much); removed as dead code.
    table = parsers.read_table(fileName, quotechar='"', parse_dates = [2])
    table.replace(to_replace = {'text': {NaN:''}}, inplace=True) # empty string are read in as NaN, replace
    # date_published contains "null" values, so read_table won't parse the dates
    # code below seems to throw exceptions, see the docs
    newDatePub = table['date_published'].apply(lambda a : pandas.tslib.Timestamp(a) if a != 'null' else None)
    table['date_published'] = newDatePub
    return table
コード例 #42
0
ファイル: test_parsers.py プロジェクト: MikeLindenau/pandas
    def test_url(self):
        """Compare an HTTPS read against the local copy; skip when offline."""
        import urllib2
        try:
            # HTTP(S)
            url = ('https://raw.github.com/pydata/pandas/master/'
                   'pandas/io/tests/salary.table')
            url_table = read_table(url)
            dirpath = curpath()
            localtable = os.path.join(dirpath, 'salary.table')
            local_table = read_table(localtable)
            assert_frame_equal(url_table, local_table)
            #TODO: ftp testing

        except urllib2.URLError:
            # distinguish "no network at all" (skip) from a real URL failure
            try:
                urllib2.urlopen('http://www.google.com')
            except urllib2.URLError:
                raise nose.SkipTest
            else:
                raise
コード例 #43
0
    def test_datetime_fractional_seconds(self):
        """Fractional seconds in the 'second' column must survive into the
        combined datetime as microseconds."""
        data = """\
year, month, day, hour, minute, second, a, b
2001, 01, 05, 10, 00, 0.123456, 0.0, 10.
2001, 01, 5, 10, 0, 0.500000, 1., 11.
"""
        datecols = {"ymdHMS": [0, 1, 2, 3, 4, 5]}
        df = read_table(StringIO(data), sep=",", header=0, parse_dates=datecols, date_parser=conv.parse_all_fields)
        self.assertIn("ymdHMS", df)
        self.assertEqual(df.ymdHMS.ix[0], datetime(2001, 1, 5, 10, 0, 0, microsecond=123456))
        self.assertEqual(df.ymdHMS.ix[1], datetime(2001, 1, 5, 10, 0, 0, microsecond=500000))
コード例 #44
0
    def test_1000_sep(self):
        """thousands=',' must strip grouping commas in both read_csv and
        read_table."""
        data = """A|B|C
1|2,334.0|5
10|13|10.
"""
        expected = [[1, 2334., 5], [10, 13, 10]]

        df = read_csv(StringIO(data), sep='|', thousands=',')
        assert_almost_equal(df.values, expected)

        df = read_table(StringIO(data), sep='|', thousands=',')
        assert_almost_equal(df.values, expected)
コード例 #45
0
ファイル: test_date_converters.py プロジェクト: t1c1/pandas
    def test_parse_date_fields(self):
        """conv.parse_date_fields combines y/m/d arrays, and builds the
        'ymd' column when used as a date_parser."""
        result = conv.parse_date_fields(self.years, self.months, self.days)
        expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)])
        self.assert_((result == expected).all())

        data = "year, month, day, a\n 2001 , 01 , 10 , 10.\n 2001 , 02 , 1 , 11."
        datecols = {'ymd': [0, 1, 2]}
        df = read_table(StringIO(data), sep=',', header=0,
                        parse_dates=datecols,
                        date_parser=conv.parse_date_fields)
        self.assert_('ymd' in df)
        self.assert_(df.ymd.ix[0] == datetime(2001, 1, 10))
コード例 #46
0
ファイル: test_parsers.py プロジェクト: MikeLindenau/pandas
    def test_comment(self):
        """Text after the comment char '#' must be dropped from parsed rows."""
        data = """A,B,C
1,2.,4.#hello world
5.,NaN,10.0
"""
        expected = [[1., 2., 4.],
                    [5., np.nan, 10.]]
        df = read_csv(StringIO(data), comment='#')
        assert_almost_equal(df.values, expected)

        df = read_table(StringIO(data), sep=',', comment='#', na_values=['NaN'])
        assert_almost_equal(df.values, expected)
コード例 #47
0
    def test_unnamed_columns(self):
        """Blank header cells become 'Unnamed: N' columns; values still parse."""
        data = """A,B,C,,
1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
        expected = [[1, 2, 3, 4, 5.], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]
        df = read_table(StringIO(data), sep=',')
        assert_almost_equal(df.values, expected)
        self.assert_(
            np.array_equal(df.columns,
                           ['A', 'B', 'C', 'Unnamed: 3', 'Unnamed: 4']))
コード例 #48
0
ファイル: test_network.py プロジェクト: xcompass/pandas
def check_compressed_urls(salaries_table, compression, extension, mode,
                          engine):
    """Fetch salaries.csv with the given compression/extension/engine and
    check the result matches the locally loaded ``salaries_table`` fixture."""
    # test reading compressed urls with various engines and
    # extension inference
    base_url = ('https://github.com/pandas-dev/pandas/raw/master/'
                'pandas/tests/io/parser/data/salaries.csv')

    url = base_url + extension

    # non-explicit modes pass the mode itself as the compression argument
    # so that inference paths are exercised
    if mode != 'explicit':
        compression = mode

    url_table = read_table(url, compression=compression, engine=engine)
    tm.assert_frame_equal(url_table, salaries_table)
コード例 #49
0
    def test_malformed(self):
        # all
        data = """ignore
A,B,C
1,2,3 # comment
1,2,3,4,5
2,3,4
"""

        try:
            df = read_table(StringIO(data), sep=',', header=1, comment='#')
            self.assert_(False)
        except ValueError, inst:
            self.assert_('Expecting 3 columns, got 5 in row 3' in str(inst))
コード例 #50
0
    def test_squeeze(self):
        data = """\
a,1
b,2
c,3
"""
        expected = Series([1, 2, 3], ['a', 'b', 'c'])
        result = read_table(StringIO(data),
                            sep=',',
                            index_col=0,
                            header=None,
                            squeeze=True)
        self.assert_(isinstance(result, Series))
        assert_series_equal(result, expected)
コード例 #51
0
    def test_comment(self):
        """Trailing '#' comments on data lines must be stripped before parse."""
        data = """A,B,C
1,2.,4.#hello world
5.,NaN,10.0
"""
        expected = [[1., 2., 4.], [5., np.nan, 10.]]
        df = read_csv(StringIO(data), comment='#')
        assert_almost_equal(df.values, expected)

        df = read_table(StringIO(data),
                        sep=',',
                        comment='#',
                        na_values=['NaN'])
        assert_almost_equal(df.values, expected)
コード例 #52
0
def read_clipboard(**kwargs):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list.

    If neither ``sep`` nor ``delim_whitespace`` is given, ``sep`` defaults
    to the whitespace regex ``'\s+'``.

    Returns
    -------
    parsed : DataFrame
    """
    if kwargs.get('sep') is None and kwargs.get('delim_whitespace') is None:
        # raw string: '\s' is an invalid escape in a plain literal
        kwargs['sep'] = r'\s+'
    # Imports deferred so importing this module does not pull in a
    # clipboard backend.
    from pandas.util.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()
    return read_table(StringIO(text), **kwargs)
コード例 #53
0
ファイル: test_date_converters.py プロジェクト: t1c1/pandas
    def test_datetime_fractional_seconds(self):
        """Fractional seconds in a composite date column survive parsing
        down to the microsecond.
        """
        data = ("year, month, day, hour, minute, second, a, b\n"
                "2001, 01, 05, 10, 00, 0.123456, 0.0, 10.\n"
                "2001, 01, 5, 10, 0, 0.500000, 1., 11.\n")
        date_spec = {'ymdHMS': [0, 1, 2, 3, 4, 5]}
        frame = read_table(StringIO(data), sep=',', header=0,
                           parse_dates=date_spec,
                           date_parser=conv.parse_all_fields)
        self.assert_('ymdHMS' in frame)
        self.assert_(frame.ymdHMS.ix[0] ==
                     datetime(2001, 1, 5, 10, 0, 0, microsecond=123456))
        self.assert_(frame.ymdHMS.ix[1] ==
                     datetime(2001, 1, 5, 10, 0, 0, microsecond=500000))
コード例 #54
0
    def test_quoting(self):
        bad_line_small = """printer\tresult\tvariant_name
Klosterdruckerei\tKlosterdruckerei <Salem> (1611-1804)\tMuller, Jacob
Klosterdruckerei\tKlosterdruckerei <Salem> (1611-1804)\tMuller, Jakob
Klosterdruckerei\tKlosterdruckerei <Kempten> (1609-1805)\t"Furststiftische Hofdruckerei,  <Kempten""
Klosterdruckerei\tKlosterdruckerei <Kempten> (1609-1805)\tGaller, Alois
Klosterdruckerei\tKlosterdruckerei <Kempten> (1609-1805)\tHochfurstliche Buchhandlung <Kempten>"""
        self.assertRaises(Exception,
                          read_table,
                          StringIO(bad_line_small),
                          sep='\t')

        good_line_small = bad_line_small + '"'
        df = read_table(StringIO(good_line_small), sep='\t')
        self.assert_(len(df) == 3)
コード例 #55
0
ファイル: test_date_converters.py プロジェクト: t1c1/pandas
    def test_datetime_six_col(self):
        """parse_all_fields combines six numeric columns into datetimes,
        both when called directly and via read_table's parse_dates.
        """
        combined = conv.parse_all_fields(self.years, self.months, self.days,
                                         self.hours, self.minutes,
                                         self.seconds)
        self.assert_((combined == self.expected).all())

        data = ("year, month, day, hour, minute, second, a, b\n"
                "2001, 01, 05, 10, 00, 0, 0.0, 10.\n"
                "2001, 01, 5, 10, 0, 00, 1., 11.\n")
        date_spec = {'ymdHMS': [0, 1, 2, 3, 4, 5]}
        frame = read_table(StringIO(data), sep=',', header=0,
                           parse_dates=date_spec,
                           date_parser=conv.parse_all_fields)
        self.assert_('ymdHMS' in frame)
        self.assert_(frame.ymdHMS.ix[0] == datetime(2001, 1, 5, 10, 0, 0))
コード例 #56
0
def read_clipboard(**kwargs):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list.

    If unspecified, `sep` defaults to '\s+'.

    Returns
    -------
    parsed : DataFrame
    """
    from pandas.util.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()

    # try to decode (if needed on PY3)
    # Strange. linux py33 doesn't complain, win py33 does
    if compat.PY3:
        try:
            text = compat.bytes_to_str(
                text,
                encoding=(kwargs.get('encoding')
                          or get_option('display.encoding')))
        # Narrowed from a bare ``except:`` (which also swallowed
        # KeyboardInterrupt/SystemExit); decoding stays best-effort.
        except Exception:
            pass

    # Excel copies into clipboard with \t separation.
    # Inspect no more than the 10 first lines; if they all contain an
    # equal number (>0) of tabs, infer that this came from Excel and
    # set 'sep' accordingly.
    lines = text[:10000].split('\n')[:-1][:10]

    # Need to remove leading white space, since read_table
    # accepts:
    #    a  b
    # 0  1  2
    # 1  3  4

    counts = {x.lstrip().count('\t') for x in lines}
    if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
        kwargs['sep'] = '\t'

    if kwargs.get('sep') is None and kwargs.get('delim_whitespace') is None:
        # raw string: '\s' is an invalid escape in a plain literal
        kwargs['sep'] = r'\s+'

    return read_table(StringIO(text), **kwargs)
コード例 #57
0
ファイル: test_parsers.py プロジェクト: kfatyas/pandas
    def test_custom_na_values(self):
        data = """A,B,C
ignore,this,row
1,NA,3
-1.#IND,5,baz
7,8,NaN
"""
        expected = [[1., nan, 3],
                    [nan, 5, nan],
                    [7, 8, nan]]

        df = read_csv(StringIO(data), na_values=['baz'], skiprows=[1])
        assert_almost_equal(df.values, expected)

        df2 = read_table(StringIO(data), sep=',', na_values=['baz'],
                         skiprows=[1])
        assert_almost_equal(df2.values, expected)
コード例 #58
0
def readTable(fileName):
    """Read the scraped-articles table from *fileName*.

    Missing values in the 'text' column are normalised back to empty
    strings, and 'date_published' (which may contain literal "null"
    strings that defeat read_table's date parsing) is converted to
    Timestamps by hand.

    Returns
    -------
    pandas.DataFrame
    """
    # NOTE: a ``columnTypes`` dict used to be declared here but was never
    # passed to read_table (the author's comment said "does not seem to
    # have effect") — it was dead code and has been removed.
    table = parsers.read_table(fileName, quotechar='"', parse_dates=[2])
    # Empty strings are read in as NaN; restore them.
    table.replace(to_replace={'text': {
        NaN: ''
    }}, inplace=True)
    # date_published contains "null" values, so read_table won't parse the
    # dates; convert field-by-field, mapping "null" to None (-> NaT).
    # ``pandas.Timestamp`` replaces the removed ``pandas.tslib.Timestamp``
    # (tslib was privatised/removed from pandas).
    newDatePub = table['date_published'].apply(
        lambda a: pandas.Timestamp(a) if a != 'null' else None)
    table['date_published'] = newDatePub
    return table
コード例 #59
0
def salaries_table(parser_data):
    """DataFrame with the salaries dataset.

    Parameters
    ----------
    parser_data : str
        Directory containing the ``salaries.csv`` fixture.
    """
    fixture_path = os.path.join(parser_data, 'salaries.csv')
    return read_table(fixture_path)