Ejemplo n.º 1
0
    def test_deprecated_match(self):
        """Check the legacy, group-returning ``str.match`` behaviour.

        This is the old behaviour, deprecated (but still the default) in
        0.13, so every call is wrapped in ``tm.assert_produces_warning``.
        """
        pattern = ".*(BAD[_]+).*(BAD)"

        # plain strings: matches give the captured groups, misses give []
        plain = Series(["fooBAD__barBAD", NA, "foo"])
        with tm.assert_produces_warning():
            plain_groups = plain.str.match(pattern)
        tm.assert_series_equal(plain_groups,
                               Series([("BAD__", "BAD"), NA, []]))

        # mixed: non-string entries are expected to come back as NA
        mixed_input = Series(["aBAD_BAD", NA, "BAD_b_BAD", True,
                              datetime.today(), "foo", None, 1, 2.0])
        with tm.assert_produces_warning():
            mixed_groups = Series(mixed_input).str.match(pattern)
        mixed_expected = [("BAD_", "BAD"), NA, ("BAD_", "BAD"),
                          NA, NA, [], NA, NA, NA]
        tm.assert_isinstance(mixed_groups, Series)
        tm.assert_almost_equal(mixed_groups, mixed_expected)

        # unicode
        uni = Series([u("fooBAD__barBAD"), NA, u("foo")])
        with tm.assert_produces_warning():
            uni_groups = uni.str.match(pattern)
        tm.assert_series_equal(uni_groups,
                               Series([(u("BAD__"), u("BAD")), NA, []]))
Ejemplo n.º 2
0
    def test_replace(self):
        """Check regex-based ``str.replace`` on plain, mixed and unicode data.

        Covers unlimited replacement, the ``n`` limit, mixed-type input
        (non-strings are expected to become NA) and the ``flags`` argument.
        """
        values = Series(["fooBAD__barBAD", NA])

        # replace every occurrence
        result = values.str.replace("BAD[_]*", "")
        exp = Series(["foobar", NA])
        tm.assert_series_equal(result, exp)

        # replace only the first occurrence
        result = values.str.replace("BAD[_]*", "", n=1)
        exp = Series(["foobarBAD", NA])
        tm.assert_series_equal(result, exp)

        # mixed
        mixed = Series(["aBAD", NA, "bBAD", True, datetime.today(), "fooBAD",
                        None, 1, 2.0])

        rs = Series(mixed).str.replace("BAD[_]*", "")
        xp = ["a", NA, "b", NA, NA, "foo", NA, NA, NA]
        tm.assert_isinstance(rs, Series)
        tm.assert_almost_equal(rs, xp)

        # unicode
        values = Series([u("fooBAD__barBAD"), NA])

        result = values.str.replace("BAD[_]*", "")
        exp = Series([u("foobar"), NA])
        tm.assert_series_equal(result, exp)

        result = values.str.replace("BAD[_]*", "", n=1)
        exp = Series([u("foobarBAD"), NA])
        tm.assert_series_equal(result, exp)

        # flags + unicode
        values = Series([b"abcd,\xc3\xa0".decode("utf-8")])
        exp = Series([b"abcd, \xc3\xa0".decode("utf-8")])
        # Raw literal: "\w" inside a normal string is an invalid escape
        # sequence (a warning on modern Pythons); the pattern is unchanged.
        result = values.str.replace(r"(?<=\w),(?=\w)", ", ",
                                    flags=re.UNICODE)
        tm.assert_series_equal(result, exp)
Ejemplo n.º 3
0
    def test_split(self):
        """Check ``str.split`` on plain, mixed-type and unicode data."""
        # single-character separator
        underscored = Series(["a_b_c", "c_d_e", NA, "f_g_h"])
        pieces = Series([["a", "b", "c"], ["c", "d", "e"], NA,
                         ["f", "g", "h"]])
        tm.assert_series_equal(underscored.str.split("_"), pieces)

        # more than one char: same pieces are expected
        doubled = Series(["a__b__c", "c__d__e", NA, "f__g__h"])
        tm.assert_series_equal(doubled.str.split("__"), pieces)

        # mixed: non-string entries are expected to come back as NA
        mixed_input = Series(["a_b_c", NA, "d_e_f", True, datetime.today(),
                              None, 1, 2.0])
        mixed_pieces = Series(mixed_input).str.split("_")
        mixed_expected = Series([["a", "b", "c"], NA, ["d", "e", "f"],
                                 NA, NA, NA, NA, NA])
        tm.assert_isinstance(mixed_pieces, Series)
        tm.assert_almost_equal(mixed_pieces, mixed_expected)

        # unicode
        uni = Series([u("a_b_c"), u("c_d_e"), NA, u("f_g_h")])
        uni_pieces = Series([[u("a"), u("b"), u("c")],
                             [u("c"), u("d"), u("e")],
                             NA,
                             [u("f"), u("g"), u("h")]])
        tm.assert_series_equal(uni.str.split("_"), uni_pieces)
Ejemplo n.º 4
0
    def test_endswith(self):
        """Check ``endswith`` through the function and the ``.str`` accessor."""
        suffix_flags = Series([False, NA, False, False, True, NA, True])

        words = Series(["om", NA, "foo_nom", "nom", "bar_foo", NA, "foo"])
        tm.assert_series_equal(words.str.endswith("foo"), suffix_flags)

        # mixed: non-string entries are expected to come back as NA
        mixed_input = ["a", NA, "b", True, datetime.today(), "foo", None, 1,
                       2.0]
        mixed_expected = [False, NA, False, NA, NA, False, NA, NA, NA]

        from_function = strings.str_endswith(mixed_input, "f")
        tm.assert_almost_equal(from_function, mixed_expected)

        from_accessor = Series(mixed_input).str.endswith("f")
        tm.assert_isinstance(from_accessor, Series)
        tm.assert_almost_equal(from_accessor, mixed_expected)

        # unicode
        uni_words = Series([u("om"), NA, u("foo_nom"), u("nom"),
                            u("bar_foo"), NA, u("foo")])
        tm.assert_series_equal(uni_words.str.endswith("foo"), suffix_flags)

        # na=False: missing entries are reported as False, giving bool dtype
        filled = uni_words.str.endswith("foo", na=False)
        tm.assert_series_equal(filled,
                               suffix_flags.fillna(False).astype(bool))
Ejemplo n.º 5
0
    def test_count(self):
        """Check regex match counting via the function and ``.str.count``."""
        pattern = 'f[o]+'
        counts = [1, 2, NA, 4]

        words = ['foo', 'foofoo', NA, 'foooofooofommmfoo']
        tm.assert_almost_equal(strings.str_count(words, pattern), counts)

        via_accessor = Series(words).str.count(pattern)
        tm.assert_isinstance(via_accessor, Series)
        tm.assert_almost_equal(via_accessor, counts)

        # mixed: non-string entries are expected to come back as NA
        mixed_input = ['a', NA, 'b', True, datetime.today(), 'foo', None, 1,
                       2.]
        mixed_counts = [1, NA, 0, NA, NA, 0, NA, NA, NA]
        tm.assert_almost_equal(strings.str_count(mixed_input, 'a'),
                               mixed_counts)

        mixed_via_accessor = Series(mixed_input).str.count('a')
        tm.assert_isinstance(mixed_via_accessor, Series)
        tm.assert_almost_equal(mixed_via_accessor, mixed_counts)

        # unicode
        uni_words = [u('foo'), u('foofoo'), NA, u('foooofooofommmfoo')]
        tm.assert_almost_equal(strings.str_count(uni_words, pattern), counts)

        uni_via_accessor = Series(uni_words).str.count(pattern)
        tm.assert_isinstance(uni_via_accessor, Series)
        tm.assert_almost_equal(uni_via_accessor, counts)
Ejemplo n.º 6
0
def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds):
    """
    internal. pprinter for dict-like objects. you should probably use
    pprint_thing() rather than calling this directly.

    Renders ``{key: val, ...}``, formatting every key and value with
    pprint_thing() one nesting level deeper. At most ``max_seq_items``
    pairs are shown (falling back to the "max_seq_items" option, then to
    the full length); ``max_seq_items=False`` disables the limit. When
    pairs are left out, ", ..." is appended inside the braces.
    """
    if max_seq_items is False:
        limit = len(seq)
    else:
        limit = max_seq_items or get_option("max_seq_items") or len(seq)

    pair_template = u("{key}: {val}")
    rendered = [
        pair_template.format(
            key=pprint_thing(key, _nest_lvl + 1,
                             max_seq_items=max_seq_items, **kwds),
            val=pprint_thing(value, _nest_lvl + 1,
                             max_seq_items=max_seq_items, **kwds))
        for key, value in list(seq.items())[:limit]
    ]

    body = ", ".join(rendered)
    if limit < len(seq):
        # signal that some pairs were not printed
        body = body + ", ..."
    return u("{{{things}}}").format(things=body)
Ejemplo n.º 7
0
    def test_deprecated_match(self):
        """Check the legacy, group-returning ``str.match`` behaviour.

        This is the old behaviour, deprecated (but still the default) in
        0.13, so every call is wrapped in ``tm.assert_produces_warning``.
        """
        pattern = '.*(BAD[_]+).*(BAD)'

        # plain strings: matches give the captured groups, misses give []
        plain = Series(['fooBAD__barBAD', NA, 'foo'])
        with tm.assert_produces_warning():
            plain_groups = plain.str.match(pattern)
        tm.assert_series_equal(plain_groups,
                               Series([('BAD__', 'BAD'), NA, []]))

        # mixed: non-string entries are expected to come back as NA
        mixed_input = Series(['aBAD_BAD', NA, 'BAD_b_BAD', True,
                              datetime.today(), 'foo', None, 1, 2.])
        with tm.assert_produces_warning():
            mixed_groups = Series(mixed_input).str.match(pattern)
        mixed_expected = [('BAD_', 'BAD'), NA, ('BAD_', 'BAD'),
                          NA, NA, [], NA, NA, NA]
        tm.assert_isinstance(mixed_groups, Series)
        tm.assert_almost_equal(mixed_groups, mixed_expected)

        # unicode
        uni = Series([u('fooBAD__barBAD'), NA, u('foo')])
        with tm.assert_produces_warning():
            uni_groups = uni.str.match(pattern)
        tm.assert_series_equal(uni_groups,
                               Series([(u('BAD__'), u('BAD')), NA, []]))
Ejemplo n.º 8
0
    def test_match(self):
        """Check the boolean ``str.match`` behaviour introduced in 0.13."""
        grouped = '.*(BAD[_]+).*(BAD)'

        # as_indexer=True with capture groups: boolean result, under a
        # warning-asserting context
        plain = Series(['fooBAD__barBAD', NA, 'foo'])
        with tm.assert_produces_warning():
            flags = plain.str.match(grouped, as_indexer=True)
        tm.assert_series_equal(flags, Series([True, NA, False]))

        # If no groups, use new behavior even when as_indexer is False.
        # (Old behavior is pretty much useless in this case.)
        plain = Series(['fooBAD__barBAD', NA, 'foo'])
        ungrouped_flags = plain.str.match('.*BAD[_]+.*BAD', as_indexer=False)
        tm.assert_series_equal(ungrouped_flags, Series([True, NA, False]))

        # mixed: non-string entries are expected to come back as NA
        mixed_input = Series(['aBAD_BAD', NA, 'BAD_b_BAD', True,
                              datetime.today(), 'foo', None, 1, 2.])
        with tm.assert_produces_warning():
            mixed_flags = Series(mixed_input).str.match(grouped,
                                                        as_indexer=True)
        mixed_expected = [True, NA, True, NA, NA, False, NA, NA, NA]
        tm.assert_isinstance(mixed_flags, Series)
        tm.assert_almost_equal(mixed_flags, mixed_expected)

        # unicode
        uni = Series([u('fooBAD__barBAD'), NA, u('foo')])
        with tm.assert_produces_warning():
            uni_flags = uni.str.match(grouped, as_indexer=True)
        tm.assert_series_equal(uni_flags, Series([True, NA, False]))
Ejemplo n.º 9
0
    def test_endswith(self):
        """Check ``endswith`` through the function and the ``.str`` accessor."""
        suffix_flags = Series([False, NA, False, False, True, NA, True])

        words = Series(['om', NA, 'foo_nom', 'nom', 'bar_foo', NA, 'foo'])
        tm.assert_series_equal(words.str.endswith('foo'), suffix_flags)

        # mixed: non-string entries are expected to come back as NA
        mixed_input = ['a', NA, 'b', True, datetime.today(), 'foo', None, 1,
                       2.]
        mixed_expected = [False, NA, False, NA, NA, False, NA, NA, NA]

        from_function = strings.str_endswith(mixed_input, 'f')
        tm.assert_almost_equal(from_function, mixed_expected)

        from_accessor = Series(mixed_input).str.endswith('f')
        tm.assert_isinstance(from_accessor, Series)
        tm.assert_almost_equal(from_accessor, mixed_expected)

        # unicode
        uni_words = Series([u('om'), NA, u('foo_nom'), u('nom'),
                            u('bar_foo'), NA, u('foo')])
        tm.assert_series_equal(uni_words.str.endswith('foo'), suffix_flags)

        # na=False: missing entries are reported as False, giving bool dtype
        filled = uni_words.str.endswith('foo', na=False)
        tm.assert_series_equal(filled,
                               suffix_flags.fillna(False).astype(bool))
Ejemplo n.º 10
0
    def test_astype_unicode(self):
        """Check that ``astype("unicode")`` matches mapping ``text_type``.

        On Python 2 the interpreter's default encoding is temporarily forced
        to utf-8 so that an encoded byte string can be part of the test; the
        previous encoding is always restored, even if an assertion fails.
        """
        # GH7758
        # a bit of magic is required to set the default encoding to utf-8
        digits = string.digits
        test_series = [
            Series([digits * 10,
                    tm.rands(63),
                    tm.rands(64),
                    tm.rands(1000)]),
            Series([u('データーサイエンス、お前はもう死んでいる')]),
        ]

        former_encoding = None
        if not compat.PY3:
            # in python 2 we can force the default encoding for this test
            former_encoding = sys.getdefaultencoding()
            reload(sys)  # noqa
            sys.setdefaultencoding("utf-8")
        try:
            if sys.getdefaultencoding() == "utf-8":
                test_series.append(
                    Series([u('野菜食べないとやばい').encode("utf-8")]))
            for s in test_series:
                res = s.astype("unicode")
                expec = s.map(compat.text_type)
                assert_series_equal(res, expec)
        finally:
            # restore the former encoding so a failure above cannot leak the
            # forced default encoding into later tests
            if former_encoding is not None and former_encoding != "utf-8":
                reload(sys)  # noqa
                sys.setdefaultencoding(former_encoding)
Ejemplo n.º 11
0
    def testArrayNumpyLabelled(self):
        """Check ``ujson.loads(..., numpy=True, labelled=True)`` output.

        The decoded result is indexed as ``output[0]`` (values),
        ``output[1]`` (row labels or None) and ``output[2]`` (column labels
        or None), as the assertions below show.
        """
        labelled_input = {'a': []}
        output = ujson.loads(ujson.dumps(labelled_input),
                             numpy=True, labelled=True)
        self.assertTrue((np.empty((1, 0)) == output[0]).all())
        self.assertTrue((np.array(['a']) == output[1]).all())
        self.assertTrue(output[2] is None)

        labelled_input = [{'a': 42}]
        output = ujson.loads(ujson.dumps(labelled_input),
                             numpy=True, labelled=True)
        self.assertTrue((np.array([42]) == output[0]).all())
        self.assertTrue(output[1] is None)
        self.assertTrue((np.array([u('a')]) == output[2]).all())

        # Write the JSON text out explicitly (see gh-10837) so the test no
        # longer depends on dict iteration order and can run on Python 3 too.
        input_dumps = ('[{"a": 42, "b":31}, {"a": 24, "c": 99}, '
                       '{"a": 2.4, "b": 78}]')
        output = ujson.loads(input_dumps, numpy=True, labelled=True)
        expectedvals = np.array(
            [42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2))
        self.assertTrue((expectedvals == output[0]).all())
        self.assertTrue(output[1] is None)
        self.assertTrue((np.array([u('a'), 'b']) == output[2]).all())

        input_dumps = ('{"1": {"a": 42, "b":31}, "2": {"a": 24, "c": 99}, '
                       '"3": {"a": 2.4, "b": 78}}')
        output = ujson.loads(input_dumps, numpy=True, labelled=True)
        expectedvals = np.array(
            [42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2))
        self.assertTrue((expectedvals == output[0]).all())
        self.assertTrue((np.array(['1', '2', '3']) == output[1]).all())
        self.assertTrue((np.array(['a', 'b']) == output[2]).all())
Ejemplo n.º 12
0
def load_reduce(self):
    """Handle a pickle REDUCE step with compatibility fallbacks.

    Pops the argument tuple from ``self.stack`` and replaces the callable
    left on top of the stack with the result of calling it.

    Special cases:
    * If the first argument is one of the ``Deprecated*Series`` classes, a
      bare ``Series`` / ``SparseSeries`` instance is created instead.
    * If the call fails and ``self.encoding`` is set, string arguments are
      re-encoded with that encoding and the call is retried once.

    Raises
    ------
    Exception
        Whatever the callable raised, if both the call and the (optional)
        re-encoded retry fail. With ``self.is_verbose`` set, the exception
        info, callable and arguments are printed first.
    """
    stack = self.stack
    args = stack.pop()
    func = stack[-1]

    # An empty argument tuple is legal; only peek at args[0] when it exists.
    if len(args) > 0 and type(args[0]) is type:
        n = args[0].__name__
        if n == u('DeprecatedSeries') or n == u('DeprecatedTimeSeries'):
            stack[-1] = object.__new__(Series)
            return
        elif (n == u('DeprecatedSparseSeries') or
              n == u('DeprecatedSparseTimeSeries')):
            stack[-1] = object.__new__(SparseSeries)
            return

    try:
        value = func(*args)
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt / SystemExit
        # are never swallowed by the retry logic.

        # try to reencode the arguments
        if self.encoding is not None:
            args = tuple(arg.encode(self.encoding)
                         if isinstance(arg, string_types) else arg
                         for arg in args)
            try:
                stack[-1] = func(*args)
                return
            except Exception:
                # best-effort retry: fall through and re-raise below
                pass

        if self.is_verbose:
            print(sys.exc_info())
            print(func, args)
        raise

    stack[-1] = value
Ejemplo n.º 13
0
def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds):
    """
    internal. pprinter for iterables. you should probably use pprint_thing()
    rather than calling this directly.

    Sets are wrapped in braces, objects with ``__setitem__`` in square
    brackets and everything else in parentheses. The number of printed
    items is bounded by ``max_seq_items`` (falling back to the
    "max_seq_items" option, then to the full length);
    ``max_seq_items=False`` disables the bound.
    """
    if isinstance(seq, set):
        template = u("{{{body}}}")
    elif hasattr(seq, '__setitem__'):
        template = u("[{body}]")
    else:
        template = u("({body})")

    if max_seq_items is False:
        limit = len(seq)
    else:
        limit = max_seq_items or get_option("max_seq_items") or len(seq)

    # walk an iterator instead of slicing so that sets are supported too
    source = iter(seq)
    shown = min(limit, len(seq))
    rendered = [
        pprint_thing(next(source), _nest_lvl + 1,
                     max_seq_items=max_seq_items, **kwds)
        for _ in range(shown)
    ]
    body = ", ".join(rendered)

    if limit < len(seq):
        # some items were left out
        body += ", ..."
    elif isinstance(seq, tuple) and len(seq) == 1:
        # keep the trailing comma of a one-element tuple
        body += ','

    return template.format(body=body)
Ejemplo n.º 14
0
    def test_to_latex_escape(self):
        a = 'a'
        b = 'b'

        test_dict = {u('co^l1'): {a: "a",
                                  b: "b"},
                     u('co$e^x$'): {a: "a",
                                    b: "b"}}

        unescaped_result = DataFrame(test_dict).to_latex(escape=False)
        escaped_result = DataFrame(test_dict).to_latex(
        )  # default: escape=True

        unescaped_expected = r'''\begin{tabular}{lll}
\toprule
{} & co$e^x$ & co^l1 \\
\midrule
a &       a &     a \\
b &       b &     b \\
\bottomrule
\end{tabular}
'''

        escaped_expected = r'''\begin{tabular}{lll}
\toprule
{} & co\$e\textasciicircumx\$ & co\textasciicircuml1 \\
\midrule
a &       a &     a \\
b &       b &     b \\
\bottomrule
\end{tabular}
'''

        assert unescaped_result == unescaped_expected
        assert escaped_result == escaped_expected
Ejemplo n.º 15
0
    def test_array_numpy_labelled(self):
        """Check ``ujson.loads(..., numpy=True, labelled=True)`` output.

        The decoded result is indexed as ``[0]`` (values), ``[1]`` (row
        labels or None) and ``[2]`` (column labels or None), as the
        assertions below show.
        """
        def decode_labelled(json_text):
            return ujson.loads(json_text, numpy=True, labelled=True)

        decoded = decode_labelled(ujson.dumps({"a": []}))
        assert (np.empty((1, 0)) == decoded[0]).all()
        assert (np.array(["a"]) == decoded[1]).all()
        assert decoded[2] is None

        decoded = decode_labelled(ujson.dumps([{"a": 42}]))
        assert (np.array([u("a")]) == decoded[2]).all()
        assert (np.array([42]) == decoded[0]).all()
        assert decoded[1] is None

        # see gh-10837: write out the dump explicitly
        # so there is no dependency on iteration order
        records_json = ('[{"a": 42, "b":31}, {"a": 24, "c": 99}, '
                        '{"a": 2.4, "b": 78}]')
        decoded = decode_labelled(records_json)
        wanted_values = np.array(
            [42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2))
        assert (wanted_values == decoded[0]).all()
        assert decoded[1] is None
        assert (np.array([u("a"), "b"]) == decoded[2]).all()

        keyed_json = ('{"1": {"a": 42, "b":31}, "2": {"a": 24, "c": 99}, '
                      '"3": {"a": 2.4, "b": 78}}')
        decoded = decode_labelled(keyed_json)
        wanted_values = np.array(
            [42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2))
        assert (wanted_values == decoded[0]).all()
        assert (np.array(["1", "2", "3"]) == decoded[1]).all()
        assert (np.array(["a", "b"]) == decoded[2]).all()
Ejemplo n.º 16
0
 def test_to_html_unicode(self, datapath):
     """Compare ``to_html`` output for unicode data with stored fixtures.

     One frame has a non-ASCII column label, the other a non-ASCII cell
     value; each is checked against the named ``expected_html`` fixture.
     """
     cases = [
         (DataFrame({u('\u03c3'): np.arange(10.)}), 'unicode_1'),
         (DataFrame({'A': [u('\u03c3')]}), 'unicode_2'),
     ]
     for frame, fixture_name in cases:
         wanted = expected_html(datapath, fixture_name)
         assert frame.to_html() == wanted
Ejemplo n.º 17
0
 def test_to_html_unicode(self):
     """Compare ``to_html`` output for unicode data with inline HTML.

     The first frame has a non-ASCII column label, the second a non-ASCII
     cell value; each rendered table must equal the literal below exactly.
     """
     # non-ASCII column label, ten float rows
     df = DataFrame({u('\u03c3'): np.arange(10.)})
     expected = u'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>\u03c3</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1.0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2.0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>3.0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>5.0</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>6.0</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>7.0</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>8.0</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>9.0</td>\n    </tr>\n  </tbody>\n</table>'  # noqa
     assert df.to_html() == expected
     # non-ASCII cell value under an ASCII column label
     df = DataFrame({'A': [u('\u03c3')]})
     expected = u'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>A</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>\u03c3</td>\n    </tr>\n  </tbody>\n</table>'  # noqa
     assert df.to_html() == expected
Ejemplo n.º 18
0
def _build_option_description(k):
    """Build and return a formatted description of a registered option.

    Parameters
    ----------
    k : str
        The option key.

    Returns
    -------
    str
        The key, followed by the option's documentation (or
        "No description available."), its default and current values when
        the option is registered, a deprecation note when the key is
        deprecated, and two trailing newlines.
    """

    o = _get_registered_option(k)
    d = _get_deprecated_option(k)

    s = u('{k} ').format(k=k)

    # Check ``o`` itself before touching ``o.doc`` so an unregistered key
    # yields the fallback text instead of an AttributeError.
    if o and o.doc:
        s += o.doc.strip()
    else:
        s += 'No description available.'

    if o:
        s += (u('\n    [default: {default}] [currently: {current}]')
              .format(default=o.defval, current=_get_option(k, True)))

    if d:
        s += u('\n    (Deprecated')
        s += (u(', use `{rkey}` instead.')
              .format(rkey=d.rkey if d.rkey else ''))
        s += u(')')

    s += '\n\n'
    return s
Ejemplo n.º 19
0
    def test_count(self):
        """Check regex match counting via the function and ``.str.count``."""
        pattern = "f[o]+"
        counts = [1, 2, NA, 4]

        words = ["foo", "foofoo", NA, "foooofooofommmfoo"]
        tm.assert_almost_equal(strings.str_count(words, pattern), counts)

        via_accessor = Series(words).str.count(pattern)
        tm.assert_isinstance(via_accessor, Series)
        tm.assert_almost_equal(via_accessor, counts)

        # mixed: non-string entries are expected to come back as NA
        mixed_input = ["a", NA, "b", True, datetime.today(), "foo", None, 1,
                       2.0]
        mixed_counts = [1, NA, 0, NA, NA, 0, NA, NA, NA]
        tm.assert_almost_equal(strings.str_count(mixed_input, "a"),
                               mixed_counts)

        mixed_via_accessor = Series(mixed_input).str.count("a")
        tm.assert_isinstance(mixed_via_accessor, Series)
        tm.assert_almost_equal(mixed_via_accessor, mixed_counts)

        # unicode
        uni_words = [u("foo"), u("foofoo"), NA, u("foooofooofommmfoo")]
        tm.assert_almost_equal(strings.str_count(uni_words, pattern), counts)

        uni_via_accessor = Series(uni_words).str.count(pattern)
        tm.assert_isinstance(uni_via_accessor, Series)
        tm.assert_almost_equal(uni_via_accessor, counts)
Ejemplo n.º 20
0
 def axis_pretty(a):
     """Return a one-line summary of the axis attribute named ``a``.

     Shows the first and last label of the axis, or "None" when the axis
     is empty. ``self`` is taken from the enclosing scope.
     """
     labels = getattr(self, a)
     if len(labels) <= 0:
         return u('%s axis: None') % a.capitalize()

     parts = (a.capitalize(),
              com.pprint_thing(labels[0]),
              com.pprint_thing(labels[-1]))
     return u('%s axis: %s to %s') % parts
Ejemplo n.º 21
0
def test_is_recompilable():
    """Check ``com.is_re_compilable`` on accepted and rejected inputs."""
    # strings and already-compiled patterns must be accepted
    compilable = (
        r"a",
        u("x"),
        r"asdf",
        re.compile("adsf"),
        u(r"\u2233\s*"),
        re.compile(r""),
    )
    # objects that are neither strings nor patterns must be rejected
    not_compilable = (1, [], object())

    for candidate in compilable:
        assert com.is_re_compilable(candidate)

    for candidate in not_compilable:
        assert not com.is_re_compilable(candidate)
Ejemplo n.º 22
0
    def test_to_csv_unicode_index(self):
        """Round-trip a Series with non-ASCII index labels through CSV."""
        labels = [u("\u05d0"), u("\u05d1")]
        original = Series([u("\u05d0"), "d2"], index=labels)

        stream = StringIO()
        original.to_csv(stream, encoding="UTF-8")
        stream.seek(0)  # rewind so the reader starts at the beginning

        reloaded = self.read_csv(stream, index_col=0, encoding="UTF-8")
        assert_series_equal(original, reloaded)
Ejemplo n.º 23
0
    def test_read_csv(self):
        """Read ``self.csv1`` through a ``file://`` URL (Python 2 only)."""
        if compat.PY3:
            # nothing is exercised on Python 3
            return

        # Windows uses a three-slash prefix, other platforms two
        prefix = (u("file:///") if compat.is_platform_windows()
                  else u("file://"))
        url = prefix + compat.text_type(self.csv1)
        self.read_csv(url, index_col=0, parse_dates=True)
Ejemplo n.º 24
0
    def _format_native_types(self, na_rep=u('NaT'), **kwargs):
        """Return the elements as an object array of formatted strings.

        Null entries (per ``isnull(self.values)``) are replaced by
        ``na_rep``; every other element is formatted with ``'%s'``.
        Extra keyword arguments are accepted and ignored.
        """
        formatted = np.array(list(self), dtype=object)
        missing = isnull(self.values)
        formatted[missing] = na_rep

        present = ~missing
        as_text = [u('%s') % element for element in formatted[present]]
        formatted[present] = np.array(as_text)
        return formatted
Ejemplo n.º 25
0
    def test_wdi_download(self):
        """Compare a ``download`` result with a stored reference frame.

        The test is currently disabled: it raises ``nose.SkipTest`` on its
        first statement, so everything below that line is unreachable.
        """
        raise nose.SkipTest

        # reference values keyed by indicator, then by (country, year)
        expected = {'GDPPCKN': {(u('United States'), u('2003')): u('40800.0735367688'), (u('Canada'), u('2004')): u('37857.1261134552'), (u('United States'), u('2005')): u('42714.8594790102'), (u('Canada'), u('2003')): u('37081.4575704003'), (u('United States'), u('2004')): u('41826.1728310667'), (u('Mexico'), u('2003')): u('72720.0691255285'), (u('Mexico'), u('2004')): u('74751.6003347038'), (u('Mexico'), u('2005')): u('76200.2154469437'), (u('Canada'), u('2005')): u('38617.4563629611')}, 'GDPPCKD': {(u('United States'), u('2003')): u('40800.0735367688'), (u('Canada'), u('2004')): u('34397.055116118'), (u('United States'), u('2005')): u('42714.8594790102'), (u('Canada'), u('2003')): u('33692.2812368928'), (u('United States'), u('2004')): u('41826.1728310667'), (u('Mexico'), u('2003')): u('7608.43848670658'), (u('Mexico'), u('2004')): u('7820.99026814334'), (u('Mexico'), u('2005')): u('7972.55364129367'), (u('Canada'), u('2005')): u('35087.8925933298')}}
        expected = pandas.DataFrame(expected)
        # the request deliberately includes a 'junk' country and indicator
        result = download(country=['CA', 'MX', 'US', 'junk'], indicator=['GDPPCKD',
                                                                         'GDPPCKN', 'junk'], start=2003, end=2005)
        # reuse the result's index so only the values are compared
        expected.index = result.index
        assert_frame_equal(result, pandas.DataFrame(expected))
Ejemplo n.º 26
0
    def test_encode_decode(self):
        """Check ``str.decode`` against element-wise ``bytes.decode``."""
        encoded = Series([u('a'), u('b'), u('a\xe4')]).str.encode('utf-8')

        decoded = encoded.str.decode('utf-8')
        # reference result: decode every element individually
        reference = encoded.map(lambda raw: raw.decode('utf-8'))

        tm.assert_series_equal(decoded, reference)
Ejemplo n.º 27
0
    def test_encode_decode(self):
        """Check ``str.decode`` against element-wise ``bytes.decode``."""
        encoded = Series([u("a"), u("b"), u("a\xe4")]).str.encode("utf-8")

        decoded = encoded.str.decode("utf-8")
        # reference result: decode every element individually
        reference = encoded.map(lambda raw: raw.decode("utf-8"))

        tm.assert_series_equal(decoded, reference)
Ejemplo n.º 28
0
    def test_repr_should_return_str(self):
        """Check that ``__repr__`` returns a ``str`` for a unicode index."""
        # https://docs.python.org/3/reference/datamodel.html#object.__repr__
        # ...The return value must be a string object.

        # (str on py2.x, str (unicode) on py3)

        data = [8, 5, 3, 5]
        index1 = [u("\u03c3"), u("\u03c4"), u("\u03c5"), u("\u03c6")]
        df = Series(data, index=index1)
        # The comparison must sit outside ``type(...)``: the previous
        # ``type(repr == str)`` was the (always truthy) ``bool`` class.
        assert type(df.__repr__()) == str  # both py2 / 3
Ejemplo n.º 29
0
    def setup_method(self, _):
        """Create the fixtures used by the tests in this class.

        Sets ``lst``, ``klass``, ``container`` and ``unicode_container``;
        the containers are built with FutureWarning silenced.
        """
        self.klass = FrozenNDArray
        self.lst = [3, 5, 7, -2]

        with warnings.catch_warnings(record=True):
            warnings.simplefilter("ignore", FutureWarning)

            self.container = FrozenNDArray(self.lst)
            unicode_values = [u("\u05d0"), u("\u05d1"), "c"]
            self.unicode_container = FrozenNDArray(unicode_values)
Ejemplo n.º 30
0
def test_is_recompilable():
    """Check ``com.is_re_compilable`` on accepted and rejected inputs."""
    # strings and already-compiled patterns must be accepted
    compilable = (
        r'a',
        u('x'),
        r'asdf',
        re.compile('adsf'),
        u(r'\u2233\s*'),
        re.compile(r''),
    )
    # objects that are neither strings nor patterns must be rejected
    not_compilable = (1, [], object())

    for candidate in compilable:
        assert com.is_re_compilable(candidate)

    for candidate in not_compilable:
        assert not com.is_re_compilable(candidate)
    def test_filter(self):
        """Exercise ``filter`` with ``items``, ``like`` and ``regex``.

        Also checks the ``TypeError`` raised when no selector is given or
        when several mutually exclusive selectors are combined.
        """
        # Items: default axis and the explicit 'columns' axis agree
        for extra in ({}, {'axis': 'columns'}):
            by_items = self.frame.filter(['A', 'B', 'E'], **extra)
            assert len(by_items.columns) == 2
            assert 'E' not in by_items

        # Other axis
        head_labels = self.frame.index[0:4]
        by_index = self.frame.filter(head_labels, axis='index')
        tm.assert_frame_equal(by_index,
                              self.frame.reindex(index=head_labels))

        # like
        widened = self.frame.copy()
        widened['AA'] = 1
        by_like = widened.filter(like='A')
        assert len(by_like.columns) == 2
        assert 'AA' in by_like

        # like with ints in column names
        int_named = DataFrame(0., index=[0, 1, 2],
                              columns=[0, 1, '_A', '_B'])
        assert len(int_named.filter(like='_').columns) == 2

        # regex with ints in column names
        # from PR #10384
        int_named = DataFrame(0., index=[0, 1, 2],
                              columns=['A1', 1, 'B', 2, 'C'])
        digits_only = DataFrame(0.,
                                index=[0, 1, 2],
                                columns=pd.Index([1, 2], dtype=object))
        tm.assert_frame_equal(int_named.filter(regex='^[0-9]+$'),
                              digits_only)

        all_digits = DataFrame(0., index=[0, 1, 2],
                               columns=[0, '0', 1, '1'])
        # shouldn't remove anything
        tm.assert_frame_equal(all_digits.filter(regex='^[0-9]+$'),
                              all_digits)

        # pass in None
        for kwargs in ({}, {'items': None}, {'axis': 1}):
            with tm.assert_raises_regex(TypeError, 'Must pass'):
                self.frame.filter(**kwargs)

        # test mutually exclusive arguments
        conflicting = (
            dict(items=['one', 'three'], regex='e$', like='bbi'),
            dict(items=['one', 'three'], regex='e$', axis=1),
            dict(items=['one', 'three'], regex='e$'),
            dict(items=['one', 'three'], like='bbi', axis=0),
            dict(items=['one', 'three'], like='bbi'),
        )
        for kwargs in conflicting:
            with tm.assert_raises_regex(TypeError, 'mutually exclusive'):
                self.frame.filter(**kwargs)

        # objects
        assert 'foo' in self.mixed_frame.filter(like='foo')

        # unicode columns, won't ascii-encode
        renamed = self.frame.rename(columns={'B': u('\u2202')})
        assert 'C' in renamed.filter(like='C')
Ejemplo n.º 32
0
    def test_double_long_numbers(self, long_number):
        """Round-trip a dict holding ``long_number`` at 15-digit precision."""
        payload = {u("a"): long_number}

        as_json = ujson.encode(payload, double_precision=15)
        round_tripped = ujson.decode(as_json)

        assert payload == round_tripped
def _u(x):
    """Recursively pass every key of a (nested) dict through ``u``.

    Anything that is not a dict is returned unchanged.
    """
    if not isinstance(x, dict):
        return x
    return {u(name): _u(x[name]) for name in x}
Ejemplo n.º 34
0
class TestUltraJSONTests(object):
    """Encode/decode tests for the ``ujson`` module under test.

    Most tests encode a Python value and then compare the output with the
    standard-library ``json`` module and/or decode it back again.
    """

    @pytest.mark.skipif(compat.is_platform_32bit(),
                        reason="not compliant on 32-bit, xref #15865")
    def test_encode_decimal(self):
        """Decimals are encoded as doubles rounded to ``double_precision``."""
        sut = decimal.Decimal("1337.1337")
        encoded = ujson.encode(sut, double_precision=15)
        decoded = ujson.decode(encoded)
        assert decoded == 1337.1337

        # (decimal literal, double_precision, expected encoding,
        #  expected decoded value)
        rounding_cases = [
            ("0.95", 1, "1.0", 1.0),
            ("0.94", 1, "0.9", 0.9),
            ("1.95", 1, "2.0", 2.0),
            ("-1.95", 1, "-2.0", -2.0),
            ("0.995", 2, "1.0", 1.0),
            ("0.9995", 3, "1.0", 1.0),
            ("0.99999999999999944", 15, "1.0", 1.0),
        ]
        for literal, precision, exp_encoded, exp_decoded in rounding_cases:
            sut = decimal.Decimal(literal)
            encoded = ujson.encode(sut, double_precision=precision)
            assert encoded == exp_encoded

            decoded = ujson.decode(encoded)
            assert decoded == exp_decoded

    @pytest.mark.parametrize("ensure_ascii", [True, False])
    def test_encode_string_conversion(self, ensure_ascii):
        """Control and HTML characters are escaped per ``encode_html_chars``."""
        string_input = "A string \\ / \b \f \n \r \t </script> &"
        not_html_encoded = ('"A string \\\\ \\/ \\b \\f \\n '
                            '\\r \\t <\\/script> &"')
        html_encoded = ('"A string \\\\ \\/ \\b \\f \\n \\r \\t '
                        '\\u003c\\/script\\u003e \\u0026"')

        def helper(expected_output, **encode_kwargs):
            # Encode with the given options, check the exact output, and
            # check that both decoders recover the original string.
            output = ujson.encode(string_input,
                                  ensure_ascii=ensure_ascii,
                                  **encode_kwargs)

            assert output == expected_output
            assert string_input == json.loads(output)
            assert string_input == ujson.decode(output)

        # Default behavior assumes encode_html_chars=False.
        helper(not_html_encoded)

        # Make sure explicit encode_html_chars=False works.
        helper(not_html_encoded, encode_html_chars=False)

        # Make sure explicit encode_html_chars=True does the encoding.
        helper(html_encoded, encode_html_chars=True)

    @pytest.mark.parametrize(
        "long_number",
        [-4342969734183514, -12345678901234.56789012, -528656961.4399388])
    def test_double_long_numbers(self, long_number):
        """A dict holding a long number round-trips at 15-digit precision."""
        sut = {u("a"): long_number}
        encoded = ujson.encode(sut, double_precision=15)

        decoded = ujson.decode(encoded)
        assert sut == decoded

    def test_encode_non_c_locale(self):
        """Floats round-trip under a non-C ``LC_NUMERIC`` locale."""
        lc_category = locale.LC_NUMERIC

        # We just need one of these locales to work.
        for new_locale in ("it_IT.UTF-8", "Italian_Italy"):
            if tm.can_set_locale(new_locale, lc_category):
                with tm.set_locale(new_locale, lc_category):
                    assert ujson.loads(ujson.dumps(4.78e60)) == 4.78e60
                    assert ujson.loads("4.78", precise_float=True) == 4.78
                break
        else:
            # Neither candidate locale could be set: report a skip rather
            # than passing without having checked anything.
            pytest.skip("no suitable non-C locale available")

    def test_decimal_decode_test_precise(self):
        """A float value round-trips when decoded with ``precise_float``."""
        sut = {u("a"): 4.56}
        encoded = ujson.encode(sut)
        decoded = ujson.decode(encoded, precise_float=True)
        assert sut == decoded

    @pytest.mark.skipif(compat.is_platform_windows() and not compat.PY3,
                        reason="buggy on win-64 for py2")
    def test_encode_double_tiny_exponential(self):
        """Doubles with tiny exponents survive an encode/decode cycle."""
        for num in (1e-40, 1e-100, -1e-45):
            assert num == ujson.decode(ujson.encode(num))

        # This value is only compared approximately.
        num = -1e-145
        assert np.allclose(num, ujson.decode(ujson.encode(num)))

    @pytest.mark.parametrize("unicode_key", [u("key1"), u("بن")])
    def test_encode_dict_with_unicode_keys(self, unicode_key):
        """A dict keyed by a ``u()`` string round-trips unchanged."""
        unicode_dict = {unicode_key: u("value1")}
        assert unicode_dict == ujson.decode(ujson.encode(unicode_dict))

    @pytest.mark.parametrize(
        "double_input",
        [
            math.pi,
            -math.pi  # Should work with negatives too.
        ])
    def test_encode_double_conversion(self, double_input):
        """An encoded double decodes to the input, to 5 decimal places."""
        output = ujson.encode(double_input)
        assert round(double_input, 5) == round(json.loads(output), 5)
        assert round(double_input, 5) == round(ujson.decode(output), 5)

    def test_encode_with_decimal(self):
        """The float ``1.0`` keeps its decimal point when encoded."""
        # Despite the test name, the input is a float, not a Decimal.
        decimal_input = 1.0
        output = ujson.encode(decimal_input)

        assert output == "1.0"

    def test_encode_array_of_nested_arrays(self):
        """A list of deeply nested empty lists round-trips."""
        nested_input = [[[[]]]] * 20
        output = ujson.encode(nested_input)

        assert nested_input == json.loads(output)
        assert nested_input == ujson.decode(output)

        nested_input = np.array(nested_input)
        tm.assert_numpy_array_equal(
            nested_input,
            ujson.decode(output, numpy=True, dtype=nested_input.dtype))

    def test_encode_array_of_doubles(self):
        """A list of doubles round-trips, also with ``numpy=True``."""
        doubles_input = [31337.31337, 31337.31337, 31337.31337, 31337.31337
                         ] * 10
        output = ujson.encode(doubles_input)

        assert doubles_input == json.loads(output)
        assert doubles_input == ujson.decode(output)

        tm.assert_numpy_array_equal(np.array(doubles_input),
                                    ujson.decode(output, numpy=True))

    def test_double_precision(self):
        """``double_precision`` controls how many decimals are emitted."""
        double_input = 30.012345678901234
        output = ujson.encode(double_input, double_precision=15)

        assert double_input == json.loads(output)
        assert double_input == ujson.decode(output)

        for double_precision in (3, 9):
            output = ujson.encode(double_input,
                                  double_precision=double_precision)
            rounded_input = round(double_input, double_precision)

            assert rounded_input == json.loads(output)
            assert rounded_input == ujson.decode(output)

    @pytest.mark.parametrize("invalid_val", [20, -1, "9", None])
    def test_invalid_double_precision(self, invalid_val):
        """Bad ints raise ``ValueError``; non-ints raise ``TypeError``."""
        double_input = 30.12345678901234567890
        expected_exception = (ValueError
                              if isinstance(invalid_val, int) else TypeError)

        with pytest.raises(expected_exception):
            ujson.encode(double_input, double_precision=invalid_val)

    def test_encode_string_conversion2(self):
        """Backslash, slash and control characters are escaped."""
        string_input = "A string \\ / \b \f \n \r \t"
        output = ujson.encode(string_input)

        assert string_input == json.loads(output)
        assert string_input == ujson.decode(output)
        assert output == '"A string \\\\ \\/ \\b \\f \\n \\r \\t"'

    @pytest.mark.parametrize(
        "unicode_input",
        ["Räksmörgås اسامة بن محمد بن عوض بن لادن", "\xe6\x97\xa5\xd1\x88"])
    def test_encode_unicode_conversion(self, unicode_input):
        """Non-ASCII text encodes like ``json_unicode`` and decodes back."""
        enc = ujson.encode(unicode_input)
        dec = ujson.decode(enc)

        assert enc == json_unicode(unicode_input)
        assert dec == json.loads(enc)

    def test_encode_control_escaping(self):
        """The control character ``\\x19`` is escaped and round-trips."""
        escaped_input = "\x19"
        enc = ujson.encode(escaped_input)
        dec = ujson.decode(enc)

        assert escaped_input == dec
        assert enc == json_unicode(escaped_input)

    def test_encode_unicode_surrogate_pair(self):
        """The surrogate-pair sample encodes like ``json_unicode``."""
        surrogate_input = "\xf0\x90\x8d\x86"
        enc = ujson.encode(surrogate_input)
        dec = ujson.decode(enc)

        assert enc == json_unicode(surrogate_input)
        assert dec == json.loads(enc)

    def test_encode_unicode_4bytes_utf8(self):
        """The 4-byte sample followed by ASCII text encodes correctly."""
        four_bytes_input = "\xf0\x91\x80\xb0TRAILINGNORMAL"
        enc = ujson.encode(four_bytes_input)
        dec = ujson.decode(enc)

        assert enc == json_unicode(four_bytes_input)
        assert dec == json.loads(enc)

    def test_encode_unicode_4bytes_utf8highest(self):
        """The "highest" 4-byte sample followed by ASCII encodes correctly."""
        four_bytes_input = "\xf3\xbf\xbf\xbfTRAILINGNORMAL"
        enc = ujson.encode(four_bytes_input)

        dec = ujson.decode(enc)

        assert enc == json_unicode(four_bytes_input)
        assert dec == json.loads(enc)

    def test_encode_array_in_array(self):
        """Nested empty lists encode exactly like ``json.dumps``."""
        arr_in_arr_input = [[[[]]]]
        output = ujson.encode(arr_in_arr_input)

        assert arr_in_arr_input == json.loads(output)
        assert output == json.dumps(arr_in_arr_input)
        assert arr_in_arr_input == ujson.decode(output)

        tm.assert_numpy_array_equal(np.array(arr_in_arr_input),
                                    ujson.decode(output, numpy=True))

    @pytest.mark.parametrize(
        "num_input",
        [
            31337,
            -31337,  # Negative number.
            -9223372036854775808  # Large negative number.
        ])
    def test_encode_num_conversion(self, num_input):
        """Integers encode exactly like ``json.dumps`` and round-trip."""
        output = ujson.encode(num_input)
        assert num_input == json.loads(output)
        assert output == json.dumps(num_input)
        assert num_input == ujson.decode(output)

    def test_encode_list_conversion(self):
        """A list of ints round-trips, also with ``numpy=True``."""
        list_input = [1, 2, 3, 4]
        output = ujson.encode(list_input)

        assert list_input == json.loads(output)
        assert list_input == ujson.decode(output)

        tm.assert_numpy_array_equal(np.array(list_input),
                                    ujson.decode(output, numpy=True))

    def test_encode_dict_conversion(self):
        """A dict with string keys and int values round-trips."""
        dict_input = {"k1": 1, "k2": 2, "k3": 3, "k4": 4}
        output = ujson.encode(dict_input)

        assert dict_input == json.loads(output)
        assert dict_input == ujson.decode(output)

    @pytest.mark.parametrize("builtin_value", [None, True, False])
    def test_encode_builtin_values_conversion(self, builtin_value):
        """``None``/``True``/``False`` encode exactly like ``json.dumps``."""
        output = ujson.encode(builtin_value)
        assert builtin_value == json.loads(output)
        assert output == json.dumps(builtin_value)
        assert builtin_value == ujson.decode(output)

    def test_encode_datetime_conversion(self):
        """With ``date_unit="s"`` a datetime encodes to epoch seconds."""
        datetime_input = datetime.datetime.fromtimestamp(time.time())
        output = ujson.encode(datetime_input, date_unit="s")
        expected = calendar.timegm(datetime_input.utctimetuple())

        assert int(expected) == json.loads(output)
        assert int(expected) == ujson.decode(output)

    def test_encode_date_conversion(self):
        """With ``date_unit="s"`` a date encodes to its midnight epoch."""
        date_input = datetime.date.fromtimestamp(time.time())
        output = ujson.encode(date_input, date_unit="s")

        tup = (date_input.year, date_input.month, date_input.day, 0, 0, 0)
        expected = calendar.timegm(tup)

        assert int(expected) == json.loads(output)
        assert int(expected) == ujson.decode(output)

    @pytest.mark.parametrize("test", [
        datetime.time(),
        datetime.time(1, 2, 3),
        datetime.time(10, 12, 15, 343243),
    ])
    def test_encode_time_conversion_basic(self, test):
        """A ``datetime.time`` encodes to its quoted ISO format."""
        output = ujson.encode(test)
        expected = '"{iso}"'.format(iso=test.isoformat())
        assert expected == output

    def test_encode_time_conversion_pytz(self):
        """A pytz-aware time encodes to its quoted ISO format."""
        # see gh-11473: to_json segfaults with timezone-aware datetimes
        test = datetime.time(10, 12, 15, 343243, pytz.utc)
        output = ujson.encode(test)
        expected = '"{iso}"'.format(iso=test.isoformat())
        assert expected == output

    def test_encode_time_conversion_dateutil(self):
        """A dateutil-aware time encodes to its quoted ISO format."""
        # see gh-11473: to_json segfaults with timezone-aware datetimes
        test = datetime.time(10, 12, 15, 343243, dateutil.tz.tzutc())
        output = ujson.encode(test)
        expected = '"{iso}"'.format(iso=test.isoformat())
        assert expected == output

    @pytest.mark.parametrize(
        "decoded_input",
        [NaT, np.datetime64("NaT"), np.nan, np.inf, -np.inf])
    def test_encode_as_null(self, decoded_input):
        """NaT, NaN and the infinities all encode as ``null``."""
        assert ujson.encode(decoded_input) == "null", "Expected null"

    def test_datetime_units(self):
        """``date_unit`` selects the epoch resolution of encoded datetimes."""
        val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504)
        stamp = Timestamp(val)

        roundtrip = ujson.decode(ujson.encode(val, date_unit='s'))
        assert roundtrip == stamp.value // 10**9

        roundtrip = ujson.decode(ujson.encode(val, date_unit='ms'))
        assert roundtrip == stamp.value // 10**6

        roundtrip = ujson.decode(ujson.encode(val, date_unit='us'))
        assert roundtrip == stamp.value // 10**3

        roundtrip = ujson.decode(ujson.encode(val, date_unit='ns'))
        assert roundtrip == stamp.value

        # An unknown unit is rejected.
        with pytest.raises(ValueError):
            ujson.encode(val, date_unit='foo')

    def test_encode_to_utf8(self):
        """With ``ensure_ascii=False`` output matches ``json_unicode``."""
        unencoded = "\xe6\x97\xa5\xd1\x88"

        enc = ujson.encode(unencoded, ensure_ascii=False)
        dec = ujson.decode(enc)

        assert enc == json_unicode(unencoded, ensure_ascii=False)
        assert dec == json.loads(enc)

    def test_decode_from_unicode(self):
        """A ``u()`` string and its ``str()`` form decode to equal values."""
        unicode_input = u("{\"obj\": 31337}")

        dec1 = ujson.decode(unicode_input)
        dec2 = ujson.decode(str(unicode_input))

        assert dec1 == dec2

    def test_encode_recursion_max(self):
        """Encoding two objects that reference each other overflows."""
        # NOTE(review): the original comment here read "8 is the max
        # recursion depth"; this test only relies on the reference cycle
        # below exceeding whatever the encoder's limit is.

        class O2(object):
            member = 0

        class O1(object):
            member = 0

        # Build the cycle: O1 instance -> O2 instance -> O1 instance.
        decoded_input = O1()
        decoded_input.member = O2()
        decoded_input.member.member = decoded_input

        with pytest.raises(OverflowError):
            ujson.encode(decoded_input)

    def test_decode_jibberish(self):
        """Text that is not JSON at all raises ``ValueError``."""
        jibberish = "fdsa sda v9sa fdsa"

        with pytest.raises(ValueError):
            ujson.decode(jibberish)

    @pytest.mark.parametrize(
        "broken_json",
        [
            "[",  # Broken array start.
            "{",  # Broken object start.
            "]",  # Broken array end.
            "}",  # Broken object end.
        ])
    def test_decode_broken_json(self, broken_json):
        """A lone bracket or brace raises ``ValueError``."""
        with pytest.raises(ValueError):
            ujson.decode(broken_json)

    @pytest.mark.parametrize("too_big_char", [
        "[",
        "{",
    ])
    def test_decode_depth_too_big(self, too_big_char):
        """A run of 1024 * 1024 opening brackets raises ``ValueError``."""
        with pytest.raises(ValueError):
            ujson.decode(too_big_char * (1024 * 1024))

    @pytest.mark.parametrize(
        "bad_string",
        [
            "\"TESTING",  # Unterminated.
            "\"TESTING\\\"",  # Unterminated escape.
            "tru",  # Broken True.
            "fa",  # Broken False.
            "n",  # Broken None.
        ])
    def test_decode_bad_string(self, bad_string):
        """Unterminated strings and truncated literals raise ``ValueError``."""
        with pytest.raises(ValueError):
            ujson.decode(bad_string)

    @pytest.mark.parametrize("broken_json", [
        '{{1337:""}}',
        '{{"key":"}',
        '[[[true',
    ])
    def test_decode_broken_json_leak(self, broken_json):
        """Broken documents raise ``ValueError`` on each of 1000 decodes."""
        for _ in range(1000):
            with pytest.raises(ValueError):
                ujson.decode(broken_json)

    @pytest.mark.parametrize(
        "invalid_dict",
        [
            "{{{{31337}}}}",  # No key.
            "{{{{\"key\":}}}}",  # No value.
            "{{{{\"key\"}}}}",  # No colon or value.
        ])
    def test_decode_invalid_dict(self, invalid_dict):
        """Malformed object literals raise ``ValueError``."""
        with pytest.raises(ValueError):
            ujson.decode(invalid_dict)

    @pytest.mark.parametrize(
        "numeric_int_as_str",
        [
            "31337",
            "-31337"  # Should work with negatives.
        ])
    def test_decode_numeric_int(self, numeric_int_as_str):
        """An integer literal decodes to the matching ``int``."""
        assert int(numeric_int_as_str) == ujson.decode(numeric_int_as_str)

    @pytest.mark.skipif(compat.PY3, reason="only PY2")
    def test_encode_unicode_4bytes_utf8_fail(self):
        """The 6-byte sample sequence raises ``OverflowError`` (PY2 only)."""
        with pytest.raises(OverflowError):
            ujson.encode("\xfd\xbf\xbf\xbf\xbf\xbf")

    def test_encode_null_character(self):
        """Strings containing ``\\x00`` encode like ``json.dumps``."""
        wrapped_input = "31337 \x00 1337"
        output = ujson.encode(wrapped_input)

        assert wrapped_input == json.loads(output)
        assert output == json.dumps(wrapped_input)
        assert wrapped_input == ujson.decode(output)

        alone_input = "\x00"
        output = ujson.encode(alone_input)

        assert alone_input == json.loads(output)
        assert output == json.dumps(alone_input)
        assert alone_input == ujson.decode(output)
        assert '"  \\u0000\\r\\n "' == ujson.dumps(u("  \u0000\r\n "))

    def test_decode_null_character(self):
        """An escaped null inside a string decodes like ``json.loads``."""
        wrapped_input = "\"31337 \\u0000 31337\""
        assert ujson.decode(wrapped_input) == json.loads(wrapped_input)

    def test_encode_list_long_conversion(self):
        """A list of 2**63 - 1 values round-trips, also as int64 arrays."""
        long_input = [
            9223372036854775807, 9223372036854775807, 9223372036854775807,
            9223372036854775807, 9223372036854775807, 9223372036854775807
        ]
        output = ujson.encode(long_input)

        assert long_input == json.loads(output)
        assert long_input == ujson.decode(output)

        tm.assert_numpy_array_equal(
            np.array(long_input),
            ujson.decode(output, numpy=True, dtype=np.int64))

    def test_encode_long_conversion(self):
        """2**63 - 1 encodes exactly like ``json.dumps`` and round-trips."""
        long_input = 9223372036854775807
        output = ujson.encode(long_input)

        assert long_input == json.loads(output)
        assert output == json.dumps(long_input)
        assert long_input == ujson.decode(output)

    @pytest.mark.parametrize(
        "int_exp", ["1337E40", "1.337E40", "1337E+9", "1.337e+40", "1.337E-4"])
    def test_decode_numeric_int_exp(self, int_exp):
        """Exponent-notation numbers decode like ``json.loads``."""
        assert ujson.decode(int_exp) == json.loads(int_exp)

    def test_dump_to_file(self):
        """``dump`` writes compact JSON to a ``StringIO``."""
        f = StringIO()
        ujson.dump([1, 2, 3], f)
        assert "[1,2,3]" == f.getvalue()

    def test_dump_to_file_like(self):
        """``dump`` accepts any object that has a ``write`` method."""
        class FileLike(object):
            def __init__(self):
                self.bytes = ''

            def write(self, data_bytes):
                self.bytes += data_bytes

        f = FileLike()
        ujson.dump([1, 2, 3], f)
        assert "[1,2,3]" == f.bytes

    def test_dump_file_args_error(self):
        """``dump`` to a plain string target raises ``TypeError``."""
        with pytest.raises(TypeError):
            ujson.dump([], "")

    def test_load_file(self):
        """``load`` reads JSON from a ``StringIO``, also with numpy."""
        data = "[1,2,3,4]"
        exp_data = [1, 2, 3, 4]

        f = StringIO(data)
        assert exp_data == ujson.load(f)

        f = StringIO(data)
        tm.assert_numpy_array_equal(np.array(exp_data),
                                    ujson.load(f, numpy=True))

    def test_load_file_like(self):
        """``load`` accepts any object that has a ``read`` method."""
        class FileLike(object):
            def read(self):
                # The first call finds no ``end`` attribute and returns the
                # payload; later calls fall through and return None.
                try:
                    self.end
                except AttributeError:
                    self.end = True
                    return "[1,2,3,4]"

        exp_data = [1, 2, 3, 4]

        f = FileLike()
        assert exp_data == ujson.load(f)

        f = FileLike()
        tm.assert_numpy_array_equal(np.array(exp_data),
                                    ujson.load(f, numpy=True))

    def test_load_file_args_error(self):
        """``load`` from a plain string raises ``TypeError``."""
        with pytest.raises(TypeError):
            ujson.load("[]")

    def test_version(self):
        """``__version__`` looks like ``major.minor`` or ``major.minor.patch``."""
        assert re.match(r'^\d+\.\d+(\.\d+)?$', ujson.__version__), \
            "ujson.__version__ must be a string like '1.4.0'"

    def test_encode_numeric_overflow(self):
        """An integer too large to encode raises ``OverflowError``."""
        with pytest.raises(OverflowError):
            ujson.encode(12839128391289382193812939)

    def test_encode_numeric_overflow_nested(self):
        """The overflow is also raised for an attribute, on every attempt."""
        class Nested(object):
            x = 12839128391289382193812939

        for _ in range(100):
            with pytest.raises(OverflowError):
                ujson.encode(Nested())

    @pytest.mark.parametrize("val", [3590016419, 2**31, 2**32, (2**32) - 1])
    def test_decode_number_with_32bit_sign_bit(self, val):
        """Values around the 32-bit sign bit decode to the same number."""
        # Test that numbers that fit within 32 bits but would have the
        # sign bit set (2**31 <= x < 2**32) are decoded properly.
        doc = '{{"id": {val}}}'.format(val=val)
        assert ujson.decode(doc)["id"] == val

    def test_encode_big_escape(self):
        """Repeatedly encoding a 2M-repeat input raises no exception."""
        # The input is the same on every pass, so build it once.
        base = '\u00e5'.encode("utf-8") if compat.PY3 else "\xc3\xa5"
        escape_input = base * 1024 * 1024 * 2

        # Make sure no Exception is raised.
        for _ in range(10):
            ujson.encode(escape_input)

    def test_decode_big_escape(self):
        """Repeatedly decoding a quoted 2M-repeat input raises no exception."""
        # The input is the same on every pass, so build it once.
        base = '\u00e5'.encode("utf-8") if compat.PY3 else "\xc3\xa5"
        quote = compat.str_to_bytes("\"")
        escape_input = quote + (base * 1024 * 1024 * 2) + quote

        # Make sure no Exception is raised.
        for _ in range(10):
            ujson.decode(escape_input)

    def test_to_dict(self):
        """An object with ``toDict`` is encoded as the dict it returns."""
        d = {u("key"): 31337}

        class DictTest(object):
            def toDict(self):
                return d

        o = DictTest()
        output = ujson.encode(o)

        dec = ujson.decode(output)
        assert dec == d

    def test_default_handler(self):
        """``default_handler`` output is encoded in place of the object."""
        class _TestObject(object):
            def __init__(self, val):
                self.val = val

            @property
            def recursive_attr(self):
                # Every access yields a fresh object carrying the same
                # property.
                return _TestObject("recursive_attr")

            def __str__(self):
                return str(self.val)

        # Without a handler, encoding this object overflows.
        with pytest.raises(OverflowError):
            ujson.encode(_TestObject("foo"))
        assert '"foo"' == ujson.encode(_TestObject("foo"), default_handler=str)

        def my_handler(_):
            return "foobar"

        assert '"foobar"' == ujson.encode(_TestObject("foo"),
                                          default_handler=my_handler)

        def my_handler_raises(_):
            raise TypeError("I raise for anything")

        # An exception raised by the handler propagates to the caller.
        with pytest.raises(TypeError, match="I raise for anything"):
            ujson.encode(_TestObject("foo"), default_handler=my_handler_raises)

        def my_int_handler(_):
            return 42

        assert ujson.decode(
            ujson.encode(_TestObject("foo"),
                         default_handler=my_int_handler)) == 42

        def my_obj_handler(_):
            return datetime.datetime(2013, 2, 3)

        # A handler returning a datetime gives the same result as encoding
        # that datetime directly.
        assert (ujson.decode(ujson.encode(datetime.datetime(
            2013, 2, 3))) == ujson.decode(
                ujson.encode(_TestObject("foo"),
                             default_handler=my_obj_handler)))

        obj_list = [_TestObject("foo"), _TestObject("bar")]
        assert (json.loads(json.dumps(obj_list, default=str)) == ujson.decode(
            ujson.encode(obj_list, default_handler=str)))
Ejemplo n.º 35
0
 def test_decimal_decode_test_precise(self):
     """A float value round-trips when decoded with ``precise_float=True``."""
     original = {u("a"): 4.56}
     roundtripped = ujson.decode(ujson.encode(original), precise_float=True)
     assert original == roundtripped
Ejemplo n.º 36
0
    def test_to_html_regression_GH6098(self):
        """Smoke test: HTML repr of a pivot table with non-ASCII labels."""
        first_key = [u('a'), u('a'), u('b'), u('b'), u('a')]
        second_key = [u('1er'), u('2ème'), u('1er'), u('2ème'), u('1er')]
        columns = {
            u('clé1'): first_key,
            u('clé2'): second_key,
            'données1': np.random.randn(5),
            'données2': np.random.randn(5),
        }
        df = DataFrame(columns)

        # it works
        pivoted = df.pivot_table(index=[u('clé1')], columns=[u('clé2')])
        pivoted._repr_html_()
Ejemplo n.º 37
0
 def test_unicode_repr(self):
     mat = np.empty((N, 2), dtype=object)
     mat[:, 0] = 'foo'
     mat[:, 1] = 'bar'
     cols = ['b', u("\u05d0")]
     str_repr = repr(make_block(mat.T, cols, TEST_COLS))
Ejemplo n.º 38
0
 def test_bytestring_with_unicode(self):
     """Smoke test: ``bytes()`` of a Series with non-ASCII value and name."""
     values = [u("\u05d0")]
     label = u("\u05d1")
     ser = Series(values, name=label)
     bytes(ser)
Ejemplo n.º 39
0
 def test_convert_accepts_unicode(self):
     """``self.dtc.convert`` gives equal results for plain and ``u()`` text."""
     from_plain = self.dtc.convert("12:22", None, None)
     from_unicode = self.dtc.convert(u("12:22"), None, None)
     assert from_plain == from_unicode, \
         "DatetimeConverter.convert should accept unicode"
    def test_column_dups_operations(self):
        """Exercise a sequence of operations on frames with duplicate columns.

        Covers column reassignment, insert/set/delete, consolidation,
        duplicates across dtypes, ``values``, rename after a merge, reindex,
        drop, describe, column assignment from an index, arithmetic, and
        dtype-changing assignment.  Most sections mutate ``df`` in place and
        build on the state left by the section before them, so statement
        order matters.
        """
        def check(result, expected=None):
            # Compare against ``expected`` when one is given, then touch
            # ``dtypes`` and ``str()`` so that they are exercised as well.
            if expected is not None:
                assert_frame_equal(result, expected)
            result.dtypes
            str(result)

        # assignment
        # GH 3687
        arr = np.random.randn(3, 2)
        idx = lrange(2)
        df = DataFrame(arr, columns=['A', 'A'])
        df.columns = idx
        expected = DataFrame(arr, columns=idx)
        check(df, expected)

        idx = date_range('20130101', periods=4, freq='Q-NOV')
        df = DataFrame([[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]],
                       columns=['a', 'a', 'a', 'a'])
        df.columns = idx
        expected = DataFrame([[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]],
                             columns=idx)
        check(df, expected)

        # insert
        df = DataFrame([[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]],
                       columns=['foo', 'bar', 'foo', 'hello'])
        df['string'] = 'bah'
        expected = DataFrame(
            [[1, 1, 1, 5, 'bah'], [1, 1, 2, 5, 'bah'], [2, 1, 3, 5, 'bah']],
            columns=['foo', 'bar', 'foo', 'hello', 'string'])
        check(df, expected)
        # inserting a value one element shorter than the index must fail
        with tm.assert_raises_regex(ValueError, 'Length of value'):
            df.insert(0, 'AnotherColumn', range(len(df.index) - 1))

        # insert same dtype
        df['foo2'] = 3
        expected = DataFrame(
            [[1, 1, 1, 5, 'bah', 3], [1, 1, 2, 5, 'bah', 3],
             [2, 1, 3, 5, 'bah', 3]],
            columns=['foo', 'bar', 'foo', 'hello', 'string', 'foo2'])
        check(df, expected)

        # set (non-dup)
        df['foo2'] = 4
        expected = DataFrame(
            [[1, 1, 1, 5, 'bah', 4], [1, 1, 2, 5, 'bah', 4],
             [2, 1, 3, 5, 'bah', 4]],
            columns=['foo', 'bar', 'foo', 'hello', 'string', 'foo2'])
        check(df, expected)
        # restore the previous value; the sections below expect foo2 == 3
        df['foo2'] = 3

        # delete (non dup)
        del df['bar']
        expected = DataFrame(
            [[1, 1, 5, 'bah', 3], [1, 2, 5, 'bah', 3], [2, 3, 5, 'bah', 3]],
            columns=['foo', 'foo', 'hello', 'string', 'foo2'])
        check(df, expected)

        # try to delete again (it's not consolidated)
        del df['hello']
        expected = DataFrame(
            [[1, 1, 'bah', 3], [1, 2, 'bah', 3], [2, 3, 'bah', 3]],
            columns=['foo', 'foo', 'string', 'foo2'])
        check(df, expected)

        # consolidate
        df = df._consolidate()
        expected = DataFrame(
            [[1, 1, 'bah', 3], [1, 2, 'bah', 3], [2, 3, 'bah', 3]],
            columns=['foo', 'foo', 'string', 'foo2'])
        check(df, expected)

        # insert
        df.insert(2, 'new_col', 5.)
        expected = DataFrame(
            [[1, 1, 5., 'bah', 3], [1, 2, 5., 'bah', 3], [2, 3, 5., 'bah', 3]],
            columns=['foo', 'foo', 'new_col', 'string', 'foo2'])
        check(df, expected)

        # insert a dup
        # rejected by default, accepted with allow_duplicates=True
        tm.assert_raises_regex(ValueError, 'cannot insert', df.insert, 2,
                               'new_col', 4.)
        df.insert(2, 'new_col', 4., allow_duplicates=True)
        expected = DataFrame(
            [[1, 1, 4., 5., 'bah', 3], [1, 2, 4., 5., 'bah', 3],
             [2, 3, 4., 5., 'bah', 3]],
            columns=['foo', 'foo', 'new_col', 'new_col', 'string', 'foo2'])
        check(df, expected)

        # delete (dup)
        # a single ``del`` removes both 'foo' columns
        del df['foo']
        expected = DataFrame(
            [[4., 5., 'bah', 3], [4., 5., 'bah', 3], [4., 5., 'bah', 3]],
            columns=['new_col', 'new_col', 'string', 'foo2'])
        assert_frame_equal(df, expected)

        # dup across dtypes
        df = DataFrame([[1, 1, 1., 5], [1, 1, 2., 5], [2, 1, 3., 5]],
                       columns=['foo', 'bar', 'foo', 'hello'])
        check(df)

        df['foo2'] = 7.
        expected = DataFrame(
            [[1, 1, 1., 5, 7.], [1, 1, 2., 5, 7.], [2, 1, 3., 5, 7.]],
            columns=['foo', 'bar', 'foo', 'hello', 'foo2'])
        check(df, expected)

        # selecting a duplicated label yields a frame with both columns
        result = df['foo']
        expected = DataFrame([[1, 1.], [1, 2.], [2, 3.]],
                             columns=['foo', 'foo'])
        check(result, expected)

        # multiple replacements
        df['foo'] = 'string'
        expected = DataFrame(
            [['string', 1, 'string', 5, 7.], ['string', 1, 'string', 5, 7.],
             ['string', 1, 'string', 5, 7.]],
            columns=['foo', 'bar', 'foo', 'hello', 'foo2'])
        check(df, expected)

        del df['foo']
        expected = DataFrame([[1, 5, 7.], [1, 5, 7.], [1, 5, 7.]],
                             columns=['bar', 'hello', 'foo2'])
        check(df, expected)

        # values
        df = DataFrame([[1, 2.5], [3, 4.5]], index=[1, 2], columns=['x', 'x'])
        result = df.values
        expected = np.array([[1, 2.5], [3, 4.5]])
        assert (result == expected).all().all()

        # rename, GH 4403
        df4 = DataFrame(
            {
                'TClose': [22.02],
                'RT': [0.0454],
                'TExg': [0.0422]
            },
            index=MultiIndex.from_tuples([(600809, 20130331)],
                                         names=['STK_ID', 'RPT_Date']))

        df5 = DataFrame(
            {
                'STK_ID': [600809] * 3,
                'RPT_Date': [20120930, 20121231, 20130331],
                'STK_Name': [u('饡驦'), u('饡驦'), u('饡驦')],
                'TClose': [38.05, 41.66, 30.01]
            },
            index=MultiIndex.from_tuples([(600809, 20120930),
                                          (600809, 20121231),
                                          (600809, 20130331)],
                                         names=['STK_ID', 'RPT_Date']))

        # the rename maps 'TClose_x' back to 'TClose' and 'TClose_y' to
        # 'QT_Close' on the merged frame
        k = pd.merge(df4, df5, how='inner', left_index=True, right_index=True)
        result = k.rename(columns={
            'TClose_x': 'TClose',
            'TClose_y': 'QT_Close'
        })
        str(result)
        result.dtypes

        expected = (DataFrame(
            [[0.0454, 22.02, 0.0422, 20130331, 600809,
              u('饡驦'), 30.01]],
            columns=[
                'RT', 'TClose', 'TExg', 'RPT_Date', 'STK_ID', 'STK_Name',
                'QT_Close'
            ]).set_index(['STK_ID', 'RPT_Date'], drop=False))
        assert_frame_equal(result, expected)

        # reindex is invalid!
        df = DataFrame([[1, 5, 7.], [1, 5, 7.], [1, 5, 7.]],
                       columns=['bar', 'a', 'a'])
        pytest.raises(ValueError, df.reindex, columns=['bar'])
        pytest.raises(ValueError, df.reindex, columns=['bar', 'foo'])

        # drop
        # list and scalar labels are both checked against the same result
        df = DataFrame([[1, 5, 7.], [1, 5, 7.], [1, 5, 7.]],
                       columns=['bar', 'a', 'a'])
        result = df.drop(['a'], axis=1)
        expected = DataFrame([[1], [1], [1]], columns=['bar'])
        check(result, expected)
        result = df.drop('a', axis=1)
        check(result, expected)

        # describe
        # all three columns hold the same data, so the expected frame is the
        # first column's description repeated three times
        df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]],
                       columns=['bar', 'a', 'a'],
                       dtype='float64')
        result = df.describe()
        s = df.iloc[:, 0].describe()
        expected = pd.concat([s, s, s], keys=df.columns, axis=1)
        check(result, expected)

        # check column dups with index equal and not equal to df's index
        df = DataFrame(np.random.randn(5, 3),
                       index=['a', 'b', 'c', 'd', 'e'],
                       columns=['A', 'B', 'A'])
        for index in [df.index, pd.Index(list('edcba'))]:
            this_df = df.copy()
            expected_ser = pd.Series(index.values, index=this_df.index)
            expected_df = DataFrame.from_items([('A', expected_ser),
                                                ('B', this_df['B']),
                                                ('A', expected_ser)])
            this_df['A'] = index
            check(this_df, expected_df)

        # operations
        # compute the expected result with unique labels first, then relabel
        # both frames with duplicate names and repeat the operation
        for op in ['__add__', '__mul__', '__sub__', '__truediv__']:
            df = DataFrame(dict(A=np.arange(10), B=np.random.rand(10)))
            expected = getattr(df, op)(df)
            expected.columns = ['A', 'A']
            df.columns = ['A', 'A']
            result = getattr(df, op)(df)
            check(result, expected)

        # multiple assignments that change dtypes
        # the location indexer is a slice
        # GH 6120
        df = DataFrame(np.random.randn(5, 2), columns=['that', 'that'])
        expected = DataFrame(1.0, index=range(5), columns=['that', 'that'])

        df['that'] = 1.0
        check(df, expected)

        df = DataFrame(np.random.rand(5, 2), columns=['that', 'that'])
        expected = DataFrame(1, index=range(5), columns=['that', 'that'])

        df['that'] = 1
        check(df, expected)
Ejemplo n.º 41
0
def randu(n):
    """Return a random string of ``n`` characters.

    The alphabet is the 26 consecutive code points starting at 1488
    followed by the ASCII digits.
    """
    alphabet = u("").join(unichr(cp) for cp in lrange(1488, 1488 + 26))
    alphabet = alphabet + string.digits
    picked = []
    for _ in range(n):
        picked.append(random.choice(alphabet))
    return ''.join(picked)
Ejemplo n.º 42
0
        hash(c)  # this will not raise


@pytest.mark.parametrize("ll", [re.compile('ad')])
def test_is_re_passes(ll):
    """``inference.is_re`` is truthy for a compiled pattern object."""
    recognised = inference.is_re(ll)
    assert recognised


@pytest.mark.parametrize("ll", ['x', 2, 3, object()])
def test_is_re_fails(ll):
    """``inference.is_re`` is falsy for objects that are not compiled patterns."""
    recognised = inference.is_re(ll)
    assert not recognised


@pytest.mark.parametrize("ll", [
    r'a',
    u('x'),
    r'asdf',
    re.compile('adsf'),
    u(r'\u2233\s*'),
    re.compile(r''),
])
def test_is_recompilable_passes(ll):
    """Strings and compiled patterns are accepted by ``is_re_compilable``."""
    compilable = inference.is_re_compilable(ll)
    assert compilable


@pytest.mark.parametrize("ll", [1, [], object()])
def test_is_recompilable_fails(ll):
    """An int, a list and a bare object are rejected by ``is_re_compilable``."""
    compilable = inference.is_re_compilable(ll)
    assert not compilable


class TestInference(object):
    def test_infer_dtype_bytes(self):
Ejemplo n.º 43
0
 def test_missing_unicode_key(self):
     """Looking up a missing non-ASCII column label must not raise
     UnicodeEncodeError; a KeyError is tolerated."""
     frame = DataFrame({"a": [1]})
     missing_label = u("\u05d0")
     try:
         frame.loc[:, missing_label]
     except KeyError:
         # the expected exception for an absent label
         pass
Ejemplo n.º 44
0
 def test_unicode_repr_doesnt_raise(self):
     """``repr`` of a manager built from a non-ASCII description must not raise."""
     mgr = create_mgr(u('b,\u05d0: object'))
     repr(mgr)
Ejemplo n.º 45
0
 def test_convert_accepts_unicode(self):
     """Converting a date label gives the same result for str and unicode input."""
     labels = ("2012-1-1", u("2012-1-1"))
     from_str, from_unicode = [
         self.pc.convert(label, None, self.axis) for label in labels
     ]
     assert from_str == from_unicode
Ejemplo n.º 46
0
    def _write_header(self, indent):
        """
        Write the ``<thead>`` section of the table.

        Nothing is written when ``self.fmt.header`` is falsy. Otherwise one
        header row is written per column level when the columns are an
        ``ABCMultiIndex``, or a single header row for any other columns. When
        ``self.fmt.has_index_names``, ``self.fmt.index`` and
        ``self.fmt.show_index_names`` are all truthy, an additional row
        holding the index names is written. With horizontal truncation
        (``self.fmt.truncate_h``) an extra ``...`` cell is inserted at
        ``self.fmt.tr_col_num``.

        Parameters
        ----------
        indent
            Current indentation passed to ``self.write`` / ``self.write_tr``.

        Returns
        -------
        The indentation after the header: ``indent`` is raised by
        ``self.indent_delta`` for the rows and lowered again before the
        closing tag, or returned untouched when no header is written.
        """
        truncate_h = self.fmt.truncate_h
        row_levels = self.frame.index.nlevels
        if not self.fmt.header:
            # write nothing
            return indent

        def _column_header():
            # Build the cells of the single header row: blank cells for the
            # index levels followed by the column labels.
            # NOTE(review): this helper is only called from the
            # non-MultiIndex branch below, so its ABCMultiIndex branch looks
            # unreachable from here - confirm before relying on it.
            if self.fmt.index:
                row = [''] * (self.frame.index.nlevels - 1)
            else:
                row = []

            if isinstance(self.columns, ABCMultiIndex):
                if self.fmt.has_column_names and self.fmt.index:
                    row.append(single_column_table(self.columns.names))
                else:
                    row.append('')
                style = "text-align: {just};".format(just=self.fmt.justify)
                row.extend([
                    single_column_table(c, self.fmt.justify, style)
                    for c in self.columns
                ])
            else:
                if self.fmt.index:
                    row.append(self.columns.name or '')
                row.extend(self.columns)
            return row

        self.write('<thead>', indent)

        indent += self.indent_delta

        if isinstance(self.columns, ABCMultiIndex):
            template = 'colspan="{span:d}" halign="left"'

            if self.fmt.sparsify:
                # GH3547
                sentinel = com.sentinel_factory()
            else:
                sentinel = None
            levels = self.columns.format(sparsify=sentinel,
                                         adjoin=False,
                                         names=False)
            # one mapping per level, used below as {start position: span}
            level_lengths = get_level_lengths(levels, sentinel)
            inner_lvl = len(level_lengths) - 1
            for lnum, (records,
                       values) in enumerate(zip(level_lengths, levels)):
                if truncate_h:
                    # modify the header lines
                    ins_col = self.fmt.tr_col_num
                    if self.fmt.sparsify:
                        recs_new = {}
                        # Increment tags after ... col.
                        for tag, span in list(records.items()):
                            if tag >= ins_col:
                                # header starts at/after the insertion
                                # point: shift it right by one
                                recs_new[tag + 1] = span
                            elif tag + span > ins_col:
                                # header spans the insertion point: widen it
                                recs_new[tag] = span + 1
                                if lnum == inner_lvl:
                                    values = (values[:ins_col] + (u('...'), ) +
                                              values[ins_col:])
                                else:
                                    # sparse col headers do not receive a ...
                                    values = (values[:ins_col] +
                                              (values[ins_col - 1], ) +
                                              values[ins_col:])
                            else:
                                recs_new[tag] = span
                            # if ins_col lies between tags, all col headers
                            # get ...
                            if tag + span == ins_col:
                                recs_new[ins_col] = 1
                                values = (values[:ins_col] + (u('...'), ) +
                                          values[ins_col:])
                        records = recs_new
                        # same value as computed before the loop
                        inner_lvl = len(level_lengths) - 1
                        if lnum == inner_lvl:
                            records[ins_col] = 1
                    else:
                        # not sparsified: shift later positions right and
                        # add a width-1 entry for the ... column
                        recs_new = {}
                        for tag, span in list(records.items()):
                            if tag >= ins_col:
                                recs_new[tag + 1] = span
                            else:
                                recs_new[tag] = span
                        recs_new[ins_col] = 1
                        records = recs_new
                        values = (values[:ins_col] + [u('...')] +
                                  values[ins_col:])

                name = self.columns.names[lnum]
                row = [''] * (row_levels - 1) + [
                    '' if name is None else pprint_thing(name)
                ]

                if row == [""] and self.fmt.index is False:
                    row = []

                # tags maps output cell position -> colspan attribute string
                tags = {}
                j = len(row)
                for i, v in enumerate(values):
                    if i in records:
                        if records[i] > 1:
                            tags[j] = template.format(span=records[i])
                    else:
                        # position i has no entry in records, so no cell is
                        # emitted for it
                        continue
                    j += 1
                    row.append(v)
                self.write_tr(row,
                              indent,
                              self.indent_delta,
                              tags=tags,
                              header=True)
        else:
            col_row = _column_header()
            align = self.fmt.justify

            if truncate_h:
                ins_col = row_levels + self.fmt.tr_col_num
                col_row.insert(ins_col, '...')

            self.write_tr(col_row,
                          indent,
                          self.indent_delta,
                          header=True,
                          align=align)

        # extra row carrying the index names, padded with blank cells for
        # the data columns
        if all((self.fmt.has_index_names, self.fmt.index,
                self.fmt.show_index_names)):
            row = (
                [x if x is not None else '' for x in self.frame.index.names] +
                [''] * min(len(self.columns), self.max_cols))
            if truncate_h:
                ins_col = row_levels + self.fmt.tr_col_num
                row.insert(ins_col, '')
            self.write_tr(row, indent, self.indent_delta, header=True)

        indent -= self.indent_delta
        self.write('</thead>', indent)

        return indent
Ejemplo n.º 47
0
def test_timtetonum_accepts_unicode():
    """``converter.time2num`` gives equal results for str and unicode input."""
    from_str = converter.time2num("00:01")
    from_unicode = converter.time2num(u("00:01"))
    assert from_str == from_unicode
Ejemplo n.º 48
0
 def _format_native_types(self, na_rep=u('NaT'),
                          date_format=None, **kwargs):
     """Return the result of ``Timedelta64Formatter`` applied to ``self``.

     ``na_rep`` is forwarded as the formatter's ``nat_rep``; ``date_format``
     and any extra keyword arguments are accepted but not used here.
     """
     from pandas.core.format import Timedelta64Formatter
     formatter = Timedelta64Formatter(values=self,
                                      nat_rep=na_rep,
                                      justify='all')
     return formatter.get_result()
Ejemplo n.º 49
0
    def _write_hierarchical_rows(self, fmt_values, indent):
        """
        Write the body rows of a frame whose index has several levels.

        Each row consists of the index labels followed by the formatted cell
        values. When ``self.fmt.sparsify`` is truthy, repeated outer labels
        are merged with ``rowspan`` attributes; otherwise every label is
        written on every row. Vertical truncation (sparsified branch only)
        inserts a ``...`` row at ``self.fmt.tr_row_num``; horizontal
        truncation inserts a ``...`` cell at ``self.fmt.tr_col_num`` in every
        row.

        Parameters
        ----------
        fmt_values
            Indexable by column position, giving a list of formatted cell
            strings per column. With vertical truncation these lists are
            modified in place (a ``'...'`` entry is inserted).
        indent
            Indentation passed through to ``self.write_tr``.
        """
        template = 'rowspan="{span}" valign="top"'

        truncate_h = self.fmt.truncate_h
        truncate_v = self.fmt.truncate_v
        frame = self.fmt.tr_frame
        ncols = len(frame.columns)
        nrows = len(frame)
        row_levels = self.frame.index.nlevels

        # Non-sparsified labels transposed to one tuple per row; used by
        # both branches below.
        idx_values = frame.index.format(sparsify=False,
                                        adjoin=False,
                                        names=False)
        idx_values = lzip(*idx_values)

        if self.fmt.sparsify:
            # GH3547
            sentinel = com.sentinel_factory()
            levels = frame.index.format(sparsify=sentinel,
                                        adjoin=False,
                                        names=False)

            # one mapping per level, used below as {start row: span}
            level_lengths = get_level_lengths(levels, sentinel)
            inner_lvl = len(level_lengths) - 1
            if truncate_v:
                # Insert ... row and adjust idx_values and
                # level_lengths to take this into account.
                ins_row = self.fmt.tr_row_num
                inserted = False
                for lnum, records in enumerate(level_lengths):
                    rec_new = {}
                    for tag, span in list(records.items()):
                        if tag >= ins_row:
                            # label starts at/after the insertion point:
                            # shift it down by one
                            rec_new[tag + 1] = span
                        elif tag + span > ins_row:
                            # label spans the insertion point: lengthen it
                            rec_new[tag] = span + 1

                            # GH 14882 - Make sure insertion done once
                            if not inserted:
                                dot_row = list(idx_values[ins_row - 1])
                                dot_row[-1] = u('...')
                                idx_values.insert(ins_row, tuple(dot_row))
                                inserted = True
                            else:
                                dot_row = list(idx_values[ins_row])
                                dot_row[inner_lvl - lnum] = u('...')
                                idx_values[ins_row] = tuple(dot_row)
                        else:
                            rec_new[tag] = span
                        # If ins_row lies between tags, all cols idx cols
                        # receive ...
                        if tag + span == ins_row:
                            rec_new[ins_row] = 1
                            if lnum == 0:
                                idx_values.insert(
                                    ins_row,
                                    tuple([u('...')] * len(level_lengths)))

                            # GH 14882 - Place ... in correct level
                            elif inserted:
                                dot_row = list(idx_values[ins_row])
                                dot_row[inner_lvl - lnum] = u('...')
                                idx_values[ins_row] = tuple(dot_row)
                    level_lengths[lnum] = rec_new

                level_lengths[inner_lvl][ins_row] = 1
                for ix_col in range(len(fmt_values)):
                    fmt_values[ix_col].insert(ins_row, '...')
                nrows += 1

            for i in range(nrows):
                row = []
                tags = {}

                # number of index cells omitted on this row because an
                # earlier row's rowspan covers them
                sparse_offset = 0
                j = 0
                for records, v in zip(level_lengths, idx_values[i]):
                    if i in records:
                        if records[i] > 1:
                            tags[j] = template.format(span=records[i])
                    else:
                        sparse_offset += 1
                        continue

                    j += 1
                    row.append(v)

                row.extend(fmt_values[j][i] for j in range(ncols))
                if truncate_h:
                    row.insert(
                        row_levels - sparse_offset + self.fmt.tr_col_num,
                        '...')
                self.write_tr(row,
                              indent,
                              self.indent_delta,
                              tags=tags,
                              nindex_levels=len(levels) - sparse_offset)
        else:
            # ``idx_values`` from the top of the method already holds the
            # non-sparsified label tuples; reuse it rather than
            # re-formatting the whole index once per row.
            for i in range(len(frame)):
                row = []
                row.extend(idx_values[i])
                row.extend(fmt_values[j][i] for j in range(ncols))
                if truncate_h:
                    row.insert(row_levels + self.fmt.tr_col_num, '...')
                self.write_tr(row,
                              indent,
                              self.indent_delta,
                              tags=None,
                              nindex_levels=frame.index.nlevels)
Ejemplo n.º 50
0
 def test_tidy_repr(self):
     """``repr`` of a long, named Series of non-ASCII strings must not raise."""
     values = [u("\u05d0")] * 1000
     long_series = Series(values)
     long_series.name = 'title1'
     repr(long_series)  # should not raise exception
Ejemplo n.º 51
0
    def write_result(self, buf):
        """
        Render a DataFrame to a LaTeX tabular/longtable environment output.

        The output is written piecewise to ``buf`` via ``buf.write``: the
        ``\\begin{...}`` line with the column format, a ``\\toprule``, one
        line per table row (cells joined with `` & ``), rules separating
        header from body, and the closing ``\\end{...}``.

        Parameters
        ----------
        buf
            Object with a ``write`` method that receives the LaTeX text.

        Raises
        ------
        AssertionError
            If ``self.column_format`` is set but is not a string type.
        """

        # string representation of the columns
        if len(self.frame.columns) == 0 or len(self.frame.index) == 0:
            # empty frame: emit a single descriptive cell instead of data
            info_line = (
                u('Empty {name}\nColumns: {col}\nIndex: {idx}').format(
                    name=type(self.frame).__name__,
                    col=self.frame.columns,
                    idx=self.frame.index))
            strcols = [[info_line]]
        else:
            strcols = self.fmt._to_str_columns()

        def get_col_type(dtype):
            # numeric dtypes get 'r', everything else 'l'
            if issubclass(dtype.type, np.number):
                return 'r'
            else:
                return 'l'

        # reestablish the MultiIndex that has been joined by _to_str_column
        if self.fmt.index and isinstance(self.frame.index, MultiIndex):
            out = self.frame.index.format(adjoin=False,
                                          sparsify=self.fmt.sparsify,
                                          names=self.fmt.has_index_names,
                                          na_rep=self.fmt.na_rep)

            # index.format will sparsify repeated entries with empty strings
            # so pad these with some empty space
            def pad_empties(x):
                # ``pad`` ends up as the last non-empty entry of x (or the
                # first entry if all are empty); its length sets the padding
                for pad in reversed(x):
                    if pad:
                        break
                return [x[0]] + [i if i else ' ' * len(pad) for i in x[1:]]

            out = (pad_empties(i) for i in out)

            # Add empty spaces for each column level
            clevels = self.frame.columns.nlevels
            out = [[' ' * len(i[-1])] * clevels + i for i in out]

            # Add the column names to the last index column
            cnames = self.frame.columns.names
            if any(cnames):
                new_names = [i if i else '{}' for i in cnames]
                out[self.frame.index.nlevels - 1][:clevels] = new_names

            # Get rid of old multiindex column and add new ones
            strcols = out + strcols[1:]

        column_format = self.column_format
        if column_format is None:
            # derive one alignment letter per data column, prefixed with
            # one 'l' per index level when the index is shown
            dtypes = self.frame.dtypes._values
            column_format = ''.join(map(get_col_type, dtypes))
            if self.fmt.index:
                index_format = 'l' * self.frame.index.nlevels
                column_format = index_format + column_format
        elif not isinstance(column_format,
                            compat.string_types):  # pragma: no cover
            raise AssertionError('column_format must be str or unicode, '
                                 'not {typ}'.format(typ=type(column_format)))

        if not self.longtable:
            buf.write(
                '\\begin{{tabular}}{{{fmt}}}\n'.format(fmt=column_format))
            buf.write('\\toprule\n')
        else:
            buf.write(
                '\\begin{{longtable}}{{{fmt}}}\n'.format(fmt=column_format))
            buf.write('\\toprule\n')

        ilevels = self.frame.index.nlevels
        clevels = self.frame.columns.nlevels
        # nlevels is the row position at which the header rule is written
        nlevels = clevels
        if self.fmt.has_index_names and self.fmt.show_index_names:
            nlevels += 1
        # transpose the per-column strings into per-row tuples
        strrows = list(zip(*strcols))
        self.clinebuf = []

        for i, row in enumerate(strrows):
            if i == nlevels and self.fmt.header:
                buf.write('\\midrule\n')  # End of header
                if self.longtable:
                    buf.write('\\endhead\n')
                    buf.write('\\midrule\n')
                    buf.write('\\multicolumn{{{n}}}{{r}}{{{{Continued on next '
                              'page}}}} \\\\\n'.format(n=len(row)))
                    buf.write('\\midrule\n')
                    buf.write('\\endfoot\n\n')
                    buf.write('\\bottomrule\n')
                    buf.write('\\endlastfoot\n')
            if self.fmt.kwds.get('escape', True):
                # escape backslashes first
                # (later replacements insert backslashes of their own);
                # empty cells and literal '{}' cells become '{}'
                crow = [(x.replace('\\', '\\textbackslash ').replace(
                    '_', '\\_').replace('%', '\\%').replace(
                        '$', '\\$').replace('#', '\\#').replace(
                            '{', '\\{').replace('}', '\\}').replace(
                                '~', '\\textasciitilde ').replace(
                                    '^', '\\textasciicircum ').replace(
                                        '&', '\\&') if
                         (x and x != '{}') else '{}') for x in row]
            else:
                crow = [x if x else '{}' for x in row]
            if self.bold_rows and self.fmt.index:
                # bold row labels
                crow = [
                    '\\textbf{{{x}}}'.format(x=x)
                    if j < ilevels and x.strip() not in ['', '{}'] else x
                    for j, x in enumerate(crow)
                ]
            if i < clevels and self.fmt.header and self.multicolumn:
                # sum up columns to multicolumns
                crow = self._format_multicolumn(crow, ilevels)
            if (i >= nlevels and self.fmt.index and self.multirow
                    and ilevels > 1):
                # sum up rows to multirows
                crow = self._format_multirow(crow, ilevels, i, strrows)
            buf.write(' & '.join(crow))
            buf.write(' \\\\\n')
            if self.multirow and i < len(strrows) - 1:
                self._print_cline(buf, i, len(strcols))

        if not self.longtable:
            buf.write('\\bottomrule\n')
            buf.write('\\end{tabular}\n')
        else:
            # for longtable the bottom rule was already written in the
            # header section above (when that section ran)
            buf.write('\\end{longtable}\n')
Ejemplo n.º 52
0
    def test_plot(self):
        """Smoke-test ``DataFrame.plot`` across keyword options and index kinds."""
        # time-indexed frame
        ts_frame = tm.makeTimeDataFrame()
        for options in (dict(grid=False),
                        dict(subplots=True),
                        dict(subplots=True, use_index=False)):
            _check_plot_works(ts_frame.plot, **options)

        # an unknown keyword must make plotting fail
        small_frame = DataFrame({'x': [1, 2], 'y': [3, 4]})
        self._check_plot_fails(small_frame.plot, kind='line', blarg=True)

        # string-labelled index
        letters = string.ascii_letters[:10]
        letter_frame = DataFrame(np.random.rand(10, 3), index=list(letters))
        for options in (dict(use_index=True),
                        dict(sort_columns=False),
                        dict(yticks=[1, 5, 10]),
                        dict(xticks=[1, 5, 10]),
                        dict(ylim=(-100, 100), xlim=(-100, 100)),
                        dict(subplots=True, title='blah'),
                        dict(title='blah')):
            _check_plot_works(letter_frame.plot, **options)

        # MultiIndex rows
        pairs = lzip(string.ascii_letters[:10], range(10))
        multi_frame = DataFrame(np.random.rand(10, 3),
                                index=MultiIndex.from_tuples(pairs))
        _check_plot_works(multi_frame.plot, use_index=True)

        # unicode
        row_letters = ['\u03b1', '\u03b2', '\u03b3', '\u03b4']
        row_tuples = [(u(letter), 2 * pos + offset)
                      for pos, letter in enumerate(row_letters)
                      for offset in (0, 1)]
        row_index = MultiIndex.from_tuples(row_tuples, names=['i0', 'i1'])
        col_tuples = [('bar', u('\u0394')), ('bar', u('\u0395'))]
        col_index = MultiIndex.from_tuples(col_tuples, names=['c0', 'c1'])
        unicode_frame = DataFrame(np.random.randint(0, 10, (8, 2)),
                                  columns=col_index,
                                  index=row_index)
        _check_plot_works(unicode_frame.plot, title=u('\u03A3'))
Ejemplo n.º 53
0
 def setUp(self):
     """Create the sample data and a randomly named pickle path for each test."""
     from pandas.io.tests.generate_legacy_pickles import create_data
     self.data = create_data()
     random_part = tm.rands(10)
     self.path = u('__%s__.pickle' % random_part)
Ejemplo n.º 54
0
 def test_bytestring_with_unicode(self):
     """Byte-string conversion of a frame holding non-ASCII text must not raise."""
     frame = DataFrame({'A': [u("\u05d0")]})
     # ``bytes`` when compat.PY3 is set, ``str`` otherwise
     to_bytestring = bytes if compat.PY3 else str
     to_bytestring(frame)
Ejemplo n.º 55
0
 def test_encode_dict_with_unicode_keys(self, unicode_key):
     """A dict with a unicode key survives a ujson encode/decode round trip."""
     original = {unicode_key: u("value1")}
     encoded = ujson.encode(original)
     round_tripped = ujson.decode(encoded)
     assert original == round_tripped
Ejemplo n.º 56
0
def test_repr_with_unicode_data():
    """The repr of an index holding non-ASCII data must not be unicode-escaped."""
    columns = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    with pd.core.config.option_context("display.encoding", 'UTF-8'):
        frame = pd.DataFrame(columns)
        multi = frame.set_index(["a", "b"]).index
        rendered = repr(multi)
        assert "\\u" not in rendered  # we don't want unicode-escaped
Ejemplo n.º 57
0
def encode(obj):
    """
    Data encoder

    Convert ``obj`` into a plain dict describing it, keyed by a ``u'typ'``
    tag, for the object kinds handled below (indexes, Categorical, Series,
    NDFrame subclasses, datetime-likes, Period, Interval, sparse index
    helpers, numpy arrays and scalars, complex numbers). Any other object is
    returned unchanged.

    The branch order matters: more specific index classes are tested before
    the generic ``Index`` fallback, and ``Timestamp`` is handled before the
    other datetime-like checks.

    Array payloads go through ``convert`` and the dicts that carry them also
    record the module-level ``compressor`` value.

    Raises
    ------
    NotImplementedError
        For ``SparseSeries`` and ``SparseDataFrame`` inputs.
    Exception
        For a datetime-like object that matches none of the handled types.
    """
    tobj = type(obj)
    if isinstance(obj, Index):
        if isinstance(obj, RangeIndex):
            return {
                u'typ': u'range_index',
                u'klass': u(obj.__class__.__name__),
                u'name': getattr(obj, 'name', None),
                u'start': getattr(obj, '_start', None),
                u'stop': getattr(obj, '_stop', None),
                u'step': getattr(obj, '_step', None)
            }
        elif isinstance(obj, PeriodIndex):
            return {
                u'typ': u'period_index',
                u'klass': u(obj.__class__.__name__),
                u'name': getattr(obj, 'name', None),
                u'freq': u_safe(getattr(obj, 'freqstr', None)),
                u'dtype': u(obj.dtype.name),
                u'data': convert(obj.asi8),
                u'compress': compressor
            }
        elif isinstance(obj, DatetimeIndex):
            tz = getattr(obj, 'tz', None)

            # store tz info and data as UTC
            if tz is not None:
                tz = u(tz.zone)
                obj = obj.tz_convert('UTC')
            return {
                u'typ': u'datetime_index',
                u'klass': u(obj.__class__.__name__),
                u'name': getattr(obj, 'name', None),
                u'dtype': u(obj.dtype.name),
                u'data': convert(obj.asi8),
                u'freq': u_safe(getattr(obj, 'freqstr', None)),
                u'tz': tz,
                u'compress': compressor
            }
        elif isinstance(obj, (IntervalIndex, IntervalArray)):
            # NOTE(review): this branch is only reached for objects that
            # already passed isinstance(obj, Index) above - confirm whether
            # an IntervalArray can ever get here.
            if isinstance(obj, IntervalIndex):
                typ = u'interval_index'
            else:
                typ = u'interval_array'
            return {
                u'typ': typ,
                u'klass': u(obj.__class__.__name__),
                u'name': getattr(obj, 'name', None),
                u'left': getattr(obj, 'left', None),
                u'right': getattr(obj, 'right', None),
                u'closed': getattr(obj, 'closed', None)
            }
        elif isinstance(obj, MultiIndex):
            return {
                u'typ': u'multi_index',
                u'klass': u(obj.__class__.__name__),
                u'names': getattr(obj, 'names', None),
                u'dtype': u(obj.dtype.name),
                u'data': convert(obj.values),
                u'compress': compressor
            }
        else:
            # any other Index
            return {
                u'typ': u'index',
                u'klass': u(obj.__class__.__name__),
                u'name': getattr(obj, 'name', None),
                u'dtype': u(obj.dtype.name),
                u'data': convert(obj.values),
                u'compress': compressor
            }

    elif isinstance(obj, Categorical):
        return {
            u'typ': u'category',
            u'klass': u(obj.__class__.__name__),
            u'name': getattr(obj, 'name', None),
            u'codes': obj.codes,
            u'categories': obj.categories,
            u'ordered': obj.ordered,
            u'compress': compressor
        }

    elif isinstance(obj, Series):
        if isinstance(obj, SparseSeries):
            raise NotImplementedError(
                'msgpack sparse series is not implemented')
            # d = {'typ': 'sparse_series',
            #     'klass': obj.__class__.__name__,
            #     'dtype': obj.dtype.name,
            #     'index': obj.index,
            #     'sp_index': obj.sp_index,
            #     'sp_values': convert(obj.sp_values),
            #     'compress': compressor}
            # for f in ['name', 'fill_value', 'kind']:
            #    d[f] = getattr(obj, f, None)
            # return d
        else:
            return {
                u'typ': u'series',
                u'klass': u(obj.__class__.__name__),
                u'name': getattr(obj, 'name', None),
                u'index': obj.index,
                u'dtype': u(obj.dtype.name),
                u'data': convert(obj.values),
                u'compress': compressor
            }
    elif issubclass(tobj, NDFrame):
        if isinstance(obj, SparseDataFrame):
            raise NotImplementedError(
                'msgpack sparse frame is not implemented')
            # d = {'typ': 'sparse_dataframe',
            #     'klass': obj.__class__.__name__,
            #     'columns': obj.columns}
            # for f in ['default_fill_value', 'default_kind']:
            #    d[f] = getattr(obj, f, None)
            # d['data'] = dict([(name, ss)
            #                 for name, ss in compat.iteritems(obj)])
            # return d
        else:

            # work on the consolidated form of the object's internal data
            data = obj._data
            if not data.is_consolidated():
                data = data.consolidate()

            # the block manager
            return {
                u'typ':
                u'block_manager',
                u'klass':
                u(obj.__class__.__name__),
                u'axes':
                data.axes,
                u'blocks': [{
                    u'locs': b.mgr_locs.as_array,
                    u'values': convert(b.values),
                    u'shape': b.values.shape,
                    u'dtype': u(b.dtype.name),
                    u'klass': u(b.__class__.__name__),
                    u'compress': compressor
                } for b in data.blocks]
            }

    elif isinstance(obj, (datetime, date, np.datetime64, timedelta,
                          np.timedelta64)) or obj is NaT:
        # Timestamp is tested first, ahead of the NaT and plain
        # datetime/date checks below
        if isinstance(obj, Timestamp):
            tz = obj.tzinfo
            if tz is not None:
                tz = u(tz.zone)
            freq = obj.freq
            if freq is not None:
                freq = u(freq.freqstr)
            return {
                u'typ': u'timestamp',
                u'value': obj.value,
                u'freq': freq,
                u'tz': tz
            }
        if obj is NaT:
            return {u'typ': u'nat'}
        elif isinstance(obj, np.timedelta64):
            return {u'typ': u'timedelta64', u'data': obj.view('i8')}
        elif isinstance(obj, timedelta):
            return {
                u'typ': u'timedelta',
                u'data': (obj.days, obj.seconds, obj.microseconds)
            }
        elif isinstance(obj, np.datetime64):
            return {u'typ': u'datetime64', u'data': u(str(obj))}
        elif isinstance(obj, datetime):
            return {u'typ': u'datetime', u'data': u(obj.isoformat())}
        elif isinstance(obj, date):
            return {u'typ': u'date', u'data': u(obj.isoformat())}
        raise Exception("cannot encode this datetimelike object: %s" % obj)
    elif isinstance(obj, Period):
        return {
            u'typ': u'period',
            u'ordinal': obj.ordinal,
            u'freq': u_safe(obj.freqstr)
        }
    elif isinstance(obj, Interval):
        return {
            u'typ': u'interval',
            u'left': obj.left,
            u'right': obj.right,
            u'closed': obj.closed
        }
    elif isinstance(obj, BlockIndex):
        return {
            u'typ': u'block_index',
            u'klass': u(obj.__class__.__name__),
            u'blocs': obj.blocs,
            u'blengths': obj.blengths,
            u'length': obj.length
        }
    elif isinstance(obj, IntIndex):
        return {
            u'typ': u'int_index',
            u'klass': u(obj.__class__.__name__),
            u'indices': obj.indices,
            u'length': obj.length
        }
    elif isinstance(obj, np.ndarray):
        return {
            u'typ': u'ndarray',
            u'shape': obj.shape,
            u'ndim': obj.ndim,
            u'dtype': u(obj.dtype.name),
            u'data': convert(obj),
            u'compress': compressor
        }
    elif isinstance(obj, np.number):
        # numpy scalars are stored as the repr of their value(s)
        if np.iscomplexobj(obj):
            return {
                u'typ': u'np_scalar',
                u'sub_typ': u'np_complex',
                u'dtype': u(obj.dtype.name),
                u'real': u(obj.real.__repr__()),
                u'imag': u(obj.imag.__repr__())
            }
        else:
            return {
                u'typ': u'np_scalar',
                u'dtype': u(obj.dtype.name),
                u'data': u(obj.__repr__())
            }
    elif isinstance(obj, complex):
        return {
            u'typ': u'np_complex',
            u'real': u(obj.real.__repr__()),
            u'imag': u(obj.imag.__repr__())
        }

    # no special handling: hand the object back unchanged
    return obj
Ejemplo n.º 58
0
            if compat.PY3 or ord(path_or_buf[0]) >= 0x80:
                fh = compat.BytesIO(path_or_buf)
                return read(fh)
        finally:
            if fh is not None:
                fh.close()
    elif hasattr(path_or_buf, 'read') and compat.callable(path_or_buf.read):
        # treat as a buffer like
        return read(path_or_buf)

    raise ValueError('path_or_buf needs to be a string file path or file-like')


# Lookup table from a stored dtype identifier - either an integer code or a
# dtype name string - to the dtype to use for it.
# NOTE(review): the integer keys (21, 22, 7) look like numpy type numbers -
# confirm against the writer side.
dtype_dict = {
    21: np.dtype('M8[ns]'),
    u('datetime64[ns]'): np.dtype('M8[ns]'),
    u('datetime64[us]'): np.dtype('M8[us]'),
    22: np.dtype('m8[ns]'),
    u('timedelta64[ns]'): np.dtype('m8[ns]'),
    u('timedelta64[us]'): np.dtype('m8[us]'),

    # this is platform int, which we need to remap to np.int64
    # for compat on windows platforms
    7: np.dtype('int64'),
    # kept as a plain string rather than an np.dtype object
    'category': 'category'
}


def dtype_for(t):
    """ return my dtype mapping, whether number or name """
    if t in dtype_dict:
Ejemplo n.º 59
0
 def test_list_mixed(self):
     """A list mixing a float, numpy scalars and text survives encode_decode.

     The list is passed through ``self.encode_decode`` and the result is
     compared with the input using ``tm.assert_almost_equal``.
     """
     original = [1.0, np.float32(3.5), np.complex128(4.25), u('foo')]
     round_tripped = self.encode_decode(original)
     tm.assert_almost_equal(original, round_tripped)
Ejemplo n.º 60
0
    def convert_value(self, v):
        """Convert the value held by this term into something PyTables accepts.

        The conversion is chosen from the decoded ``self.kind`` and
        ``self.meta``.  The branches are tested in order and the first one
        that matches wins.

        Parameters
        ----------
        v : object
            Raw value taken from the expression.

        Returns
        -------
        TermValue
            Constructed from the coerced value, its converted form and a
            kind label.  The label is ``self.kind`` except for categorical
            lookups (``u('integer')``) and the string fallback
            (``u('string')``).

        Notes
        -----
        For ``kind == 'integer'`` the value is parsed with ``int`` first so
        that integers whose magnitude exceeds 2**53 keep every digit.  Only
        when ``int`` rejects the input (e.g. ``'1.5'`` or ``'1e3'``) is it
        routed through ``float`` and truncated.
        """
        def stringify(value):
            # honour the configured encoding when one is set
            if self.encoding is not None:
                encoder = partial(com.pprint_thing_encoded,
                                  encoding=self.encoding)
            else:
                encoder = com.pprint_thing
            return encoder(value)

        kind = _ensure_decoded(self.kind)
        meta = _ensure_decoded(self.meta)

        if kind == u('datetime64') or kind == u('datetime'):
            # numbers are stringified before being handed to pd.Timestamp
            if isinstance(v, (int, float)):
                v = stringify(v)
            v = _ensure_decoded(v)
            v = pd.Timestamp(v)
            # timezone-aware values are converted to UTC
            if v.tz is not None:
                v = v.tz_convert('UTC')
            return TermValue(v, v.value, kind)
        elif (isinstance(v, datetime) or hasattr(v, 'timetuple')
              or kind == u('date')):
            # NOTE(review): the float returned by time.mktime is passed to
            # pd.Timestamp without an explicit unit -- confirm this is the
            # intended interpretation.
            v = time.mktime(v.timetuple())
            return TermValue(v, pd.Timestamp(v), kind)
        elif kind == u('timedelta64') or kind == u('timedelta'):
            v = _coerce_scalar_to_timedelta_type(v, unit='s').value
            return TermValue(int(v), v, kind)
        elif meta == u('category'):
            # translate the value into its position within the metadata
            metadata = com._values_from_object(self.metadata)
            result = metadata.searchsorted(v, side='left')
            return TermValue(result, result, u('integer'))
        elif kind == u('integer'):
            try:
                # exact for ints and integer-like strings of any magnitude;
                # going through float first would round anything > 2**53
                v = int(v)
            except (TypeError, ValueError):
                # float-like text such as '1.5' or '1e3': parse, then
                # truncate; invalid input raises from here
                v = int(float(v))
            return TermValue(v, v, kind)
        elif kind == u('float'):
            v = float(v)
            return TermValue(v, v, kind)
        elif kind == u('bool'):
            if isinstance(v, string_types):
                # any spelling outside this falsy list counts as True
                v = v.strip().lower() not in [
                    u('false'),
                    u('f'),
                    u('no'),
                    u('n'),
                    u('none'),
                    u('0'),
                    u('[]'),
                    u('{}'),
                    u('')
                ]
            else:
                v = bool(v)
            return TermValue(v, v, kind)

        # everything else is treated as a string: non-string values are
        # stringified first, then the value is stringified for quoting
        if not isinstance(v, string_types):
            v = stringify(v)

        # string quoting
        return TermValue(v, stringify(v), u('string'))