Example #1
0
    def test_frame_mixedtype_orient(self):  # GH10289
        """Round-trip a mixed-dtype frame through to_json/read_json per orient."""
        rows = [
            [10, 1, "foo", 0.1, 0.01],
            [20, 2, "bar", 0.2, 0.02],
            [30, 3, "baz", 0.3, 0.03],
            [40, 4, "qux", 0.4, 0.04],
        ]
        col_labels = ["1st", "2nd", "3rd", "4th", "5th"]

        df = DataFrame(rows, index=list("abcd"), columns=col_labels)
        self.assertTrue(df._is_mixed_type)

        expected = df.copy()

        def roundtrip(orient):
            # Serialise the untouched source frame and read it straight back.
            payload = df.to_json(orient=orient)
            return read_json(payload, orient=orient, convert_axes=False)

        # For these orients the frame is expected back with its labels intact.
        for orient in ("split", "index", "columns"):
            assert_frame_equal(roundtrip(orient), expected)

        # For "records" the expected index is the positional 0..n-1 range.
        expected.index = np.arange(len(df))
        assert_frame_equal(roundtrip("records"), expected)

        # For "values" the expected columns are positional as well.
        expected.columns = np.arange(df.shape[1])
        assert_frame_equal(roundtrip("values"), expected)
Example #2
0
def convertToPutJson(csv_file):
    """Convert a CSV/TXT file into a file of PUT payloads, one JSON object each.

    Every row of the cleaned input becomes an object with the keys
    ``method`` ("PUT"), ``recordId`` (the row's index label as a string) and
    ``body`` (a mapping of stripped column name -> one-element list holding
    the stripped string value). Objects are separated by a blank line.

    Parameters
    ----------
    csv_file : str
        Path of the input file; it is read with ``read_csv`` and passed
        through ``cleanColumns``.

    Returns
    -------
    str
        Path of the JSON file that was written.
    """
    df = cleanColumns(read_csv(csv_file))
    putColumns = ["method", "recordId", "body"]

    # Collect plain rows and build the frame once; appending a one-row frame
    # per record copies the whole frame on every iteration.
    rows = []
    for recordId in df.index:
        print("Converting data for recordId {recordId}...".format(recordId=recordId))
        body = {}

        for col in df.columns:
            body[str(col).strip()] = [str(df[col][recordId]).strip()]

        rows.append(["PUT", str(recordId), body])

    putDf = DataFrame(rows, columns=putColumns)

    # Only swap a trailing extension: an unanchored pattern would also rewrite
    # "csv"/"txt" occurring in directory names or in the middle of the name.
    json_file = sub(r"(?i)\.(csv|txt)$", ".json", csv_file)
    if json_file == csv_file:
        # No recognised extension; never overwrite the input file.
        json_file = csv_file + ".json"

    # to_json returns the serialised text when no path is given, so the
    # record separators can be rewritten before a single write to disk.
    # NOTE(review): the textual "},{" replacement would also hit a value that
    # itself contains "},{" -- kept to preserve the existing output format.
    putData = putDf.to_json(orient="records")
    putData = putData.replace("},{", "}\n\n{")[1:-1]

    with open(json_file, 'w') as target:
        target.write(putData)

    print("Successfully created put data!")
    return json_file
Example #3
0
    def test_timedelta(self):
        """Timedelta data comes back from JSON once re-converted to timedelta."""

        def as_timedelta_ms(values):
            return pd.to_timedelta(values, unit="ms")

        # Series with the default index.
        ser = Series([timedelta(23), timedelta(seconds=5)])
        self.assertEqual(ser.dtype, "timedelta64[ns]")
        # index will be float dtype
        restored = pd.read_json(ser.to_json(), typ="series").apply(as_timedelta_ms)
        assert_series_equal(ser, restored, check_index_type=False)

        # Series with an explicit float index.
        float_index = pd.Index([0, 1], dtype=float)
        ser = Series([timedelta(23), timedelta(seconds=5)], index=float_index)
        self.assertEqual(ser.dtype, "timedelta64[ns]")
        restored = pd.read_json(ser.to_json(), typ="series").apply(as_timedelta_ms)
        assert_series_equal(ser, restored)

        # Single-column frame.
        frame = DataFrame([timedelta(23), timedelta(seconds=5)])
        self.assertEqual(frame[0].dtype, "timedelta64[ns]")
        restored_frame = pd.read_json(frame.to_json()).apply(as_timedelta_ms)
        assert_frame_equal(
            frame, restored_frame, check_index_type=False, check_column_type=False
        )

        # Frame mixing timedelta, integer and datetime columns.
        mixed_columns = {
            "a": [timedelta(days=23), timedelta(seconds=5)],
            "b": [1, 2],
            "c": pd.date_range(start="20130101", periods=2),
        }
        frame = DataFrame(mixed_columns)

        result = pd.read_json(frame.to_json(date_unit="ns"))
        result["a"] = pd.to_timedelta(result.a, unit="ns")
        result["c"] = pd.to_datetime(result.c)
        assert_frame_equal(frame, result, check_index_type=False)
Example #4
0
    def test_frame_mixedtype_orient(self):  # GH10289
        """Check a mixed-dtype frame survives JSON round trips for each orient."""
        data = [
            [10, 1, 'foo', .1, .01],
            [20, 2, 'bar', .2, .02],
            [30, 3, 'baz', .3, .03],
            [40, 4, 'qux', .4, .04],
        ]

        df = DataFrame(
            data,
            index=list('abcd'),
            columns=['1st', '2nd', '3rd', '4th', '5th'],
        )
        self.assertTrue(df._is_mixed_type)

        expected = df.copy()
        n_rows, n_cols = df.shape

        # Labelled orients: the frame is compared against an unmodified copy.
        for orient in ('split', 'index', 'columns'):
            encoded = df.to_json(orient=orient)
            decoded = read_json(encoded, orient=orient, convert_axes=False)
            assert_frame_equal(decoded, expected)

        # 'records': compare against a copy with a positional row index.
        expected.index = np.arange(n_rows)
        encoded = df.to_json(orient='records')
        decoded = read_json(encoded, orient='records', convert_axes=False)
        assert_frame_equal(decoded, expected)

        # 'values': additionally compare against positional column labels.
        expected.columns = np.arange(n_cols)
        encoded = df.to_json(orient='values')
        decoded = read_json(encoded, orient='values', convert_axes=False)
        assert_frame_equal(decoded, expected)
Example #5
0
    def test_timedelta(self):
        """Timedelta values round-trip through JSON after re-conversion."""

        def to_ms_timedelta(values):
            return pd.to_timedelta(values, unit='ms')

        def series_roundtrip(ser):
            # Read the serialised series back and restore the timedelta dtype.
            return pd.read_json(ser.to_json(), typ='series').apply(
                to_ms_timedelta)

        s = Series([timedelta(23), timedelta(seconds=5)])
        self.assertEqual(s.dtype, 'timedelta64[ns]')
        assert_series_equal(series_roundtrip(s), s)

        s = Series([timedelta(23), timedelta(seconds=5)],
                   index=pd.Index([0, 1]))
        self.assertEqual(s.dtype, 'timedelta64[ns]')
        assert_series_equal(series_roundtrip(s), s)

        frame = DataFrame([timedelta(23), timedelta(seconds=5)])
        self.assertEqual(frame[0].dtype, 'timedelta64[ns]')
        frame_back = pd.read_json(frame.to_json()).apply(to_ms_timedelta)
        assert_frame_equal(frame, frame_back)

        # Mixed frame: timedelta, integer and datetime columns together.
        frame = DataFrame({
            'a': [timedelta(days=23), timedelta(seconds=5)],
            'b': [1, 2],
            'c': pd.date_range(start='20130101', periods=2),
        })

        result = pd.read_json(frame.to_json(date_unit='ns'))
        result['a'] = pd.to_timedelta(result.a, unit='ns')
        result['c'] = pd.to_datetime(result.c)
        assert_frame_equal(frame, result)
Example #6
0
    def test_frame_non_unique_columns(self):
        """Duplicate column labels: some orients must raise, others round-trip."""
        row_labels = [1, 2]
        dup_columns = ["x", "x"]
        df = DataFrame([["a", "b"], ["c", "d"]], index=row_labels, columns=dup_columns)

        # These orients are expected to reject duplicate column labels.
        for orient in ("index", "columns", "records"):
            self.assertRaises(ValueError, df.to_json, orient=orient)

        split_json = df.to_json(orient="split")
        assert_frame_equal(df, read_json(split_json, orient="split", dtype=False))

        values_json = df.to_json(orient="values")
        unser = read_json(values_json, orient="values")
        np.testing.assert_equal(df.values, unser.values)

        # GH4377; duplicate columns not processing correctly
        df = DataFrame([["a", "b"], ["c", "d"]], index=row_labels, columns=["x", "y"])
        result = read_json(df.to_json(orient="split"), orient="split")
        assert_frame_equal(result, df)

        def _check(df):
            encoded = df.to_json(orient="split")
            decoded = read_json(encoded, orient="split", convert_dates=["x"])
            assert_frame_equal(decoded, df)

        payloads = [
            [["a", "b"], ["c", "d"]],
            [[1.5, 2.5], [3.5, 4.5]],
            [[1, 2.5], [3, 4.5]],
            [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]],
        ]
        for payload in payloads:
            _check(DataFrame(payload, index=[1, 2], columns=["x", "x"]))
Example #7
0
    def test_frame_non_unique_columns(self):
        """Exercise to_json/read_json on frames with repeated column labels."""
        df = DataFrame(
            [['a', 'b'], ['c', 'd']],
            index=[1, 2],
            columns=['x', 'x'],
        )

        rejected_orients = ['index', 'columns', 'records']
        for rejected in rejected_orients:
            self.assertRaises(ValueError, df.to_json, orient=rejected)

        restored = read_json(df.to_json(orient='split'),
                             orient='split', dtype=False)
        assert_frame_equal(df, restored)

        unser = read_json(df.to_json(orient='values'), orient='values')
        np.testing.assert_equal(df.values, unser.values)

        # GH4377; duplicate columns not processing correctly
        df = DataFrame(
            [['a', 'b'], ['c', 'd']],
            index=[1, 2],
            columns=['x', 'y'],
        )
        result = read_json(df.to_json(orient='split'), orient='split')
        assert_frame_equal(result, df)

        def _check(df):
            as_json = df.to_json(orient='split')
            result = read_json(as_json, orient='split', convert_dates=['x'])
            assert_frame_equal(result, df)

        string_rows = [['a', 'b'], ['c', 'd']]
        float_rows = [[1.5, 2.5], [3.5, 4.5]]
        int_float_rows = [[1, 2.5], [3, 4.5]]
        stamp_rows = [[Timestamp('20130101'), 3.5],
                      [Timestamp('20130102'), 4.5]]

        for o in (string_rows, float_rows, int_float_rows, stamp_rows):
            _check(DataFrame(o, index=[1, 2], columns=['x', 'x']))
Example #8
0
 def setup(self, index):
     """Write a 100000 x 5 random float frame to ``self.fname`` as JSON lines.

     ``index`` selects the row index of the frame: 'int' or 'datetime'.
     """
     n_rows, n_cols = 100000, 5
     index_by_kind = {
         'int': np.arange(n_rows),
         'datetime': date_range('20000101', periods=n_rows, freq='H'),
     }
     frame = DataFrame(
         np.random.randn(n_rows, n_cols),
         columns=['float_{}'.format(i) for i in range(n_cols)],
         index=index_by_kind[index],
     )
     frame.to_json(self.fname, orient='records', lines=True)
Example #9
0
    def test_frame_empty(self):
        """An empty frame round-trips, before and after casting one column."""
        frame = DataFrame(columns=['jim', 'joe'])
        self.assertFalse(frame._is_mixed_type)
        restored = read_json(frame.to_json())
        assert_frame_equal(restored, frame)

        # mixed type
        frame['joe'] = frame['joe'].astype('i8')
        self.assertTrue(frame._is_mixed_type)
        restored = read_json(frame.to_json())
        assert_frame_equal(restored, frame)
Example #10
0
    def test_data_frame_size_after_to_json(self):
        # GH15344
        df = DataFrame({'a': [str(1)]})

        size_before = df.memory_usage(index=True, deep=True).sum()
        df.to_json()
        size_after = df.memory_usage(index=True, deep=True).sum()

        self.assertEqual(size_before, size_after)
Example #11
0
    def test_frame_double_encoded_labels(self):
        """Labels containing quotes, slashes and backslashes survive a round trip."""
        frame = DataFrame(
            [["a", "b"], ["c", "d"]],
            index=['index " 1', "index / 2"],
            columns=["a \\ b", "y / z"],
        )

        # For these orients the whole frame is compared after the round trip.
        for orient in ("split", "columns", "index"):
            encoded = frame.to_json(orient=orient)
            assert_frame_equal(frame, read_json(encoded, orient=orient))

        # For "records" only the column labels and the values are compared.
        from_records = read_json(frame.to_json(orient="records"), orient="records")
        assert_index_equal(frame.columns, from_records.columns)
        np.testing.assert_equal(frame.values, from_records.values)
Example #12
0
    def test_reconstruction_index(self):
        """Default-orient round trips rebuild both default and string indexes."""
        # Frame with default integer labels on both axes.
        unlabelled = DataFrame([[1, 2, 3], [4, 5, 6]])
        assert_frame_equal(read_json(unlabelled.to_json()), unlabelled)

        # Frame with string row and column labels.
        labelled = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"])
        assert_frame_equal(read_json(labelled.to_json()), labelled)
Example #13
0
    def test_reconstruction_index(self):
        """A frame read back from its default JSON form equals the original."""

        def check_roundtrip(frame):
            restored = read_json(frame.to_json())
            assert_frame_equal(restored, frame)

        check_roundtrip(DataFrame([[1, 2, 3], [4, 5, 6]]))
        check_roundtrip(
            DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['A', 'B', 'C'])
        )
Example #14
0
    def test_to_jsonl(self):
        """to_json(lines=True) emits one record per line, escaping as needed."""
        # GH9180
        columns = ['a', 'b']

        plain = DataFrame([[1, 2], [1, 2]], columns=columns)
        produced = plain.to_json(orient="records", lines=True)
        self.assertEqual(produced, '{"a":1,"b":2}\n{"a":1,"b":2}')

        # Values containing a closing brace and a double quote.
        tricky = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=columns)
        produced = tricky.to_json(orient="records", lines=True)
        wanted = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
        self.assertEqual(produced, wanted)
        assert_frame_equal(pd.read_json(produced, lines=True), tricky)
Example #15
0
    def test_categorical(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]})
        df["B"] = df["A"]
        expected = df.to_json()

        df["B"] = df["A"].astype('category')
        self.assertEqual(expected, df.to_json())

        s = df["A"]
        sc = df["B"]
        self.assertEqual(s.to_json(), sc.to_json())
Example #16
0
    def test_reconstruction_index(self):
        """Check axis dtypes and contents after a default-orient round trip."""
        source = DataFrame([[1, 2, 3], [4, 5, 6]])
        restored = read_json(source.to_json())

        # Both axes are expected to come back as float64 here, so the frame
        # comparison below skips the axis type checks.
        for axis in (restored.index, restored.columns):
            self.assertEqual(axis.dtype, np.float64)
        assert_frame_equal(
            restored, source, check_index_type=False, check_column_type=False
        )

        source = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"])
        restored = read_json(source.to_json())
        assert_frame_equal(restored, source)
Example #17
0
    def test_frame_non_unique_index(self):
        """Duplicate row labels: index/columns orients raise, the rest round-trip."""
        frame = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])

        for orient in ("index", "columns"):
            self.assertRaises(ValueError, frame.to_json, orient=orient)

        split_back = read_json(frame.to_json(orient="split"), orient="split")
        assert_frame_equal(frame, split_back)

        records_back = read_json(frame.to_json(orient="records"), orient="records")
        self.assertTrue(frame.columns.equals(records_back.columns))
        np.testing.assert_equal(frame.values, records_back.values)

        values_back = read_json(frame.to_json(orient="values"), orient="values")
        np.testing.assert_equal(frame.values, values_back.values)
def main():
    """Scrape the IGM expert-panel survey pages into ``survey_results.json``.

    The panel home page is searched for links to survey result pages. Each
    page is fetched (oldest link last in the listing is processed first,
    since the link list is reversed), its questions, responders and responses
    are extracted with regular expressions, and every sub-question becomes
    one column of a DataFrame indexed by responder. The frame is finally
    written to ``survey_results.json`` in the working directory.
    """

    # Get links to survey pages
    home_url = "http://www.igmchicago.org/igm-economic-experts-panel"
    home_contents = get_page_contents(home_url)
    urls = re.findall(
        r"<h2><a href=\"(\S+?results\?SurveyID=\S+?)\"", home_contents)
    urls = ["http://www.igmchicago.org" + url for url in urls]

    # Loop through survey pages
    df = DataFrame()
    question_count = 0
    for url in reversed(urls):

        contents = get_page_contents(url)

        questions = re.findall(r"surveyQuestion\">([\s\S]+?)</h3>", contents)
        responder_list = re.findall(
            r"\?id=([\d]+)?\">([\s\w.]+?)</a>", contents)

        responses = re.findall(
            r"<span class=\"option-[\d]+?\">([\s\w.]+?)</span>", contents)

        if not questions:
            # Nothing to record for this page; also avoids dividing by zero.
            continue

        # Floor division: the value is used as a slice bound below, and true
        # division would produce a float on Python 3.
        num_responders = len(responses) // len(questions)

        # The url suffix is the same for every sub-question on the page.
        q_url_suffix = re.findall("=(.+)", url)[0]

        # Loop through sub-questions (A, B, etc) within each page
        for i, question in enumerate(questions):
            question = clean_string(question)
            question_count += 1
            print(question)

            # Restrict range to responses for this sub-question
            start = i * num_responders
            stop = (i + 1) * num_responders

            # Collect sub-question, its url suffix, and the responses
            prefix = "(%03d" % question_count + ") "
            q_responses = Series(
                responses[start:stop], index=responder_list[start:stop])
            q_responses = q_responses.append(
                Series([q_url_suffix], index=['q_url_suffix']))
            q_responses.name = prefix + question.strip()

            # Add question data to dataframe
            df = df.join(q_responses, how='outer')

    # Move responder id from index to column, only after all joins are complete.
    # Responder rows are labelled with (id, name) pairs; the 'q_url_suffix'
    # row is a plain string and has no responder id.
    df['responder_id'] = [
        pair[0] if isinstance(pair, tuple) else None for pair in df.index]
    df.index = [
        pair[1] if isinstance(pair, tuple) else pair for pair in df.index]

    # Write to file
    df.to_json("survey_results.json")
    def test_frame_non_unique_index(self):
        """Frames with repeated row labels: check each orient's behaviour."""
        frame = DataFrame(
            [['a', 'b'], ['c', 'd']],
            index=[1, 1],
            columns=['x', 'y'],
        )

        # Orients expected to refuse a non-unique index.
        for orient in ('index', 'columns'):
            pytest.raises(ValueError, frame.to_json, orient=orient)

        split_json = frame.to_json(orient='split')
        assert_frame_equal(frame, read_json(split_json, orient='split'))

        records_json = frame.to_json(orient='records')
        from_records = read_json(records_json, orient='records')
        tm.assert_index_equal(frame.columns, from_records.columns)
        tm.assert_almost_equal(frame.values, from_records.values)

        values_json = frame.to_json(orient='values')
        from_values = read_json(values_json, orient='values')
        tm.assert_numpy_array_equal(frame.values, from_values.values)
Example #20
0
    def test_frame_non_unique_index(self):
        """Non-unique row index: two orients raise, three round-trip the data."""
        data = [['a', 'b'], ['c', 'd']]
        df = DataFrame(data, index=[1, 1], columns=['x', 'y'])

        def reload(orient):
            # Serialise with the given orient and parse it straight back.
            return read_json(df.to_json(orient=orient), orient=orient)

        self.assertRaises(ValueError, df.to_json, orient='index')
        self.assertRaises(ValueError, df.to_json, orient='columns')

        assert_frame_equal(df, reload('split'))

        unser = reload('records')
        self.assertTrue(df.columns.equals(unser.columns))
        np.testing.assert_equal(df.values, unser.values)

        unser = reload('values')
        np.testing.assert_equal(df.values, unser.values)
Example #21
0
    def test_frame_double_encoded_labels(self):
        """Axis labels with special characters round-trip through JSON."""
        row_labels = ['index " 1', 'index / 2']
        col_labels = ['a \\ b', 'y / z']
        df = DataFrame([['a', 'b'], ['c', 'd']],
                       index=row_labels, columns=col_labels)

        def reload(orient):
            return read_json(df.to_json(orient=orient), orient=orient)

        assert_frame_equal(df, reload('split'))
        assert_frame_equal(df, reload('columns'))
        assert_frame_equal(df, reload('index'))

        # With 'records' only the columns and the values are checked.
        df_unser = reload('records')
        assert_index_equal(df.columns, df_unser.columns)
        np.testing.assert_equal(df.values, df_unser.values)
Example #22
0
    def post(self):
        """Run a 10-row preview of the posted SQL and reply with JSON records.

        The JSON request body is read for two keys: ``connection`` (the slug
        of a connection stored under the admin bucket's ``connection`` entry)
        and ``sql`` (the query to preview). The reply is a dict with ``sql``
        (the preview rows serialised with ``orient='records'``) and ``msg``.
        On an unknown slug or a failing query the reply is
        ``{'sql': '', 'msg': 'Error!'}``; an empty result finishes the
        request without a body.
        """
        payload = json.loads(self.request.body)

        MyClient = riak.RiakClient(protocol=RIAK_PROTOCOL,
                                   http_port=RIAK_HTTP_PORT,
                                   host=RIAK_HOST)

        MyAdminBucket = MyClient.bucket(ADMIN_BUCKET_NAME)

        # Resolve the slug to its connection string (last match wins).
        slug = payload.get('connection', None)
        connection_url = None
        for c in MyAdminBucket.get('connection').data:
            if c['slug'] == slug:
                connection_url = c['connection']

        if connection_url is None:
            # Unknown slug: report the error instead of handing None to
            # create_engine.
            self.write({'sql': '', 'msg': 'Error!'})
            self.finish()
            return

        # NOTE(review): the posted SQL is interpolated verbatim into the
        # statement. That is the purpose of this preview endpoint, but it
        # means the caller can run arbitrary SQL on the target connection.
        sql = """SELECT * FROM ({}) AS CUBE LIMIT 10;""".format(
            payload.get('sql', None))

        db_connection = create_engine(connection_url).connect()
        try:
            resoverall = db_connection.execute(text(sql))
            # Pull everything needed before the connection is released.
            rows = resoverall.fetchall()
            columns = list(resoverall.keys())
        except Exception:
            self.write({'sql': '', 'msg': 'Error!'})
            self.finish()
            # Stop here: there is no result to build a frame from.
            return
        finally:
            db_connection.close()

        df = DataFrame(rows)
        if df.empty:
            self.finish()
            # Stop here so the request is not written to / finished twice.
            return
        df.columns = columns

        self.write({'sql': df.to_json(orient='records'), 'msg': 'Success!'})
        self.finish()
Example #23
0
    def test_reconstruction_index(self):
        """A default-indexed frame equals itself after a JSON round trip."""
        original = DataFrame([[1, 2, 3], [4, 5, 6]])
        as_json = original.to_json()
        restored = read_json(as_json)

        # the index is serialized as strings....correct?
        assert_frame_equal(restored, original)
Example #24
0
    def test_blocks_compat_GH9037(self):
        """A frame with float, int and str columns round-trips via orient='split'."""
        hourly_index = pd.date_range('20000101', periods=10, freq='H')
        column_data = OrderedDict(
            float_1=[-0.92077639, 0.77434435, 1.25234727, 0.61485564,
                     -0.60316077, 0.24653374, 0.28668979, -2.51969012,
                     0.95748401, -1.02970536],
            int_1=[19680418, 75337055, 99973684, 65103179, 79373900,
                   40314334, 21290235,  4991321, 41903419, 16008365],
            str_1=['78c608f1', '64a99743', '13d2ff52', 'ca7f4af2', '97236474',
                   'bde7e214', '1a6bde47', 'b1190be5', '7a669144', '8d64d068'],
            float_2=[-0.0428278, -1.80872357,  3.36042349, -0.7573685,
                     -0.48217572, 0.86229683, 1.08935819, 0.93898739,
                     -0.03030452, 1.43366348],
            str_2=['14f04af9', 'd085da90', '4bcfac83', '81504caf', '2ffef4a9',
                   '08e2f5c4', '07e1af03', 'addbd4a7', '1f6a09ba', '4bfc4d87'],
            int_2=[86967717, 98098830, 51927505, 20372254, 12601730, 20884027,
                   34193846, 10561746, 24867120, 76131025]
        )
        df_mixed = DataFrame(column_data, index=hourly_index)

        # JSON deserialisation always creates unicode strings
        df_mixed.columns = df_mixed.columns.astype('unicode')

        split_json = df_mixed.to_json(orient='split')
        df_roundtrip = pd.read_json(split_json, orient='split')

        # Strictest comparison: exact values plus index, column and frame types.
        strict_checks = dict(
            check_index_type=True,
            check_column_type=True,
            check_frame_type=True,
            by_blocks=True,
            check_exact=True,
        )
        assert_frame_equal(df_mixed, df_roundtrip, **strict_checks)
Example #25
0
 def test_frame_empty_mixedtype(self):
     """An empty frame with mixed column dtypes round-trips through JSON."""
     # mixed type
     frame = DataFrame(columns=['jim', 'joe'])
     frame['joe'] = frame['joe'].astype('i8')
     self.assertTrue(frame._is_mixed_type)

     # Read back with the original per-column dtypes supplied explicitly.
     column_dtypes = dict(frame.dtypes)
     restored = read_json(frame.to_json(), dtype=column_dtypes)
     assert_frame_equal(restored, frame, check_index_type=False)
Example #26
0
    def test_mixed_timedelta_datetime(self):
        """An object column of timedelta + Timestamp reads back as their int values."""
        mixed = [timedelta(23), pd.Timestamp('20130101')]
        frame = DataFrame({'a': mixed}, dtype=object)

        # Expected: the ``.value`` of each element, in order.
        delta_value = pd.Timedelta(frame.a[0]).value
        stamp_value = pd.Timestamp(frame.a[1]).value
        expected = DataFrame({'a': [delta_value, stamp_value]})

        as_json = frame.to_json(date_unit='ns')
        result = pd.read_json(as_json, dtype={'a': 'int64'})
        assert_frame_equal(result, expected)
Example #27
0
 def test_frame_empty(self):
     """Empty frames round-trip, and an empty column serialises as ``{}``."""
     frame = DataFrame(columns=['jim', 'joe'])
     self.assertFalse(frame._is_mixed_type)
     restored = read_json(frame.to_json(), dtype=dict(frame.dtypes))
     assert_frame_equal(restored, frame, check_index_type=False)

     # GH 7445
     empty_column = pd.DataFrame({'test': []}, index=[])
     result = empty_column.to_json(orient='columns')
     tm.assert_equal(result, '{"test":{}}')
 def test_default_handler_numpy_unsupported_dtype(self):
     # GH12554 to_json raises 'Unhandled numpy dtype 15'
     df = DataFrame({'a': [1, 2.3, complex(4, -5)],
                     'b': [float('nan'), None, complex(1.2, 0)]},
                    columns=['a', 'b'])
     expected = ('[["(1+0j)","(nan+0j)"],'
                 '["(2.3+0j)","(nan+0j)"],'
                 '["(4-5j)","(1.2+0j)"]]')
     assert df.to_json(default_handler=str, orient="values") == expected
Example #29
0
    def test_doc_example(self):
        """Read the documentation example back with explicit column dtypes.

        The original assertion compared ``result`` with itself and so could
        never fail. The checks below pin what the ``dtype`` argument and the
        serialised frame establish: same shape, same set of column labels,
        and the requested dtypes for the ``ints`` and ``bools`` columns.
        """
        dfj2 = DataFrame(np.random.randn(5, 2), columns=list('AB'))
        dfj2['date'] = Timestamp('20130101')
        dfj2['ints'] = lrange(5)
        dfj2['bools'] = True
        dfj2.index = pd.date_range('20130101', periods=5)

        json = dfj2.to_json()
        result = read_json(json, dtype={'ints': np.int64, 'bools': np.bool_})

        assert result.shape == dfj2.shape
        assert sorted(result.columns) == sorted(dfj2.columns)
        assert result['ints'].dtype == np.int64
        assert result['bools'].dtype == np.bool_
Example #30
0
    def test_doc_example(self):
        """Read the documentation example back with explicit column dtypes.

        The original assertion compared ``result`` with itself and so could
        never fail. The checks below pin what the ``dtype`` argument and the
        serialised frame establish: same shape, same set of column labels,
        and the requested dtypes for the ``ints`` and ``bools`` columns.
        """
        dfj2 = DataFrame(np.random.randn(5, 2), columns=list("AB"))
        dfj2["date"] = Timestamp("20130101")
        dfj2["ints"] = lrange(5)
        dfj2["bools"] = True
        dfj2.index = pd.date_range("20130101", periods=5)

        json = dfj2.to_json()
        result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_})

        assert result.shape == dfj2.shape
        assert sorted(result.columns) == sorted(dfj2.columns)
        assert result["ints"].dtype == np.int64
        assert result["bools"].dtype == np.bool_