Example 1
    def test_frame_mixedtype_orient(self):  # GH10289
        vals = [
            [10, 1, "foo", 0.1, 0.01],
            [20, 2, "bar", 0.2, 0.02],
            [30, 3, "baz", 0.3, 0.03],
            [40, 4, "qux", 0.4, 0.04],
        ]

        df = DataFrame(vals, index=list("abcd"), columns=["1st", "2nd", "3rd", "4th", "5th"])

        self.assertTrue(df._is_mixed_type)
        right = df.copy()

        for orient in ["split", "index", "columns"]:
            inp = df.to_json(orient=orient)
            left = read_json(inp, orient=orient, convert_axes=False)
            assert_frame_equal(left, right)

        right.index = np.arange(len(df))
        inp = df.to_json(orient="records")
        left = read_json(inp, orient="records", convert_axes=False)
        assert_frame_equal(left, right)

        right.columns = np.arange(df.shape[1])
        inp = df.to_json(orient="values")
        left = read_json(inp, orient="values", convert_axes=False)
        assert_frame_equal(left, right)
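
A minimal standalone sketch of the round trip this test exercises (the frame below is illustrative, not taken from the test); convert_axes=False keeps the string row and column labels instead of coercing them:

from io import StringIO

import pandas as pd

df = pd.DataFrame(
    [[10, 1, "foo", 0.1], [20, 2, "bar", 0.2]],
    index=list("ab"),
    columns=["1st", "2nd", "3rd", "4th"],
)

# Serialize with a given orient and parse it back the same way.
for orient in ["split", "index", "columns"]:
    payload = df.to_json(orient=orient)
    roundtrip = pd.read_json(StringIO(payload), orient=orient, convert_axes=False)
    print(orient, roundtrip.equals(df))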
Example 2
    def test_frame_non_unique_columns(self):
        df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "x"])

        self.assertRaises(ValueError, df.to_json, orient="index")
        self.assertRaises(ValueError, df.to_json, orient="columns")
        self.assertRaises(ValueError, df.to_json, orient="records")

        assert_frame_equal(df, read_json(df.to_json(orient="split"), orient="split", dtype=False))
        unser = read_json(df.to_json(orient="values"), orient="values")
        np.testing.assert_equal(df.values, unser.values)

        # GH4377; duplicate columns not processing correctly
        df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "y"])
        result = read_json(df.to_json(orient="split"), orient="split")
        assert_frame_equal(result, df)

        def _check(df):
            result = read_json(df.to_json(orient="split"), orient="split", convert_dates=["x"])
            assert_frame_equal(result, df)

        for o in [
            [["a", "b"], ["c", "d"]],
            [[1.5, 2.5], [3.5, 4.5]],
            [[1, 2.5], [3, 4.5]],
            [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]],
        ]:
            _check(DataFrame(o, index=[1, 2], columns=["x", "x"]))
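
A quick standalone illustration of the duplicate-column behaviour asserted above (a sketch, not part of the test file): orients that key rows by column label refuse duplicate columns, while 'split' and 'values' still work.

import pandas as pd

df = pd.DataFrame([["a", "b"], ["c", "d"]], columns=["x", "x"])

# 'index', 'columns' and 'records' need unique column labels and raise ValueError.
try:
    df.to_json(orient="records")
except ValueError as err:
    print("records:", err)

# 'split' keeps the duplicate labels and 'values' drops labels entirely.
print(df.to_json(orient="split"))
print(df.to_json(orient="values"))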
Example 3
    def test_timedelta(self):
        converter = lambda x: pd.to_timedelta(x, unit="ms")

        s = Series([timedelta(23), timedelta(seconds=5)])
        self.assertEqual(s.dtype, "timedelta64[ns]")
        # index will be float dtype
        assert_series_equal(s, pd.read_json(s.to_json(), typ="series").apply(converter), check_index_type=False)

        s = Series([timedelta(23), timedelta(seconds=5)], index=pd.Index([0, 1], dtype=float))
        self.assertEqual(s.dtype, "timedelta64[ns]")
        assert_series_equal(s, pd.read_json(s.to_json(), typ="series").apply(converter))

        frame = DataFrame([timedelta(23), timedelta(seconds=5)])
        self.assertEqual(frame[0].dtype, "timedelta64[ns]")
        assert_frame_equal(
            frame, pd.read_json(frame.to_json()).apply(converter), check_index_type=False, check_column_type=False
        )

        frame = DataFrame(
            {
                "a": [timedelta(days=23), timedelta(seconds=5)],
                "b": [1, 2],
                "c": pd.date_range(start="20130101", periods=2),
            }
        )

        result = pd.read_json(frame.to_json(date_unit="ns"))
        result["a"] = pd.to_timedelta(result.a, unit="ns")
        result["c"] = pd.to_datetime(result.c)
        assert_frame_equal(frame, result, check_index_type=False)
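
For context, a small sketch of why the converter above uses unit="ms": with default settings, to_json writes timedeltas as integer milliseconds, so they have to be converted back by hand after read_json (the series contents here are illustrative):

from datetime import timedelta
from io import StringIO

import pandas as pd

s = pd.Series([timedelta(days=23), timedelta(seconds=5)])
print(s.to_json())  # timedeltas come out as integer milliseconds

# Parse back and restore the timedelta64 dtype, as the test's converter does.
raw = pd.read_json(StringIO(s.to_json()), typ="series")
print(raw.apply(lambda x: pd.to_timedelta(x, unit="ms")))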
Example 4
    def test_frame_mixedtype_orient(self):  # GH10289
        vals = [[10, 1, 'foo', .1, .01],
                [20, 2, 'bar', .2, .02],
                [30, 3, 'baz', .3, .03],
                [40, 4, 'qux', .4, .04]]

        df = DataFrame(vals, index=list('abcd'),
                       columns=['1st', '2nd', '3rd', '4th', '5th'])

        self.assertTrue(df._is_mixed_type)
        right = df.copy()

        for orient in ['split', 'index', 'columns']:
            inp = df.to_json(orient=orient)
            left = read_json(inp, orient=orient, convert_axes=False)
            assert_frame_equal(left, right)

        right.index = np.arange(len(df))
        inp = df.to_json(orient='records')
        left = read_json(inp, orient='records', convert_axes=False)
        assert_frame_equal(left, right)

        right.columns = np.arange(df.shape[1])
        inp = df.to_json(orient='values')
        left = read_json(inp, orient='values', convert_axes=False)
        assert_frame_equal(left, right)
Example 5
    def test_timedelta(self):
        converter = lambda x: pd.to_timedelta(x, unit='ms')

        s = Series([timedelta(23), timedelta(seconds=5)])
        self.assertEqual(s.dtype, 'timedelta64[ns]')

        result = pd.read_json(s.to_json(), typ='series').apply(converter)
        assert_series_equal(result, s)

        s = Series([timedelta(23), timedelta(seconds=5)],
                   index=pd.Index([0, 1]))
        self.assertEqual(s.dtype, 'timedelta64[ns]')
        result = pd.read_json(s.to_json(), typ='series').apply(converter)
        assert_series_equal(result, s)

        frame = DataFrame([timedelta(23), timedelta(seconds=5)])
        self.assertEqual(frame[0].dtype, 'timedelta64[ns]')
        assert_frame_equal(frame, pd.read_json(frame.to_json())
                           .apply(converter))

        frame = DataFrame({'a': [timedelta(days=23), timedelta(seconds=5)],
                           'b': [1, 2],
                           'c': pd.date_range(start='20130101', periods=2)})

        result = pd.read_json(frame.to_json(date_unit='ns'))
        result['a'] = pd.to_timedelta(result.a, unit='ns')
        result['c'] = pd.to_datetime(result.c)
        assert_frame_equal(frame, result)
Example 6
def convertToPutJson(csv_file):
    df = cleanColumns(read_csv(csv_file))
    putColumns = ["method", "recordId", "body"]
    putDf = DataFrame(columns = putColumns)

    for recordId in df.index:
        print "Converting data for recordId {recordId}...".format(recordId = recordId)
        body = {}
        
        for col in df.columns:
            body[str(col).strip()] = [str(df[col][recordId]).strip()]
        
        putDfRow = DataFrame([["PUT", str(recordId), body]], columns = putColumns)
        putDf = putDf.append(putDfRow)
    
    json_file = sub("csv|txt", "json", csv_file)
    putDf.to_json(json_file, orient="records")

    with open(json_file, 'r') as target:
        putData = target.read()

    target = open(json_file, 'w')
    putData = putData.replace("},{", "}\n\n{")[1:-1]
    target.write(putData)
    target.close()

    print "Successfully created put data!"
    return json_file
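
For comparison, a self-contained sketch (hypothetical column names and file name) of getting one JSON object per record without the manual string surgery above, by letting to_json emit line-delimited records; note it writes one object per line rather than the blank-line-separated records the function builds.

from io import StringIO

import pandas as pd

csv_text = "id,name\n1,foo\n2,bar\n"  # hypothetical input
df = pd.read_csv(StringIO(csv_text), index_col="id")

records = pd.DataFrame({
    "method": "PUT",
    "recordId": df.index.astype(str),
    "body": [row.astype(str).to_dict() for _, row in df.iterrows()],
})

# orient="records" with lines=True writes one JSON object per line.
records.to_json("put_records.json", orient="records", lines=True)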
Example 7
    def test_frame_non_unique_columns(self):
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
                       columns=['x', 'x'])

        self.assertRaises(ValueError, df.to_json, orient='index')
        self.assertRaises(ValueError, df.to_json, orient='columns')
        self.assertRaises(ValueError, df.to_json, orient='records')

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split', dtype=False))
        unser = read_json(df.to_json(orient='values'), orient='values')
        np.testing.assert_equal(df.values, unser.values)

        # GH4377; duplicate columns not processing correctly
        df = DataFrame([['a','b'],['c','d']], index=[1,2], columns=['x','y'])
        result = read_json(df.to_json(orient='split'), orient='split')
        assert_frame_equal(result, df)

        def _check(df):
            result = read_json(df.to_json(orient='split'), orient='split',
                               convert_dates=['x'])
            assert_frame_equal(result, df)

        for o in [[['a','b'],['c','d']],
                  [[1.5,2.5],[3.5,4.5]],
                  [[1,2.5],[3,4.5]],
                  [[Timestamp('20130101'),3.5],[Timestamp('20130102'),4.5]]]:
            _check(DataFrame(o, index=[1,2], columns=['x','x']))
Example 8
 def setup(self, index):
     N = 100000
     indexes = {'int': np.arange(N),
                'datetime': date_range('20000101', periods=N, freq='H')}
     df = DataFrame(np.random.randn(N, 5),
                    columns=['float_{}'.format(i) for i in range(5)],
                    index=indexes[index])
     df.to_json(self.fname, orient='records', lines=True)
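
For context, a small sketch of reading such a line-delimited file back (self.fname above is whatever path the benchmark class defines; the file name below is a stand-in); chunked reading requires lines=True and a pandas version whose read_json supports chunksize.

import numpy as np
import pandas as pd

fname = "records.jsonl"  # stand-in for the benchmark's self.fname
df = pd.DataFrame(np.random.randn(1000, 5),
                  columns=['float_{}'.format(i) for i in range(5)])
df.to_json(fname, orient='records', lines=True)

# Read the whole file back, or stream it in fixed-size chunks of rows.
whole = pd.read_json(fname, orient='records', lines=True)
for chunk in pd.read_json(fname, orient='records', lines=True, chunksize=250):
    print(len(chunk))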
Example 9
    def test_frame_empty(self):
        df = DataFrame(columns=['jim', 'joe'])
        self.assertFalse(df._is_mixed_type)
        assert_frame_equal(read_json(df.to_json()), df)

        # mixed type
        df['joe'] = df['joe'].astype('i8')
        self.assertTrue(df._is_mixed_type)
        assert_frame_equal(read_json(df.to_json()), df)
Example 10
    def test_data_frame_size_after_to_json(self):
        # GH15344
        df = DataFrame({'a': [str(1)]})

        size_before = df.memory_usage(index=True, deep=True).sum()
        df.to_json()
        size_after = df.memory_usage(index=True, deep=True).sum()

        self.assertEqual(size_before, size_after)
Example 11
    def test_frame_double_encoded_labels(self):
        df = DataFrame([["a", "b"], ["c", "d"]], index=['index " 1', "index / 2"], columns=["a \\ b", "y / z"])

        assert_frame_equal(df, read_json(df.to_json(orient="split"), orient="split"))
        assert_frame_equal(df, read_json(df.to_json(orient="columns"), orient="columns"))
        assert_frame_equal(df, read_json(df.to_json(orient="index"), orient="index"))
        df_unser = read_json(df.to_json(orient="records"), orient="records")
        assert_index_equal(df.columns, df_unser.columns)
        np.testing.assert_equal(df.values, df_unser.values)
Example 12
    def test_reconstruction_index(self):

        df = DataFrame([[1, 2, 3], [4, 5, 6]])
        result = read_json(df.to_json())

        assert_frame_equal(result, df)

        df = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['A', 'B', 'C'])
        result = read_json(df.to_json())
        assert_frame_equal(result, df)
Example 13
    def test_reconstruction_index(self):

        df = DataFrame([[1, 2, 3], [4, 5, 6]])
        result = read_json(df.to_json())

        assert_frame_equal(result, df)

        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"])
        result = read_json(df.to_json())
        assert_frame_equal(result, df)
Example 14
    def test_frame_non_unique_index(self):
        df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])

        self.assertRaises(ValueError, df.to_json, orient="index")
        self.assertRaises(ValueError, df.to_json, orient="columns")

        assert_frame_equal(df, read_json(df.to_json(orient="split"), orient="split"))
        unser = read_json(df.to_json(orient="records"), orient="records")
        self.assertTrue(df.columns.equals(unser.columns))
        np.testing.assert_equal(df.values, unser.values)
        unser = read_json(df.to_json(orient="values"), orient="values")
        np.testing.assert_equal(df.values, unser.values)
Example 15
    def test_reconstruction_index(self):

        df = DataFrame([[1, 2, 3], [4, 5, 6]])
        result = read_json(df.to_json())

        self.assertEqual(result.index.dtype, np.float64)
        self.assertEqual(result.columns.dtype, np.float64)
        assert_frame_equal(result, df, check_index_type=False, check_column_type=False)

        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"])
        result = read_json(df.to_json())
        assert_frame_equal(result, df)
Example 16
    def test_categorical(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]})
        df["B"] = df["A"]
        expected = df.to_json()

        df["B"] = df["A"].astype('category')
        self.assertEqual(expected, df.to_json())

        s = df["A"]
        sc = df["B"]
        self.assertEqual(s.to_json(), sc.to_json())
Example 17
    def test_to_jsonl(self):
        # GH9180
        df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
        self.assertEqual(result, expected)

        df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b'])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
        self.assertEqual(result, expected)
        assert_frame_equal(pd.read_json(result, lines=True), df)
Example 18
def main():

    # Get links to survey pages
    home_url = "http://www.igmchicago.org/igm-economic-experts-panel"
    home_contents = get_page_contents(home_url)
    urls = re.findall(
        r"<h2><a href=\"(\S+?results\?SurveyID=\S+?)\"", home_contents)
    urls = ["http://www.igmchicago.org" + url for url in urls]

    # Loop through survey pages
    df = DataFrame()
    question_count = 0
    for url in reversed(urls):

        contents = get_page_contents(url)

        questions = re.findall(r"surveyQuestion\">([\s\S]+?)</h3>", contents)
        responder_list = re.findall(
            r"\?id=([\d]+)?\">([\s\w.]+?)</a>", contents)

        responses = re.findall(
            r"<span class=\"option-[\d]+?\">([\s\w.]+?)</span>", contents)
        # Integer division so the slice bounds below stay integers under Python 3.
        num_responders = len(responses) // len(questions)

        # Loop through sub-questions (A, B, etc) within each page
        for i, question in enumerate(questions):
            question = clean_string(question)
            question_count += 1
            print(question)

            # Restrict range to responses for this sub-question
            rng = (i * num_responders, (i + 1) * num_responders)

            # Collect sub-question, its url suffix, and the responses
            prefix = "(%03d" % question_count + ") "
            q_responses = Series(
                responses[rng[0]:rng[1]], index=responder_list[rng[0]:rng[1]])
            q_url_suffix = re.findall("=(.+)", url)[0]
            q_responses = q_responses.append(
                Series([q_url_suffix], index=['q_url_suffix']))
            q_responses.name = prefix + question.strip()

            # Add question data to dataframe
            df = df.join(q_responses, how='outer')

    # Move responder id from index to column, only after all joins are complete
    df['responder_id'] = [pair[0] for pair in df.index]
    df.index = [pair[1] if type(pair) == tuple else pair for pair in df.index]

    # Write to file
    df.to_json("survey_results.json")
Example 19
    def test_frame_non_unique_index(self):
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1],
                       columns=['x', 'y'])

        self.assertRaises(ValueError, df.to_json, orient='index')
        self.assertRaises(ValueError, df.to_json, orient='columns')

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split'))
        unser = read_json(df.to_json(orient='records'), orient='records')
        self.assertTrue(df.columns.equals(unser.columns))
        np.testing.assert_equal(df.values, unser.values)
        unser = read_json(df.to_json(orient='values'), orient='values')
        np.testing.assert_equal(df.values, unser.values)
Example 20
    def test_frame_double_encoded_labels(self):
        df = DataFrame([['a', 'b'], ['c', 'd']],
                       index=['index " 1', 'index / 2'],
                       columns=['a \\ b', 'y / z'])

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split'))
        assert_frame_equal(df, read_json(df.to_json(orient='columns'),
                                         orient='columns'))
        assert_frame_equal(df, read_json(df.to_json(orient='index'),
                                         orient='index'))
        df_unser = read_json(df.to_json(orient='records'), orient='records')
        assert_index_equal(df.columns, df_unser.columns)
        np.testing.assert_equal(df.values, df_unser.values)
Example 21
    def test_frame_non_unique_index(self):
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1],
                       columns=['x', 'y'])

        pytest.raises(ValueError, df.to_json, orient='index')
        pytest.raises(ValueError, df.to_json, orient='columns')

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split'))
        unser = read_json(df.to_json(orient='records'), orient='records')
        tm.assert_index_equal(df.columns, unser.columns)
        tm.assert_almost_equal(df.values, unser.values)
        unser = read_json(df.to_json(orient='values'), orient='values')
        tm.assert_numpy_array_equal(df.values, unser.values)
Example 22
    def test_reconstruction_index(self):

        df = DataFrame([[1, 2, 3], [4, 5, 6]])
        result = read_json(df.to_json())

        # the index is serialized as strings....correct?
        assert_frame_equal(result, df)
Example 23
    def test_blocks_compat_GH9037(self):
        index = pd.date_range('20000101', periods=10, freq='H')
        df_mixed = DataFrame(OrderedDict(
            float_1=[-0.92077639, 0.77434435, 1.25234727, 0.61485564,
                     -0.60316077, 0.24653374, 0.28668979, -2.51969012,
                     0.95748401, -1.02970536],
            int_1=[19680418, 75337055, 99973684, 65103179, 79373900,
                   40314334, 21290235,  4991321, 41903419, 16008365],
            str_1=['78c608f1', '64a99743', '13d2ff52', 'ca7f4af2', '97236474',
                   'bde7e214', '1a6bde47', 'b1190be5', '7a669144', '8d64d068'],
            float_2=[-0.0428278, -1.80872357,  3.36042349, -0.7573685,
                     -0.48217572, 0.86229683, 1.08935819, 0.93898739,
                     -0.03030452, 1.43366348],
            str_2=['14f04af9', 'd085da90', '4bcfac83', '81504caf', '2ffef4a9',
                   '08e2f5c4', '07e1af03', 'addbd4a7', '1f6a09ba', '4bfc4d87'],
            int_2=[86967717, 98098830, 51927505, 20372254, 12601730, 20884027,
                   34193846, 10561746, 24867120, 76131025]
        ), index=index)

        # JSON deserialisation always creates unicode strings
        df_mixed.columns = df_mixed.columns.astype('unicode')

        df_roundtrip = pd.read_json(df_mixed.to_json(orient='split'),
                                    orient='split')
        assert_frame_equal(df_mixed, df_roundtrip,
                           check_index_type=True,
                           check_column_type=True,
                           check_frame_type=True,
                           by_blocks=True,
                           check_exact=True)
Example 24
    def post(self):
        post = json.loads(self.request.body)

        MyClient = riak.RiakClient(protocol=RIAK_PROTOCOL,
                                   http_port=RIAK_HTTP_PORT,
                                   host=RIAK_HOST)

        MyAdminBucket = MyClient.bucket(ADMIN_BUCKET_NAME)

        connection = None
        for c in MyAdminBucket.get('connection').data:
            if c['slug'] == post.get('connection', None):
                connection = c['connection']

        sql = """SELECT * FROM ({}) AS CUBE LIMIT 10;""".format(
            post.get('sql', None))

        e = create_engine(connection)
        connection = e.connect()
        try:
            resoverall = connection.execute(text(sql))
        except Exception:
            self.write({'sql': '', 'msg': 'Error!'})
            self.finish()
            return

        df = DataFrame(resoverall.fetchall())
        if df.empty:
            self.finish()
            return
        df.columns = resoverall.keys()
        df.head()

        self.write({'sql': df.to_json(orient='records'), 'msg': 'Success!'})
        self.finish()
Example 25
 def test_frame_empty_mixedtype(self):
     # mixed type
     df = DataFrame(columns=['jim', 'joe'])
     df['joe'] = df['joe'].astype('i8')
     self.assertTrue(df._is_mixed_type)
     assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df,
                        check_index_type=False)
Example 26
 def test_frame_empty(self):
     df = DataFrame(columns=['jim', 'joe'])
     self.assertFalse(df._is_mixed_type)
     assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df,
                        check_index_type=False)
     # GH 7445
     result = pd.DataFrame({'test': []}, index=[]).to_json(orient='columns')
     expected = '{"test":{}}'
     tm.assert_equal(result, expected)
Example 27
 def test_default_handler_numpy_unsupported_dtype(self):
     # GH12554 to_json raises 'Unhandled numpy dtype 15'
     df = DataFrame({'a': [1, 2.3, complex(4, -5)],
                     'b': [float('nan'), None, complex(1.2, 0)]},
                    columns=['a', 'b'])
     expected = ('[["(1+0j)","(nan+0j)"],'
                 '["(2.3+0j)","(nan+0j)"],'
                 '["(4-5j)","(1.2+0j)"]]')
     assert df.to_json(default_handler=str, orient="values") == expected
Example 28
    def test_mixed_timedelta_datetime(self):
        frame = DataFrame({'a': [timedelta(23), pd.Timestamp('20130101')]},
                          dtype=object)

        expected = DataFrame({'a': [pd.Timedelta(frame.a[0]).value,
                                    pd.Timestamp(frame.a[1]).value]})
        result = pd.read_json(frame.to_json(date_unit='ns'),
                              dtype={'a': 'int64'})
        assert_frame_equal(result, expected)
Example 29
    def test_doc_example(self):
        dfj2 = DataFrame(np.random.randn(5, 2), columns=list('AB'))
        dfj2['date'] = Timestamp('20130101')
        dfj2['ints'] = lrange(5)
        dfj2['bools'] = True
        dfj2.index = pd.date_range('20130101',periods=5)

        json = dfj2.to_json()
        result = read_json(json,dtype={'ints' : np.int64, 'bools' : np.bool_})
        assert_frame_equal(result,result)
Example 30
    def test_doc_example(self):
        dfj2 = DataFrame(np.random.randn(5, 2), columns=list("AB"))
        dfj2["date"] = Timestamp("20130101")
        dfj2["ints"] = lrange(5)
        dfj2["bools"] = True
        dfj2.index = pd.date_range("20130101", periods=5)

        json = dfj2.to_json()
        result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_})
        assert_frame_equal(result, result)
Example 31
class TestRun:
    """
    represents the collected data of a particular (set of) log file(s)
    """
    FILE_EXTENSION = ".trn"
    """ the file extension for saving and loading test runs from """
    def __init__(self, filenames=[]):
        self.inputfromstdin = False
        self.filenames = []
        for filename in filenames:
            self.appendFilename(filename)
        self.data = DataFrame(dtype=object)

        self.datadict = {}
        self.currentproblemdata = {}
        self.currentproblemid = 0
        """ meta data represent problem-independent data """
        self.metadatadict = {}
        self.parametervalues = {}
        self.defaultparametervalues = {}
        self.keyset = set()

        self.currentfileiterator = None
        self.currentfile = None
        self.consumedStdinput = []

    def __iter__(self):
        if (self.currentfile != ""):
            with open(self.currentfile, "r") as f:
                for line in enumerate(f):
                    yield line
        else:
            for line in enumerate(self.consumedStdinput):
                yield line
            for line in enumerate(sys.stdin, len(self.consumedStdinput)):
                yield line

    def iterationPrepare(self):
        filenames = sorted(
            self.filenames,
            key=lambda x: misc.sortingKeyContext(misc.filenameGetContext(x)))
        self.currentfileiterator = iter(filenames)

    def iterationNextFile(self):
        try:
            self.currentfile = next(self.currentfileiterator)
            return True
        except StopIteration:
            return False

    def iterationAddConsumedStdinput(self, consumedlines):
        if self.currentfile == "":
            for line in consumedlines:
                self.consumedStdinput.append(line)

    def iterationCleanUp(self):
        self.currentfileiterator = None

    def iterationGetCurrentFile(self):
        return self.currentfile

    def setInputFromStdin(self):
        self.filenames.append("")

    def appendFilename(self, filename):
        # TODO test this
        """Append a file name to the list of filenames of this test run
        """
        filename = os.path.abspath(filename)
        if filename not in self.filenames:
            self.filenames.append(filename)
        else:
            return

        extension = misc.filenameGetContext(filename)
        if extension in [Key.CONTEXT_ERRFILE, Key.CONTEXT_LOGFILE]:
            metafile = os.path.splitext(filename)[0] + ".meta"

            if os.path.isfile(metafile) and (metafile not in self.filenames):
                self.filenames.append(metafile)

    def addDataByName(self, datakeys, data, problem):
        """Add the current data under the specified dataname

        Readers can use this method to add data, either as a single datakey, or as list,
        where in the latter case it is required that datakeys and data are both lists of the same length

        after data was added, the method getProblemDataById() can be used for access
        """
        for problemid, name in self.datadict.setdefault(Key.ProblemName,
                                                        {}).items():
            if name == problem:
                self.addDataById(datakeys, data, problemid)

    def addData(self, datakey, data):
        """Add data to current problem

        readers can use this method to add data, either as a single datakey, or as list,
        where in the latter case it is required that datakeys and data are both lists of the same length
        """
        logging.debug("TestRun %s receives data Datakey %s, %s" %
                      (self.getName(), repr(datakey), repr(data)))

        if type(datakey) is list and type(data) is list:
            for key, datum in zip(datakey, data):
                self.currentproblemdata[key] = datum
        else:
            self.currentproblemdata[datakey] = data

    def getCurrentProblemData(self, datakey: str = None):
        """Return current problem data, either entirely or for specified data key
        """
        if datakey is None:
            return self.currentproblemdata
        else:
            return self.currentproblemdata.get(datakey)

    def addDataById(self, datakeys, data, problemid):
        """Add the data or to the specified problem

        readers can use this method to add data, either as a single datakey, or as list,
        where in the latter case it is required that datakeys and data are both lists of the same length

        after data was added, the method getProblemDataById() can be used for access if a problemid was given
        """
        # check for the right dictionary to store the data
        logging.debug("TestRun %s receives data Datakey %s, %s to problem %s" %
                      (self.getName(), repr(datakeys), repr(data), problemid))

        if type(datakeys) is list and type(data) is list:
            for key, datum in zip(datakeys, data):
                self.datadict.setdefault(key, {})[problemid] = datum
        else:
            self.datadict.setdefault(datakeys, {})[problemid] = data

    def addParameterValue(self, paramname, paramval):
        """Store the value for a parameter of a given name for this test run
        """
        self.parametervalues[paramname] = paramval

    def addDefaultParameterValue(self, paramname, defaultval):
        """Store the value for a parameter of a given name for this test run
        """
        self.defaultparametervalues[paramname] = defaultval

    def getParameterData(self):
        """Return two dictionaries that map parameter names to  their value and default value
        """
        return (self.parametervalues, self.defaultparametervalues)

    def getLogFile(self, fileextension=".out"):
        """Returns the name of the logfile
        """
        for filename in self.filenames:
            if filename.endswith(fileextension):
                return filename
        return None

    def getKeySet(self):
        """Return a list or set of keys (which are the columns headers of the data)
        """
        if self.datadict != {}:
            return list(self.datadict.keys())
        else:
            return set(self.data.columns)

    def emptyData(self):
        """Empty all data of current testrun
        """
        self.data = DataFrame(dtype=object)

    def getMetaData(self):
        """Return a data frame containing meta data
        """
        return DataFrame(self.metadatadict)

    def finalizeCurrentCollection(self, solver):
        """ Any data of the current problem is saved as a new row in datadict
        """
        if self.currentproblemdata != {}:
            # Add data collected by solver into currentproblemdata, such as primal and dual bound,
            self.addData(*solver.getData())
            for key in self.metadatadict.keys():
                self.addData(key, self.metadatadict[key])

            for key in self.currentproblemdata.keys():
                self.datadict.setdefault(
                    key,
                    {})[self.currentproblemid] = self.currentproblemdata[key]
            self.currentproblemdata = {}
            self.currentproblemid = self.currentproblemid + 1

    def finishedReadingFile(self, solver):
        """ Save data of current problem
        """
        self.finalizeCurrentCollection(solver)

    def setupForDataCollection(self):
        """ Save data in a python dictionary for easier data collection
        """
        self.datadict = self.data.to_dict()
        self.data = DataFrame(dtype=object)

    def setupAfterDataCollection(self):
        """ Save data in a pandas dataframe for futher use (i.e. reading and finding data)
        """
        self.data = DataFrame(self.datadict)
        self.datadict = {}

    def hasProblemName(self, problemname):
        """ Return if already collected data for a problem with given name
        """
        if self.datadict != {}:
            return problemname in self.datadict.get(Key.ProblemName,
                                                    {}).values()
        else:
            if Key.ProblemName in self.data.keys():
                for name in self.data[Key.ProblemName]:
                    if problemname == name:
                        return True
            return False

    def hasProblemId(self, problemid):
        """ Returns if there is already data collected for a problem with given id
        """
        return problemid in range(self.currentproblemid)

    def getProblemIds(self):
        """ Return a list of problemids
        """
        return list(range(self.currentproblemid))

    def getProblemNames(self):
        """ Return an (unsorted) list of problemnames
        """
        if self.datadict != {}:
            return list(self.datadict.get(Key.ProblemName, []))
        else:
            if Key.ProblemName in self.data.columns:
                return list(self.data[Key.ProblemName])
            else:
                return []

    def getProblemDataByName(self, problemname, datakey):
        """Return the data collected for problems with given name
        """
        collecteddata = []
        if self.datadict != {}:
            for key, dat in self.datadict.get("ProblemName", None):
                if dat == problemname:
                    collecteddata.append(self.getProblemDataById(key, datakey))
        else:
            collecteddata = list(self.data[self.data[Key.ProblemName] ==
                                           problemname].loc[:, datakey])
        try:
            return collecteddata[0]
        except IndexError:
            return None

    def getProblemDataById(self, problemid, datakey=None):
        """Return data for a specific datakey, or None, if no such data exists for this (probname, datakey) key pair
        """
        if datakey is None:
            try:
                return ",".join("%s: %s" %
                                (key, self.getProblemDataById(problemid, key))
                                for key in self.getKeySet())
            except KeyError:
                return "<%s> not contained in keys, have only\n%s" % \
                    (problemid, ",".join((ind for ind in self.getProblemIds())))
        else:
            if self.datadict != {}:
                return self.datadict.get(datakey, {}).get(problemid, None)
            else:
                try:
                    data = self.data.loc[problemid, datakey]
                except KeyError:
                    data = None
                if type(data) is list or notnull(data):
                    return data
                else:
                    return None

    def getProblemsDataById(self, problemids, datakey):
        """ Return data for a list of problems
        """
        if self.datadict != {}:
            return [
                self.datadict.get(datakey, {}).get(id, None)
                for id in problemids
            ]
        else:
            return self.data.loc[problemids, datakey]

    def deleteProblemDataById(self, problemid):
        """ Delete all data acquired so far for problemid
        """
        if self.datadict != {}:
            for key in list(self.datadict.keys()):
                try:
                    del self.datadict[key][problemid]
                except KeyError:
                    pass
        else:
            try:
                self.data.drop(problemid, inplace=True)
            except TypeError:
                # needs to be caught for pandas version < 0.13
                self.data = self.data.drop(problemid)

    def saveToFile(self, filename):
        """ Dump the pickled instance of itself into a .trn-file
        """
        try:
            f = open(filename, 'wb')
            pickle.dump(self, f, protocol=2)
            f.close()
        except IOError:
            print("Could not open %s for saving test run" % filename)

    def emptyCurrentProblemData(self):
        """ Empty data of currently read problem
        """
        return self.currentproblemdata == {}

    def printToConsole(self, formatstr="{idx}: {d}"):
        """ Print data to console
        """
        for idx, d in self.data.iterrows():
            #            pd.set_option('display.max_rows', len(d))
            print(formatstr.format(d=d, idx=idx))
#            pd.reset_option('display.max_rows')

    def toJson(self):
        """ Return the data-object in json
        """
        return self.data.to_json()

    @staticmethod
    def loadFromFile(filename):
        """ Loads a .trn-File containing a particular instance of TestRun
        """
        try:
            if filename.endswith(".gz"):
                import gzip
                f = gzip.open(filename, 'rb')
            else:
                f = open(filename, 'rb')
        except IOError:
            print("Could not open %s for loading test run" % filename)
            return None
        testrun = pickle.load(f)
        f.close()
        return testrun

    def getData(self, datakey=None):
        """Return a data frame object of the acquired data
        """
        return self.data

    def getCurrentLogfilename(self):
        """ Return the name of the current logfile 
        """
        return os.path.basename(self.filenames[0])

    def getSettings(self):
        """ Return the settings associated with this test run
        """
        try:
            return self.data['Settings'][0]
        except KeyError:
            return os.path.basename(self.filenames[0]).split('.')[-2]
#

    def getName(self):
        """ Convenience method to make test run a manageable object
        """
        return self.getIdentification()

    def getIdentification(self):
        """ Return identification string of this test run
        """
        # TODO Is this still the way to do this? What if we are reading from stdin?
        return os.path.splitext(os.path.basename(self.filenames[0]))[0]

    def problemGetOptimalSolution(self, problemid):
        """ Return objective of an optimal or a best known solution

        ... from solu file, or None, if no such data has been acquired
        """
        try:
            return self.getProblemDataById(problemid, 'OptVal')
        except KeyError:
            #            print(self.getIdentification() + " has no solu file value for ", problemid)
            return None

    def problemGetSoluFileStatus(self, problemid):
        """ Return 'unkn', 'inf', 'best', 'opt'

        ... as solu file status, or None, if no solu file status
        exists for this problem
        """
        try:
            return self.getProblemDataById(problemid, 'SoluFileStatus')
        except KeyError:
            #            print(self.getIdentification() + " has no solu file status for ", problemid)
            return None
Example 32
 def test_to_jsonl(self):
     # GH9180
     df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
     result = df.to_json(orient="records", lines=True)
     expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
     self.assertEqual(result, expected)
Example 33
 def test_frame_empty(self):
     df = DataFrame(columns=['jim', 'joe'])
     self.assertFalse(df._is_mixed_type)
     assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)),
                        df,
                        check_index_type=False)
Example 34
def val(epoch, dataset, config, log_dir):
    """Validate model."""
    model_config = config['model']
    sess_config = config['session']

    answerset = pd.read_csv(os.path.join(config['preprocess_dir'],
                                         'answer_set.txt'),
                            header=None)[0]

    example_id = 0

    with tf.Graph().as_default():
        model = GRA(model_config)
        model.build_inference()
        result = DataFrame(columns=['id', 'answer'])
        with tf.Session(config=sess_config) as sess:
            sum_dir = os.path.join(log_dir, 'summary')
            summary_writer = tf.summary.FileWriter(sum_dir)

            ckpt_dir = os.path.join(log_dir, 'checkpoint')
            save_path = tf.train.latest_checkpoint(ckpt_dir)
            saver = tf.train.Saver()
            if save_path:
                print('load checkpoint {}.'.format(save_path))
                saver.restore(sess, save_path)
            else:
                print('no checkpoint.')
                exit()

            stats_dir = os.path.join(log_dir, 'stats')
            stats_path = os.path.join(stats_dir, 'val.json')
            if os.path.exists(stats_path):
                print('load stats file {}.'.format(stats_path))
                stats = pd.read_json(stats_path, 'records')
            else:
                print('no stats file.')
                if not os.path.exists(stats_dir):
                    os.makedirs(stats_dir)
                stats = pd.DataFrame(columns=['epoch', 'acc'])

            # val iterate over examples
            correct = 0

            while dataset.has_val_example:
                vgg, c3d, question, answer = dataset.get_val_example()
                c3d = np.zeros((len(c3d), len(c3d[0])))
                feed_dict = {
                    model.appear: [vgg],
                    model.motion: [c3d],
                    model.question_encode: [question],
                }
                prediction = sess.run(model.prediction, feed_dict=feed_dict)
                prediction = prediction[1]
                for i, row in enumerate(prediction):
                    for index in row:
                        if answer[index] == 1:
                            correct += 1
                            break
                result = result.append({
                    'id': example_id,
                    'answer': prediction
                },
                                       ignore_index=True)
                example_id += 1
            acc = correct / dataset.val_example_total
            result.to_json(
                os.path.join(
                    log_dir, 'validation_' + str(int(acc * 100)) + '_' +
                    str(epoch + lajidaima) + '.json'), 'records')
            print('\n[VAL] epoch {}, acc {:.5f}.\n'.format(
                epoch + lajidaima, acc))

            summary = tf.Summary()
            summary.value.add(tag='val/acc', simple_value=float(acc))
            summary_writer.add_summary(summary, epoch + lajidaima)

            record = Series([epoch + lajidaima, acc], ['epoch', 'acc'])
            stats = stats.append(record, ignore_index=True)
            stats.to_json(stats_path, 'records')

            dataset.reset_val()
            return acc
Example 35
# -*- coding: utf-8 -*-
"""
Created on Wed Jul  3 09:31:39 2019

@author: potlus
"""

import pandas as pd
from pandas import DataFrame

#Read File Path
path = 'S:/DATA CENTER/Autosys/Working On/Sreenivas/SCRIPTS/Python/SN_CMDB_Apps.csv'
cmdb = pd.read_csv(path, header=0, encoding='unicode_escape')
cmdb.shape
df = DataFrame(cmdb, columns= ['Sys_Id', 'SW_Name', 'Technical_Lead', 'Support_Group', 'Operational_Status'])

Export = df.to_json(r'S:/DATA CENTER/Autosys/Working On/Sreenivas/SCRIPTS/Python/sampleCMDB.json', orient='records', lines=True)
Example 36
def dataframe_to_json(data: DataFrame, path: Path, **kwargs):
    ''' Saves a pandas DataFrame into a UTF-8 encoded JSON file '''
    with open(path, 'w', encoding='UTF-8') as file:
        data.to_json(file, force_ascii=False, **kwargs)
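
A hedged usage sketch of the helper above (file name and frame contents invented); force_ascii=False keeps non-ASCII text readable in the written file.

from pathlib import Path

from pandas import DataFrame

df = DataFrame({"name": ["博多人形", "café"], "qty": [1, 2]})
dataframe_to_json(df, Path("items.json"), orient="records")
print(Path("items.json").read_text(encoding="UTF-8"))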
Example 37
 def test_default_handler(self):
     value = object()
     frame = DataFrame({'a': [7, value]})
     expected = DataFrame({'a': [7, str(value)]})
     result = pd.read_json(frame.to_json(default_handler=str))
     assert_frame_equal(expected, result, check_index_type=False)
Example 38
 def test_default_handler(self):
     value = object()
     frame = DataFrame({'a': ['a', value]})
     expected = frame.applymap(str)
     result = pd.read_json(frame.to_json(default_handler=str))
     assert_frame_equal(expected, result)
Example 39
                    f["properties"]["G"],
                    f["properties"]["B"],
                ])
            closest_index = cdist(XA=np.array([d]),
                                  XB=np.array(fl),
                                  metric=func).argmin()
            rgb = fl[closest_index]
            final_data.append([d[0], d[1], d[2], rgb[2], rgb[3], rgb[4]])
            print(len(final_data))
            time.sleep(1)
    except Exception as e:
        print(e)


features_size = 1000

groups = grouper(features_size, xa)

part_func = partial(gen_feature_color, space_color=space, func=func)

with concurrent.futures.ProcessPoolExecutor(max_workers=60) as executor:
    executor.map(part_func, groups, chunksize=3)

from pandas import DataFrame

df = DataFrame(list(final_data))

df.to_json("FINAL_DATA.json", orient="values")

####################################
Example 40
def test(dataset, config, log_dir, question_type_dict):
    """Test model, output prediction as json file."""
    model_config = config['model']
    sess_config = config['session']

    question_type_correct_count = copy.deepcopy(question_type_dict)
    question_type_all_count = copy.deepcopy(question_type_dict)
    for k in question_type_dict:
        question_type_correct_count[k] = 0
        question_type_all_count[k] = 0

    answerset = pd.read_csv(os.path.join(config['preprocess_dir'],
                                         'answer_set.txt'),
                            header=None)[0]

    with tf.Graph().as_default():
        model = Multimodal_DMN_VM(model_config)
        model.build_inference()

        with tf.Session(config=sess_config) as sess:
            ckpt_dir = os.path.join(log_dir, 'checkpoint')
            save_path = tf.train.latest_checkpoint(ckpt_dir)
            saver = tf.train.Saver()
            if save_path:
                print('load checkpoint {}.'.format(save_path))
                saver.restore(sess, save_path)
            else:
                print('no checkpoint.')
                exit()

            # test iterate over examples
            result = DataFrame(columns=['id', 'answer'])
            correct = 0

            groundtruth_answer_list = []
            predict_answer_list = []
            while dataset.has_test_example:
                vgg, c3d, vgg_conv5, vgg_conv5_3, question, answer, example_id, question_len = dataset.get_test_example(
                )
                input_len = 20
                feed_dict = {
                    model.c3d_video_feature: [c3d],
                    model.vgg_video_feature: [vgg],
                    model.question_encode: [question],
                    model.question_len_placeholder: [question_len],
                    model.video_len_placeholder: [input_len],
                    model.keep_placeholder: 1.0
                }
                prediction = sess.run(model.prediction, feed_dict=feed_dict)
                prediction = prediction[0]

                result = result.append(
                    {
                        'id': example_id,
                        'answer': answerset[prediction]
                    },
                    ignore_index=True)
                if answerset[prediction] == answer:
                    correct += 1
                    question_type_correct_count[question[0]] += 1
                question_type_all_count[question[0]] += 1

                groundtruth_answer_list.append(answer)
                predict_answer_list.append(answerset[prediction])

            result.to_json(os.path.join(log_dir, 'prediction.json'), 'records')
            acc = correct * 1.0 / dataset.test_example_total
            WUPS_0_0 = metrics.compute_wups(groundtruth_answer_list,
                                            predict_answer_list, 0.0)
            WUPS_0_9 = metrics.compute_wups(groundtruth_answer_list,
                                            predict_answer_list, 0.9)
            WUPS_acc = metrics.compute_wups(groundtruth_answer_list,
                                            predict_answer_list, -1)
            print('[TEST] acc {:.5f}.\n'.format(acc))
            print('[TEST], WUPS@acc {:.5f}.\n'.format(WUPS_acc))
            print('[TEST], WUPS@0.0 {:.5f}.\n'.format(WUPS_0_0))
            print('[TEST], WUPS@0.9 {:.5f}.\n'.format(WUPS_0_9))

            print('######## question type acc list ######### ')
            for k in question_type_dict:
                print(question_type_dict[k] +
                      ' acc {:.5f}.'.format(question_type_correct_count[k] *
                                            1.0 / question_type_all_count[k]))
                print('correct = {:d}, all = {:d}'.format(
                    question_type_correct_count[k],
                    question_type_all_count[k]))

            dataset.reset_test()
            return acc
Example 41
class TestTableOrient:
    def setup_method(self, method):
        self.df = DataFrame(
            {
                "A": [1, 2, 3, 4],
                "B": ["a", "b", "c", "c"],
                "C": pd.date_range("2016-01-01", freq="d", periods=4),
                "D": pd.timedelta_range("1H", periods=4, freq="T"),
                "E": pd.Series(pd.Categorical(["a", "b", "c", "c"])),
                "F": pd.Series(pd.Categorical(["a", "b", "c", "c"], ordered=True)),
                "G": [1.0, 2.0, 3, 4.0],
                "H": pd.date_range("2016-01-01", freq="d", periods=4, tz="US/Central"),
            },
            index=pd.Index(range(4), name="idx"),
        )

    def test_build_series(self):
        s = pd.Series([1, 2], name="a")
        s.index.name = "id"
        result = s.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result["schema"]
        result["schema"].pop("pandas_version")

        fields = [{"name": "id", "type": "integer"}, {"name": "a", "type": "integer"}]

        schema = {"fields": fields, "primaryKey": ["id"]}

        expected = OrderedDict(
            [
                ("schema", schema),
                (
                    "data",
                    [
                        OrderedDict([("id", 0), ("a", 1)]),
                        OrderedDict([("id", 1), ("a", 2)]),
                    ],
                ),
            ]
        )

        assert result == expected

    def test_read_json_from_to_json_results(self):
        # GH32383
        df = pd.DataFrame(
            {
                "_id": {"row_0": 0},
                "category": {"row_0": "Goods"},
                "recommender_id": {"row_0": 3},
                "recommender_name_jp": {"row_0": "浦田"},
                "recommender_name_en": {"row_0": "Urata"},
                "name_jp": {"row_0": "博多人形(松尾吉将まつお よしまさ)"},
                "name_en": {"row_0": "Hakata Dolls Matsuo"},
            }
        )
        result1 = pd.read_json(df.to_json())
        result2 = pd.DataFrame.from_dict(json.loads(df.to_json()))
        tm.assert_frame_equal(result1, df)
        tm.assert_frame_equal(result2, df)

    def test_to_json(self):
        df = self.df.copy()
        df.index.name = "idx"
        result = df.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result["schema"]
        result["schema"].pop("pandas_version")

        fields = [
            {"name": "idx", "type": "integer"},
            {"name": "A", "type": "integer"},
            {"name": "B", "type": "string"},
            {"name": "C", "type": "datetime"},
            {"name": "D", "type": "duration"},
            {
                "constraints": {"enum": ["a", "b", "c"]},
                "name": "E",
                "ordered": False,
                "type": "any",
            },
            {
                "constraints": {"enum": ["a", "b", "c"]},
                "name": "F",
                "ordered": True,
                "type": "any",
            },
            {"name": "G", "type": "number"},
            {"name": "H", "type": "datetime", "tz": "US/Central"},
        ]

        schema = {"fields": fields, "primaryKey": ["idx"]}
        data = [
            OrderedDict(
                [
                    ("idx", 0),
                    ("A", 1),
                    ("B", "a"),
                    ("C", "2016-01-01T00:00:00.000Z"),
                    ("D", "P0DT1H0M0S"),
                    ("E", "a"),
                    ("F", "a"),
                    ("G", 1.0),
                    ("H", "2016-01-01T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 1),
                    ("A", 2),
                    ("B", "b"),
                    ("C", "2016-01-02T00:00:00.000Z"),
                    ("D", "P0DT1H1M0S"),
                    ("E", "b"),
                    ("F", "b"),
                    ("G", 2.0),
                    ("H", "2016-01-02T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 2),
                    ("A", 3),
                    ("B", "c"),
                    ("C", "2016-01-03T00:00:00.000Z"),
                    ("D", "P0DT1H2M0S"),
                    ("E", "c"),
                    ("F", "c"),
                    ("G", 3.0),
                    ("H", "2016-01-03T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 3),
                    ("A", 4),
                    ("B", "c"),
                    ("C", "2016-01-04T00:00:00.000Z"),
                    ("D", "P0DT1H3M0S"),
                    ("E", "c"),
                    ("F", "c"),
                    ("G", 4.0),
                    ("H", "2016-01-04T06:00:00.000Z"),
                ]
            ),
        ]
        expected = OrderedDict([("schema", schema), ("data", data)])

        assert result == expected

    def test_to_json_float_index(self):
        data = pd.Series(1, index=[1.0, 2.0])
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        expected = OrderedDict(
            [
                (
                    "schema",
                    {
                        "fields": [
                            {"name": "index", "type": "number"},
                            {"name": "values", "type": "integer"},
                        ],
                        "primaryKey": ["index"],
                    },
                ),
                (
                    "data",
                    [
                        OrderedDict([("index", 1.0), ("values", 1)]),
                        OrderedDict([("index", 2.0), ("values", 1)]),
                    ],
                ),
            ]
        )

        assert result == expected

    def test_to_json_period_index(self):
        idx = pd.period_range("2016", freq="Q-JAN", periods=2)
        data = pd.Series(1, idx)
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        fields = [
            {"freq": "Q-JAN", "name": "index", "type": "datetime"},
            {"name": "values", "type": "integer"},
        ]

        schema = {"fields": fields, "primaryKey": ["index"]}
        data = [
            OrderedDict([("index", "2015-11-01T00:00:00.000Z"), ("values", 1)]),
            OrderedDict([("index", "2016-02-01T00:00:00.000Z"), ("values", 1)]),
        ]
        expected = OrderedDict([("schema", schema), ("data", data)])

        assert result == expected

    def test_to_json_categorical_index(self):
        data = pd.Series(1, pd.CategoricalIndex(["a", "b"]))
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        expected = OrderedDict(
            [
                (
                    "schema",
                    {
                        "fields": [
                            {
                                "name": "index",
                                "type": "any",
                                "constraints": {"enum": ["a", "b"]},
                                "ordered": False,
                            },
                            {"name": "values", "type": "integer"},
                        ],
                        "primaryKey": ["index"],
                    },
                ),
                (
                    "data",
                    [
                        OrderedDict([("index", "a"), ("values", 1)]),
                        OrderedDict([("index", "b"), ("values", 1)]),
                    ],
                ),
            ]
        )

        assert result == expected

    def test_date_format_raises(self):
        with pytest.raises(ValueError):
            self.df.to_json(orient="table", date_format="epoch")

        # others work
        self.df.to_json(orient="table", date_format="iso")
        self.df.to_json(orient="table")

    def test_convert_pandas_type_to_json_field_int(self, index_or_series):
        kind = index_or_series
        data = [1, 2, 3]
        result = convert_pandas_type_to_json_field(kind(data, name="name"))
        expected = {"name": "name", "type": "integer"}
        assert result == expected

    def test_convert_pandas_type_to_json_field_float(self, index_or_series):
        kind = index_or_series
        data = [1.0, 2.0, 3.0]
        result = convert_pandas_type_to_json_field(kind(data, name="name"))
        expected = {"name": "name", "type": "number"}
        assert result == expected

    @pytest.mark.parametrize(
        "dt_args,extra_exp", [({}, {}), ({"utc": True}, {"tz": "UTC"})]
    )
    @pytest.mark.parametrize("wrapper", [None, pd.Series])
    def test_convert_pandas_type_to_json_field_datetime(
        self, dt_args, extra_exp, wrapper
    ):
        data = [1.0, 2.0, 3.0]
        data = pd.to_datetime(data, **dt_args)
        if wrapper is pd.Series:
            data = pd.Series(data, name="values")
        result = convert_pandas_type_to_json_field(data)
        expected = {"name": "values", "type": "datetime"}
        expected.update(extra_exp)
        assert result == expected

    def test_convert_pandas_type_to_json_period_range(self):
        arr = pd.period_range("2016", freq="A-DEC", periods=4)
        result = convert_pandas_type_to_json_field(arr)
        expected = {"name": "values", "type": "datetime", "freq": "A-DEC"}
        assert result == expected

    @pytest.mark.parametrize("kind", [pd.Categorical, pd.CategoricalIndex])
    @pytest.mark.parametrize("ordered", [True, False])
    def test_convert_pandas_type_to_json_field_categorical(self, kind, ordered):
        data = ["a", "b", "c"]
        if kind is pd.Categorical:
            arr = pd.Series(kind(data, ordered=ordered), name="cats")
        elif kind is pd.CategoricalIndex:
            arr = kind(data, ordered=ordered, name="cats")

        result = convert_pandas_type_to_json_field(arr)
        expected = {
            "name": "cats",
            "type": "any",
            "constraints": {"enum": data},
            "ordered": ordered,
        }
        assert result == expected

    @pytest.mark.parametrize(
        "inp,exp",
        [
            ({"type": "integer"}, "int64"),
            ({"type": "number"}, "float64"),
            ({"type": "boolean"}, "bool"),
            ({"type": "duration"}, "timedelta64"),
            ({"type": "datetime"}, "datetime64[ns]"),
            ({"type": "datetime", "tz": "US/Hawaii"}, "datetime64[ns, US/Hawaii]"),
            ({"type": "any"}, "object"),
            (
                {
                    "type": "any",
                    "constraints": {"enum": ["a", "b", "c"]},
                    "ordered": False,
                },
                CategoricalDtype(categories=["a", "b", "c"], ordered=False),
            ),
            (
                {
                    "type": "any",
                    "constraints": {"enum": ["a", "b", "c"]},
                    "ordered": True,
                },
                CategoricalDtype(categories=["a", "b", "c"], ordered=True),
            ),
            ({"type": "string"}, "object"),
        ],
    )
    def test_convert_json_field_to_pandas_type(self, inp, exp):
        field = {"name": "foo"}
        field.update(inp)
        assert convert_json_field_to_pandas_type(field) == exp

    @pytest.mark.parametrize("inp", ["geopoint", "geojson", "fake_type"])
    def test_convert_json_field_to_pandas_type_raises(self, inp):
        field = {"type": inp}
        with pytest.raises(
            ValueError, match=f"Unsupported or invalid field type: {inp}"
        ):
            convert_json_field_to_pandas_type(field)

    def test_categorical(self):
        s = pd.Series(pd.Categorical(["a", "b", "a"]))
        s.index.name = "idx"
        result = s.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        fields = [
            {"name": "idx", "type": "integer"},
            {
                "constraints": {"enum": ["a", "b"]},
                "name": "values",
                "ordered": False,
                "type": "any",
            },
        ]

        expected = OrderedDict(
            [
                ("schema", {"fields": fields, "primaryKey": ["idx"]}),
                (
                    "data",
                    [
                        OrderedDict([("idx", 0), ("values", "a")]),
                        OrderedDict([("idx", 1), ("values", "b")]),
                        OrderedDict([("idx", 2), ("values", "a")]),
                    ],
                ),
            ]
        )

        assert result == expected

    @pytest.mark.parametrize(
        "idx,nm,prop",
        [
            (pd.Index([1]), "index", "name"),
            (pd.Index([1], name="myname"), "myname", "name"),
            (
                pd.MultiIndex.from_product([("a", "b"), ("c", "d")]),
                ["level_0", "level_1"],
                "names",
            ),
            (
                pd.MultiIndex.from_product(
                    [("a", "b"), ("c", "d")], names=["n1", "n2"]
                ),
                ["n1", "n2"],
                "names",
            ),
            (
                pd.MultiIndex.from_product(
                    [("a", "b"), ("c", "d")], names=["n1", None]
                ),
                ["n1", "level_1"],
                "names",
            ),
        ],
    )
    def test_set_names_unset(self, idx, nm, prop):
        data = pd.Series(1, idx)
        result = set_default_names(data)
        assert getattr(result.index, prop) == nm

    @pytest.mark.parametrize(
        "idx",
        [
            pd.Index([], name="index"),
            pd.MultiIndex.from_arrays([["foo"], ["bar"]], names=("level_0", "level_1")),
            pd.MultiIndex.from_arrays([["foo"], ["bar"]], names=("foo", "level_1")),
        ],
    )
    def test_warns_non_roundtrippable_names(self, idx):
        # GH 19130
        df = pd.DataFrame(index=idx)
        df.index.name = "index"
        with tm.assert_produces_warning():
            set_default_names(df)

    def test_timestamp_in_columns(self):
        df = pd.DataFrame(
            [[1, 2]], columns=[pd.Timestamp("2016"), pd.Timedelta(10, unit="s")]
        )
        result = df.to_json(orient="table")
        js = json.loads(result)
        assert js["schema"]["fields"][1]["name"] == "2016-01-01T00:00:00.000Z"
        assert js["schema"]["fields"][2]["name"] == "P0DT0H0M10S"

    @pytest.mark.parametrize(
        "case",
        [
            pd.Series([1], index=pd.Index([1], name="a"), name="a"),
            pd.DataFrame({"A": [1]}, index=pd.Index([1], name="A")),
            pd.DataFrame(
                {"A": [1]},
                index=pd.MultiIndex.from_arrays([["a"], [1]], names=["A", "a"]),
            ),
        ],
    )
    def test_overlapping_names(self, case):
        with pytest.raises(ValueError, match="Overlapping"):
            case.to_json(orient="table")
Ejemplo n.º 42
0
def to_session(df: pd.DataFrame):
    # Save the content in memory, in compressed form
    # return compressStringToBytes(df.to_json())
    return df.to_json()
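The commented-out call above suggests the JSON payload was originally compressed before being stored in the session. Below is a minimal sketch of what such a helper could look like, assuming zlib-based compression; the name compressStringToBytes and its exact behaviour are not shown in the source, so this is an assumption for illustration only.

import zlib

def compress_string_to_bytes(s: str) -> bytes:
    # Hypothetical helper: encode the JSON string as UTF-8, then deflate-compress it
    return zlib.compress(s.encode("utf-8"))

def decompress_bytes_to_string(b: bytes) -> str:
    # Reverse of the above: inflate and decode back to a JSON string
    return zlib.decompress(b).decode("utf-8")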
Ejemplo n.º 43
0
 def test_read_json_table_orient(self, index_nm, vals):
     df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
     out = df.to_json(orient="table")
     result = pd.read_json(out, orient="table")
     tm.assert_frame_equal(df, result)
Ejemplo n.º 44
0
def get_and_store_usage_data(selected_month):
    rows = session.execute(admin_queries['data_usage_by_month'],
                           [selected_month])
    df = DataFrame(rows)
    return df.to_json(date_format='iso')
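For context, a caller could turn that payload back into a DataFrame with read_json. A minimal sketch, assuming the default "columns" orient used above and the ISO-8601 date strings produced by date_format='iso':

import pandas as pd

def load_usage_data(payload: str) -> pd.DataFrame:
    # read_json parses ISO-8601 strings back into datetime64 columns by default
    return pd.read_json(payload, convert_dates=True)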
Ejemplo n.º 45
0
def process(request, exec_instance):
    dataset_list = []
    service_exec = ServiceInstance.objects.get(pk=int(exec_instance))
    try:
        service_exec.arguments = {
            "filter-arguments": [],
            "algorithm-arguments": [{}, {}]
        }

        spill_infos, wave_model, ocean_model, natura_layer, ais_layer, time_interval, sim_length, oil_density, valid_points, valid_points_count, scenario, start_date, latitude, longitude = parse_request_params(
            request)
        depth = 0
        if (scenario == '1') or (scenario == '3'):
            service_exec.arguments["algorithm-arguments"][0][
                "latitude"] = spill_infos[0]['latitude']
            service_exec.arguments["algorithm-arguments"][0][
                "longitude"] = spill_infos[0]['longitude']
            if scenario == '3':
                cursor_presto = get_presto_cursor()
                resolution = 1
                if wave_model == '202':
                    query = "SELECT * FROM (SELECT min(depth) FROM hcmr_poseidon_aeg_bathymetry WHERE round(latitude," + str(
                        resolution) + " )=" + str(
                            round(float(spill_infos[0]['latitude']),
                                  resolution)) + " AND round(longitude," + str(
                                      resolution) + ")=" + str(
                                          round(
                                              float(
                                                  spill_infos[0]['longitude']),
                                              resolution)) + ")"
                    cursor_presto.execute(query)
                    try:
                        dataset_list.append((Dataset.objects.get(
                            table_name='hcmr_poseidon_aeg_bathymetry')).id)
                    except:
                        print('Dataset does not exist in database')
                else:
                    query = "SELECT * FROM (SELECT min(depth) FROM hcmr_poseidon_med_bathymetry WHERE round(latitude," + str(
                        resolution) + " )=" + str(
                            round(float(spill_infos[0]['latitude']),
                                  resolution)) + " AND round(longitude," + str(
                                      resolution) + ")=" + str(
                                          round(
                                              float(
                                                  spill_infos[0]['longitude']),
                                              resolution)) + ")"
                cursor_presto.execute(query)
                try:
                    dataset_list.append((Dataset.objects.get(
                        table_name='hcmr_poseidon_med_bathymetry')).id)
                except:
                    print('Dataset does not exist in database')
                result = cursor_presto.fetchall()
                try:
                    depth = float(result[0][0])
                except:
                    resolution = 0
                    print('exception: trying with less precise resolution')
                    if wave_model == '202':
                        query = "SELECT * FROM (SELECT min(depth) FROM hcmr_poseidon_aeg_bathymetry WHERE round(latitude," + str(
                            resolution) + " )=" + str(
                                round(float(spill_infos[0]['latitude']),
                                      resolution)
                            ) + " AND round(longitude," + str(
                                resolution) + ")=" + str(
                                    round(float(spill_infos[0]['longitude']),
                                          resolution)) + ")"
                        cursor_presto.execute(query)
                    else:
                        query = "SELECT * FROM (SELECT min(depth) FROM hcmr_poseidon_med_bathymetry WHERE round(latitude," + str(
                            resolution) + " )=" + str(
                                round(float(spill_infos[0]['latitude']),
                                      resolution)
                            ) + " AND round(longitude," + str(
                                resolution) + ")=" + str(
                                    round(float(spill_infos[0]['longitude']),
                                          resolution)) + ")"
                    cursor_presto.execute(query)
                    result = cursor_presto.fetchall()
                    try:
                        depth = float(result[0][0])
                    except:
                        depth = 0
                service_exec.arguments["algorithm-arguments"][0][
                    "depth"] = depth
                print(query)
                print('Oilspill depth:' + str(depth))
                # service_exec.arguments["algorithm-arguments"][0]["depth"] = spill_infos[0]['depth']

        elif scenario == '2':
            count = 1
            for el in spill_infos:
                service_exec.arguments["algorithm-arguments"][0][
                    "latitude" + str(count)] = spill_infos[count -
                                                           1]['latitude']
                service_exec.arguments["algorithm-arguments"][0][
                    "longitude" + str(count)] = spill_infos[count -
                                                            1]['longitude']
                count = count + 1
            service_exec.arguments["algorithm-arguments"][0][
                "number_of_points"] = count - 1

        service_exec.arguments["algorithm-arguments"][0][
            "start_date"] = spill_infos[0]['start_date']
        service_exec.arguments["algorithm-arguments"][0][
            "oil_volume"] = spill_infos[0]['oil_volume']
        service_exec.arguments["algorithm-arguments"][0]["sim_length"] = str(
            sim_length)
        if wave_model == '202':
            service_exec.arguments["algorithm-arguments"][0][
                "wave_model"] = 'Poseidon WAM Cycle 4 for the Aegean'
        elif wave_model == '201':
            service_exec.arguments["algorithm-arguments"][0][
                "wave_model"] = 'Poseidon WAM Cycle 4 for the Mediterranean'
        elif wave_model == '203':
            service_exec.arguments["algorithm-arguments"][0][
                "wave_model"] = 'Copernicus Wave Model for the Mediterranean'
        else:
            service_exec.arguments["algorithm-arguments"][0]["wave_model"] = ''

        if ocean_model == '001':
            service_exec.arguments["algorithm-arguments"][0][
                "ocean_model"] = 'Poseidon High Resolution Aegean Model'
        elif ocean_model == '002':
            service_exec.arguments["algorithm-arguments"][0][
                "ocean_model"] = 'Poseidon Mediterranean Model'
        elif ocean_model == '003':
            service_exec.arguments["algorithm-arguments"][0][
                "ocean_model"] = 'Copernicus Mediterranean Model'
        else:
            service_exec.arguments["algorithm-arguments"][0][
                "ocean_model"] = ''

        service_exec.arguments["algorithm-arguments"][0][
            "natura_layer"] = natura_layer
        service_exec.arguments["algorithm-arguments"][0][
            "ais_layer"] = ais_layer

        # 1)Create input file
        if service_exec.status == 'failed':
            raise Exception
        service_exec.status = "Creating simulation request"
        service_exec.save()
        filename, url_params = create_inp_file_from_request_and_upload(
            request, depth)
        # 2)Calculate oil spill
        if service_exec.status == 'failed':
            raise Exception
        service_exec.status = "Simulation running"
        service_exec.save()
        found = wait_until_output_ready(url_params, request)
        if found:
            if service_exec.status == 'failed':
                raise Exception
            service_exec.status = "Simulation results received"
            service_exec.save()
            filename_output = str(filename).replace("_F.inp", "_F.out")
            hcmr_data_filename = str(filename).replace("_F.inp", ".json")
            red_points_filename = str(filename).replace("_F.inp", ".txt")

            # 3)Transforming data to be shown on map
            if service_exec.status == 'failed':
                raise Exception
            service_exec.status = "Transforming data to be shown on map"
            service_exec.save()
            output_path = 'service_builder/static/services_files/hcmr_service_1/' + filename_output
            spill_data, parcel_data = create_json_from_out_file(output_path)
            # spill_data = [spill_infos[0]['start_date']+':00', spill_infos[0]['latitude'], spill_infos[0]['longitude'], spill_data[0][3], spill_data[0][4], spill_data[0][3], spill_infos[0]['oil_volume'],spill_data[0][5], spill_data[0][6]]
            # print str(spill_infos[0]['latitude']) + ' ' + spill_infos[0]['longitude']
            # print str(valid_points[0][0]) + ' ' + str(valid_points[0][1])
            # for el in valid_points:
            #     parcel_data.insert(0,[spill_infos[0]['start_date'].encode('ascii') + ':00', float(el[0]),float(el[1]),
            #                   parcel_data[0][3], parcel_data[0][4], float(spill_infos[0]['oil_volume']),
            #                   parcel_data[0][6], parcel_data[0][7]])
            # spill_data.insert(0,
            #                    [spill_infos[0]['start_date'].encode('ascii') + ':00', spill_data[0][1], spill_data[0][2], spill_data[0][3], spill_data[0][4], spill_data[0][5], spill_data[0][6], spill_data[0][7], spill_data[0][8], spill_data[0][9], spill_data[0][10]])

            print('create_json_from_out_file done')
            headers_parcel = [
                "time", "Lat", "Lon", "Dpth", "Status", "Volume(m3)", "Dens",
                "Visc"
            ]
            parcel_df = DataFrame(parcel_data, columns=headers_parcel)
            print('parcel_df = DataFrame done')
            print(parcel_df.head(2))
            parcel_df.to_json('visualizer/static/visualizer/files/' +
                              hcmr_data_filename,
                              orient='records')
            print('parcel_df.to_json done')

            headers_spill = [
                'time', 'N', '%ev', '%srf', '%em', '%disp', '%cst', '%btm',
                'max_visc', 'min_visc', 'dens'
            ]
            service_exec.arguments["algorithm-arguments"][1][
                "headers_spill"] = headers_spill
            service_exec.arguments["algorithm-arguments"][1][
                "spill_data"] = spill_data
            service_exec.save()

            print('spill_data done')

            # 4)Calculate red points
            if service_exec.status == 'failed':
                raise Exception
            service_exec.status = "Calculating oil spill intersections with protected areas"
            service_exec.save()
            if natura_layer == "true":
                # red_points_calc.calculate(hcmr_data_filename, red_points_filename)
                pass
            if ais_layer == "true":
                try:
                    dataset_list.append(
                        (Dataset.objects.get(table_name='xmile_ais',
                                             stored_at='UBITECH_PRESTO')).id)
                except:
                    print('Dataset does not exist in database')
            print('red points calculated')
            # 5)Create Visualization

            print(valid_points)
            oil_spill_start = ''
            v_count = 1
            for el in valid_points:
                oil_spill_start = oil_spill_start + 'start_lat' + str(
                    v_count) + '=' + str(
                        el[0]) + '&start_lon' + str(v_count) + '=' + str(
                            el[1]) + '&'
                v_count = v_count + 1
            visualization_url = "http://" + request.META[
                'HTTP_HOST'] + "/visualizations/map_markers_in_time_hcmr/" + "?" + oil_spill_start + "markerType=circle&lat_col=Lat&lon_col=Lon" + "&data_file=" + hcmr_data_filename + "&red_points_file=" + red_points_filename + "&natura_layer=" + natura_layer + "&ais_layer=" + ais_layer + "&time_interval=" + time_interval + "&valid_points=" + str(
                    len(valid_points))
            visualization_url = "http://" + request.META['HTTP_HOST'] + "/visualizations/map_markers_in_time_hcmr/" + "?"+oil_spill_start \
                                + "&markerType=circle&lat_col=Lat&lon_col=Lon" \
                                + "&data_file=" + hcmr_data_filename + "&red_points_file=" \
                                + red_points_filename + "&natura_layer=" + natura_layer + "&ais_layer=" + ais_layer \
                                + "&time_interval=" + time_interval + "&start_date=" + start_date + \
                                '&latitude=' + latitude + "&longitude=" + longitude + "&length="+ sim_length + "&valid_points="+ str(len(valid_points))

            service_exec.dataframe_visualizations = {"v1": visualization_url}
            service_exec.arguments["algorithm-arguments"][0][
                "out_filepath"] = filename_output
            if service_exec.status == 'failed':
                raise Exception
            service_exec.status = "done"
            service_exec.save()
            service_obj = service_exec.service
            for dataset_list_el_id in dataset_list:
                try:
                    dataset_obj = Dataset.objects.get(id=dataset_list_el_id)
                    dataset_service_execution(dataset_obj, service_obj)
                except:
                    pass
            service_use(service_obj)
            unique_service_use(service_obj, request.user)
            hcmr_statistics(scenario, sim_length, time_interval, ocean_model,
                            wave_model, str_to_bool(natura_layer),
                            str_to_bool(ais_layer))
            # context = {
            #     'url': visualization_url,
            #     'out_filepath': filename_output,
            # }
            # return render(request, 'hcmr_pilot/scenario1-results.html', context)
        else:
            # html = "<html><body>Something went wrong. Please, try again.</body></html>"
            # return HttpResponse(html)
            service_exec.status = "failed"
            service_exec.save()
    except:
        service_exec.status = "failed"
        service_exec.save()
Ejemplo n.º 46
0
 def test_frame_from_json_precise_float(self):
     df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]])
     result = read_json(df.to_json(), precise_float=True)
     assert_frame_equal(result, df)
Ejemplo n.º 47
0
 def test_frame_from_json_precise_float(self):
     df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]])
     result = read_json(df.to_json(), precise_float=True)
     assert_frame_equal(result, df, check_index_type=False,
                        check_column_type=False)
Ejemplo n.º 48
0
    def test_frame_from_json_nones(self):
        df = DataFrame([[1, 2], [4, 5, 6]])
        unser = read_json(df.to_json())
        self.assertTrue(np.isnan(unser[2][0]))

        df = DataFrame([['1', '2'], ['4', '5', '6']])
        unser = read_json(df.to_json())
        self.assertTrue(np.isnan(unser[2][0]))
        unser = read_json(df.to_json(), dtype=False)
        self.assertTrue(unser[2][0] is None)
        unser = read_json(df.to_json(), convert_axes=False, dtype=False)
        self.assertTrue(unser['2']['0'] is None)

        unser = read_json(df.to_json(), numpy=False)
        self.assertTrue(np.isnan(unser[2][0]))
        unser = read_json(df.to_json(), numpy=False, dtype=False)
        self.assertTrue(unser[2][0] is None)
        unser = read_json(df.to_json(),
                          numpy=False,
                          convert_axes=False,
                          dtype=False)
        self.assertTrue(unser['2']['0'] is None)

        # infinities get mapped to nulls which get mapped to NaNs during
        # deserialisation
        df = DataFrame([[1, 2], [4, 5, 6]])
        df.loc[0, 2] = np.inf
        unser = read_json(df.to_json())
        self.assertTrue(np.isnan(unser[2][0]))
        unser = read_json(df.to_json(), dtype=False)
        self.assertTrue(np.isnan(unser[2][0]))

        df.loc[0, 2] = np.NINF
        unser = read_json(df.to_json())
        self.assertTrue(np.isnan(unser[2][0]))
        unser = read_json(df.to_json(), dtype=False)
        self.assertTrue(np.isnan(unser[2][0]))
Ejemplo n.º 49
0
class TestTableOrient(object):
    def setup_method(self, method):
        self.df = DataFrame(
            {
                'A': [1, 2, 3, 4],
                'B': ['a', 'b', 'c', 'c'],
                'C':
                pd.date_range('2016-01-01', freq='d', periods=4),
                'D':
                pd.timedelta_range('1H', periods=4, freq='T'),
                'E':
                pd.Series(pd.Categorical(['a', 'b', 'c', 'c'])),
                'F':
                pd.Series(pd.Categorical(['a', 'b', 'c', 'c'], ordered=True)),
                'G': [1., 2., 3, 4.],
                'H':
                pd.date_range(
                    '2016-01-01', freq='d', periods=4, tz='US/Central'),
            },
            index=pd.Index(range(4), name='idx'))

    def test_build_series(self):
        s = pd.Series([1, 2], name='a')
        s.index.name = 'id'
        result = s.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result['schema']
        result['schema'].pop('pandas_version')

        fields = [{
            'name': 'id',
            'type': 'integer'
        }, {
            'name': 'a',
            'type': 'integer'
        }]

        schema = {
            'fields': fields,
            'primaryKey': ['id'],
        }

        expected = OrderedDict([('schema', schema),
                                ('data', [
                                    OrderedDict([('id', 0), ('a', 1)]),
                                    OrderedDict([('id', 1), ('a', 2)])
                                ])])
        assert result == expected

    def test_to_json(self):
        df = self.df.copy()
        df.index.name = 'idx'
        result = df.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result['schema']
        result['schema'].pop('pandas_version')

        fields = [{
            'name': 'idx',
            'type': 'integer'
        }, {
            'name': 'A',
            'type': 'integer'
        }, {
            'name': 'B',
            'type': 'string'
        }, {
            'name': 'C',
            'type': 'datetime'
        }, {
            'name': 'D',
            'type': 'duration'
        }, {
            'constraints': {
                'enum': ['a', 'b', 'c']
            },
            'name': 'E',
            'ordered': False,
            'type': 'any'
        }, {
            'constraints': {
                'enum': ['a', 'b', 'c']
            },
            'name': 'F',
            'ordered': True,
            'type': 'any'
        }, {
            'name': 'G',
            'type': 'number'
        }, {
            'name': 'H',
            'type': 'datetime',
            'tz': 'US/Central'
        }]

        schema = {
            'fields': fields,
            'primaryKey': ['idx'],
        }
        data = [
            OrderedDict([('idx', 0), ('A', 1), ('B', 'a'),
                         ('C', '2016-01-01T00:00:00.000Z'),
                         ('D', 'P0DT1H0M0S'), ('E', 'a'), ('F', 'a'),
                         ('G', 1.), ('H', '2016-01-01T06:00:00.000Z')]),
            OrderedDict([('idx', 1), ('A', 2), ('B', 'b'),
                         ('C', '2016-01-02T00:00:00.000Z'),
                         ('D', 'P0DT1H1M0S'), ('E', 'b'), ('F', 'b'),
                         ('G', 2.), ('H', '2016-01-02T06:00:00.000Z')]),
            OrderedDict([('idx', 2), ('A', 3), ('B', 'c'),
                         ('C', '2016-01-03T00:00:00.000Z'),
                         ('D', 'P0DT1H2M0S'), ('E', 'c'), ('F', 'c'),
                         ('G', 3.), ('H', '2016-01-03T06:00:00.000Z')]),
            OrderedDict([('idx', 3), ('A', 4), ('B', 'c'),
                         ('C', '2016-01-04T00:00:00.000Z'),
                         ('D', 'P0DT1H3M0S'), ('E', 'c'), ('F', 'c'),
                         ('G', 4.), ('H', '2016-01-04T06:00:00.000Z')]),
        ]
        expected = OrderedDict([('schema', schema), ('data', data)])
        assert result == expected

    def test_to_json_float_index(self):
        data = pd.Series(1, index=[1., 2.])
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        expected = (OrderedDict([('schema', {
            'fields': [{
                'name': 'index',
                'type': 'number'
            }, {
                'name': 'values',
                'type': 'integer'
            }],
            'primaryKey': ['index']
        }),
                                 ('data', [
                                     OrderedDict([('index', 1.0),
                                                  ('values', 1)]),
                                     OrderedDict([('index', 2.0),
                                                  ('values', 1)])
                                 ])]))
        assert result == expected

    def test_to_json_period_index(self):
        idx = pd.period_range('2016', freq='Q-JAN', periods=2)
        data = pd.Series(1, idx)
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        fields = [{
            'freq': 'Q-JAN',
            'name': 'index',
            'type': 'datetime'
        }, {
            'name': 'values',
            'type': 'integer'
        }]

        schema = {'fields': fields, 'primaryKey': ['index']}
        data = [
            OrderedDict([('index', '2015-11-01T00:00:00.000Z'),
                         ('values', 1)]),
            OrderedDict([('index', '2016-02-01T00:00:00.000Z'), ('values', 1)])
        ]
        expected = OrderedDict([('schema', schema), ('data', data)])
        assert result == expected

    def test_to_json_categorical_index(self):
        data = pd.Series(1, pd.CategoricalIndex(['a', 'b']))
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        expected = (OrderedDict([('schema', {
            'fields': [{
                'name': 'index',
                'type': 'any',
                'constraints': {
                    'enum': ['a', 'b']
                },
                'ordered': False
            }, {
                'name': 'values',
                'type': 'integer'
            }],
            'primaryKey': ['index']
        }),
                                 ('data', [
                                     OrderedDict([('index', 'a'),
                                                  ('values', 1)]),
                                     OrderedDict([('index', 'b'),
                                                  ('values', 1)])
                                 ])]))
        assert result == expected

    def test_date_format_raises(self):
        with pytest.raises(ValueError):
            self.df.to_json(orient='table', date_format='epoch')

        # others work
        self.df.to_json(orient='table', date_format='iso')
        self.df.to_json(orient='table')

    def test_make_field_int(self):
        data = [1, 2, 3]
        kinds = [pd.Series(data, name='name'), pd.Index(data, name='name')]
        for kind in kinds:
            result = make_field(kind)
            expected = {"name": "name", "type": 'integer'}
            assert result == expected

    def test_make_field_float(self):
        data = [1., 2., 3.]
        kinds = [pd.Series(data, name='name'), pd.Index(data, name='name')]
        for kind in kinds:
            result = make_field(kind)
            expected = {"name": "name", "type": 'number'}
            assert result == expected

    def test_make_field_datetime(self):
        data = [1., 2., 3.]
        kinds = [
            pd.Series(pd.to_datetime(data), name='values'),
            pd.to_datetime(data)
        ]
        for kind in kinds:
            result = make_field(kind)
            expected = {"name": "values", "type": 'datetime'}
            assert result == expected

        kinds = [
            pd.Series(pd.to_datetime(data, utc=True), name='values'),
            pd.to_datetime(data, utc=True)
        ]
        for kind in kinds:
            result = make_field(kind)
            expected = {"name": "values", "type": 'datetime', "tz": "UTC"}
            assert result == expected

        arr = pd.period_range('2016', freq='A-DEC', periods=4)
        result = make_field(arr)
        expected = {"name": "values", "type": 'datetime', "freq": "A-DEC"}
        assert result == expected

    def test_make_field_categorical(self):
        data = ['a', 'b', 'c']
        ordereds = [True, False]

        for ordered in ordereds:
            arr = pd.Series(pd.Categorical(data, ordered=ordered), name='cats')
            result = make_field(arr)
            expected = {
                "name": "cats",
                "type": "any",
                "constraints": {
                    "enum": data
                },
                "ordered": ordered
            }
            assert result == expected

            arr = pd.CategoricalIndex(data, ordered=ordered, name='cats')
            result = make_field(arr)
            expected = {
                "name": "cats",
                "type": "any",
                "constraints": {
                    "enum": data
                },
                "ordered": ordered
            }
            assert result == expected

    def test_categorical(self):
        s = pd.Series(pd.Categorical(['a', 'b', 'a']))
        s.index.name = 'idx'
        result = s.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        fields = [{
            'name': 'idx',
            'type': 'integer'
        }, {
            'constraints': {
                'enum': ['a', 'b']
            },
            'name': 'values',
            'ordered': False,
            'type': 'any'
        }]

        expected = OrderedDict([('schema', {
            'fields': fields,
            'primaryKey': ['idx']
        }),
                                ('data', [
                                    OrderedDict([('idx', 0), ('values', 'a')]),
                                    OrderedDict([('idx', 1), ('values', 'b')]),
                                    OrderedDict([('idx', 2), ('values', 'a')])
                                ])])
        assert result == expected

    def test_set_default_names_unset(self):
        data = pd.Series(1, pd.Index([1]))
        result = set_default_names(data)
        assert result.index.name == 'index'

    def test_set_default_names_set(self):
        data = pd.Series(1, pd.Index([1], name='myname'))
        result = set_default_names(data)
        assert result.index.name == 'myname'

    def test_set_default_names_mi_unset(self):
        data = pd.Series(1, pd.MultiIndex.from_product([('a', 'b'),
                                                        ('c', 'd')]))
        result = set_default_names(data)
        assert result.index.names == ['level_0', 'level_1']

    def test_set_default_names_mi_set(self):
        data = pd.Series(
            1,
            pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')],
                                       names=['n1', 'n2']))
        result = set_default_names(data)
        assert result.index.names == ['n1', 'n2']

    def test_set_default_names_mi_partion(self):
        data = pd.Series(
            1,
            pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')],
                                       names=['n1', None]))
        result = set_default_names(data)
        assert result.index.names == ['n1', 'level_1']

    def test_timestamp_in_columns(self):
        df = pd.DataFrame(
            [[1, 2]],
            columns=[pd.Timestamp('2016'),
                     pd.Timedelta(10, unit='s')])
        result = df.to_json(orient="table")
        js = json.loads(result)
        assert js['schema']['fields'][1]['name'] == 1451606400000
        assert js['schema']['fields'][2]['name'] == 10000

    def test_overlapping_names(self):
        cases = [
            pd.Series([1], index=pd.Index([1], name='a'), name='a'),
            pd.DataFrame({"A": [1]}, index=pd.Index([1], name="A")),
            pd.DataFrame({"A": [1]},
                         index=pd.MultiIndex.from_arrays([['a'], [1]],
                                                         names=["A", "a"])),
        ]

        for data in cases:
            with pytest.raises(ValueError) as excinfo:
                data.to_json(orient='table')

            assert 'Overlapping' in str(excinfo.value)

    def test_mi_falsey_name(self):
        # GH 16203
        df = pd.DataFrame(np.random.randn(4, 4),
                          index=pd.MultiIndex.from_product([('A', 'B'),
                                                            ('a', 'b')]))
        result = [x['name'] for x in build_table_schema(df)['fields']]
        assert result == ['level_0', 'level_1', 0, 1, 2, 3]
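As the test above shows, build_table_schema is the helper that produces the schema embedded by orient='table'. A minimal sketch of calling it directly; the import path has moved between pandas versions, so treat it as an assumption:

import pandas as pd
from pandas.io.json import build_table_schema  # path may differ in newer pandas

df = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]},
                  index=pd.Index([0, 1], name="idx"))
schema = build_table_schema(df)
# Expected shape (the pandas_version value varies by release):
# {'fields': [{'name': 'idx', 'type': 'integer'},
#             {'name': 'A', 'type': 'integer'},
#             {'name': 'B', 'type': 'string'}],
#  'primaryKey': ['idx'],
#  'pandas_version': '...'}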
Ejemplo n.º 50
0
class ToJSONLines(BaseIO):

    fname = "__test__.json"

    def setup(self):
        N = 10**5
        ncols = 5
        index = date_range("20000101", periods=N, freq="H")
        timedeltas = timedelta_range(start=1, periods=N, freq="s")
        datetimes = date_range(start=1, periods=N, freq="s")
        ints = np.random.randint(100000000, size=N)
        floats = np.random.randn(N)
        strings = tm.makeStringIndex(N)
        self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N))
        self.df_date_idx = DataFrame(np.random.randn(N, ncols), index=index)
        self.df_td_int_ts = DataFrame(
            {
                "td_1": timedeltas,
                "td_2": timedeltas,
                "int_1": ints,
                "int_2": ints,
                "ts_1": datetimes,
                "ts_2": datetimes,
            },
            index=index,
        )
        self.df_int_floats = DataFrame(
            {
                "int_1": ints,
                "int_2": ints,
                "int_3": ints,
                "float_1": floats,
                "float_2": floats,
                "float_3": floats,
            },
            index=index,
        )
        self.df_int_float_str = DataFrame(
            {
                "int_1": ints,
                "int_2": ints,
                "float_1": floats,
                "float_2": floats,
                "str_1": strings,
                "str_2": strings,
            },
            index=index,
        )

    def time_floats_with_int_idex_lines(self):
        self.df.to_json(self.fname, orient="records", lines=True)

    def time_floats_with_dt_index_lines(self):
        self.df_date_idx.to_json(self.fname, orient="records", lines=True)

    def time_delta_int_tstamp_lines(self):
        self.df_td_int_ts.to_json(self.fname, orient="records", lines=True)

    def time_float_int_lines(self):
        self.df_int_floats.to_json(self.fname, orient="records", lines=True)

    def time_float_int_str_lines(self):
        self.df_int_float_str.to_json(self.fname, orient="records", lines=True)
Ejemplo n.º 51
0
def test(dataset, config, log_dir):
    """Test model, output prediction as json file."""
    model_config = config['model']
    sess_config = config['session']

    answerset = pd.read_csv(os.path.join(config['preprocess_dir'],
                                         'answer_set.txt'),
                            header=None)[0]

    with tf.Graph().as_default():
        model = GRA(model_config)
        model.build_inference()

        with tf.Session(config=sess_config) as sess:
            ckpt_dir = os.path.join(log_dir, 'checkpoint')
            save_path = tf.train.latest_checkpoint(ckpt_dir)
            saver = tf.train.Saver()
            if save_path:
                print('load checkpoint {}.'.format(save_path))
                saver.restore(sess, save_path)
            else:
                print('no checkpoint.')
                exit()

            # test iterate over examples
            result = DataFrame(columns=['id', 'answer'])
            correct = 0

            while dataset.has_test_example:
                vgg, c3d, question, answer, example_id = dataset.get_test_example(
                )
                feed_dict = {
                    model.appear: [vgg],
                    model.motion: [c3d],
                    model.question_encode: [question],
                }
                prediction, channel_weight, appear_weight, motion_weight = sess.run(
                    [
                        model.prediction, model.channel_weight,
                        model.appear_weight, model.motion_weight
                    ],
                    feed_dict=feed_dict)
                #prediction = prediction[0]
                channel_weight = channel_weight[0]
                appear_weight = appear_weight[0]
                motion_weight = motion_weight[0]

                result = result.append(
                    {
                        'id': example_id,
                        'answer': prediction[1]
                    },
                    ignore_index=True)
                # modified-why
                # if answerset[prediction] in answer:
                #     correct += 1
                #     print(answer, example_id, channel_weight)
                # print(appear_weight)
                # print(motion_weight)

            result.to_json(os.path.join(log_dir, 'prediction.json'), 'records')

            # acc = correct / dataset.test_example_total
            # print('\n[TEST] acc {:.5f}.\n'.format(acc))

            dataset.reset_test()
            return None
Ejemplo n.º 52
0
def export(df: pd.DataFrame, file_path: str):
    print(f'dataframe has {len(df)} rows')
    print(f'started exporting {file_path}: {datetime.now()}')
    df.to_json(file_path)
    print(f'created {file_path}: {datetime.now()}')
Ejemplo n.º 53
0
 def render_dataframe(self, df: pd.DataFrame, response: Response) -> str:
     return df.to_json(orient="records")
Ejemplo n.º 54
0
 def test_read_json_table_timezones_orient(self, idx, vals, recwarn):
     # GH 35973
     df = DataFrame(vals, index=idx)
     out = df.to_json(orient="table")
     result = pd.read_json(out, orient="table")
     tm.assert_frame_equal(df, result)
Ejemplo n.º 55
0
 def test_read_json_table_orient_raises(self, index_nm, vals):
     df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
     out = df.to_json(orient="table")
     with tm.assert_raises_regex(NotImplementedError, 'can not yet read '):
         pd.read_json(out, orient="table")
Ejemplo n.º 56
0
 def test_read_json_table_orient_raises(self, index_nm, vals, recwarn):
     df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
     out = df.to_json(orient="table")
     with pytest.raises(NotImplementedError, match='can not yet read '):
         pd.read_json(out, orient="table")
Ejemplo n.º 57
0
 def persist_dataset(self, dataset: pd.DataFrame, overwrite: bool):
     orient = self.dataset_conf.get('orient')
     dataset.to_json(self.dataset_conf['uri'], orient=orient)
Ejemplo n.º 58
0
def lines_json_df():
    df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    return df.to_json(lines=True, orient="records")
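The fixture above emits newline-delimited records; the same options read it back. A minimal round-trip sketch (newer pandas may warn about passing a literal string instead of a file-like object):

import pandas as pd
from pandas import DataFrame

df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
payload = df.to_json(lines=True, orient="records")  # one JSON object per line
roundtripped = pd.read_json(payload, lines=True, orient="records")
assert roundtripped.equals(df)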
Ejemplo n.º 59
0
class TestTableOrient(object):
    def setup_method(self, method):
        self.df = DataFrame(
            {
                'A': [1, 2, 3, 4],
                'B': ['a', 'b', 'c', 'c'],
                'C':
                pd.date_range('2016-01-01', freq='d', periods=4),
                'D':
                pd.timedelta_range('1H', periods=4, freq='T'),
                'E':
                pd.Series(pd.Categorical(['a', 'b', 'c', 'c'])),
                'F':
                pd.Series(pd.Categorical(['a', 'b', 'c', 'c'], ordered=True)),
                'G': [1., 2., 3, 4.],
                'H':
                pd.date_range(
                    '2016-01-01', freq='d', periods=4, tz='US/Central'),
            },
            index=pd.Index(range(4), name='idx'))

    def test_build_series(self):
        s = pd.Series([1, 2], name='a')
        s.index.name = 'id'
        result = s.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result['schema']
        result['schema'].pop('pandas_version')

        fields = [{
            'name': 'id',
            'type': 'integer'
        }, {
            'name': 'a',
            'type': 'integer'
        }]

        schema = {
            'fields': fields,
            'primaryKey': ['id'],
        }

        expected = OrderedDict([('schema', schema),
                                ('data', [
                                    OrderedDict([('id', 0), ('a', 1)]),
                                    OrderedDict([('id', 1), ('a', 2)])
                                ])])
        assert result == expected

    def test_to_json(self):
        df = self.df.copy()
        df.index.name = 'idx'
        result = df.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result['schema']
        result['schema'].pop('pandas_version')

        fields = [{
            'name': 'idx',
            'type': 'integer'
        }, {
            'name': 'A',
            'type': 'integer'
        }, {
            'name': 'B',
            'type': 'string'
        }, {
            'name': 'C',
            'type': 'datetime'
        }, {
            'name': 'D',
            'type': 'duration'
        }, {
            'constraints': {
                'enum': ['a', 'b', 'c']
            },
            'name': 'E',
            'ordered': False,
            'type': 'any'
        }, {
            'constraints': {
                'enum': ['a', 'b', 'c']
            },
            'name': 'F',
            'ordered': True,
            'type': 'any'
        }, {
            'name': 'G',
            'type': 'number'
        }, {
            'name': 'H',
            'type': 'datetime',
            'tz': 'US/Central'
        }]

        schema = {
            'fields': fields,
            'primaryKey': ['idx'],
        }
        data = [
            OrderedDict([('idx', 0), ('A', 1), ('B', 'a'),
                         ('C', '2016-01-01T00:00:00.000Z'),
                         ('D', 'P0DT1H0M0S'), ('E', 'a'), ('F', 'a'),
                         ('G', 1.), ('H', '2016-01-01T06:00:00.000Z')]),
            OrderedDict([('idx', 1), ('A', 2), ('B', 'b'),
                         ('C', '2016-01-02T00:00:00.000Z'),
                         ('D', 'P0DT1H1M0S'), ('E', 'b'), ('F', 'b'),
                         ('G', 2.), ('H', '2016-01-02T06:00:00.000Z')]),
            OrderedDict([('idx', 2), ('A', 3), ('B', 'c'),
                         ('C', '2016-01-03T00:00:00.000Z'),
                         ('D', 'P0DT1H2M0S'), ('E', 'c'), ('F', 'c'),
                         ('G', 3.), ('H', '2016-01-03T06:00:00.000Z')]),
            OrderedDict([('idx', 3), ('A', 4), ('B', 'c'),
                         ('C', '2016-01-04T00:00:00.000Z'),
                         ('D', 'P0DT1H3M0S'), ('E', 'c'), ('F', 'c'),
                         ('G', 4.), ('H', '2016-01-04T06:00:00.000Z')]),
        ]
        expected = OrderedDict([('schema', schema), ('data', data)])
        assert result == expected

    def test_to_json_float_index(self):
        data = pd.Series(1, index=[1., 2.])
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        expected = (OrderedDict([('schema', {
            'fields': [{
                'name': 'index',
                'type': 'number'
            }, {
                'name': 'values',
                'type': 'integer'
            }],
            'primaryKey': ['index']
        }),
                                 ('data', [
                                     OrderedDict([('index', 1.0),
                                                  ('values', 1)]),
                                     OrderedDict([('index', 2.0),
                                                  ('values', 1)])
                                 ])]))
        assert result == expected

    def test_to_json_period_index(self):
        idx = pd.period_range('2016', freq='Q-JAN', periods=2)
        data = pd.Series(1, idx)
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        fields = [{
            'freq': 'Q-JAN',
            'name': 'index',
            'type': 'datetime'
        }, {
            'name': 'values',
            'type': 'integer'
        }]

        schema = {'fields': fields, 'primaryKey': ['index']}
        data = [
            OrderedDict([('index', '2015-11-01T00:00:00.000Z'),
                         ('values', 1)]),
            OrderedDict([('index', '2016-02-01T00:00:00.000Z'), ('values', 1)])
        ]
        expected = OrderedDict([('schema', schema), ('data', data)])
        assert result == expected

    def test_to_json_categorical_index(self):
        data = pd.Series(1, pd.CategoricalIndex(['a', 'b']))
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        expected = (OrderedDict([('schema', {
            'fields': [{
                'name': 'index',
                'type': 'any',
                'constraints': {
                    'enum': ['a', 'b']
                },
                'ordered': False
            }, {
                'name': 'values',
                'type': 'integer'
            }],
            'primaryKey': ['index']
        }),
                                 ('data', [
                                     OrderedDict([('index', 'a'),
                                                  ('values', 1)]),
                                     OrderedDict([('index', 'b'),
                                                  ('values', 1)])
                                 ])]))
        assert result == expected

    def test_date_format_raises(self):
        with pytest.raises(ValueError):
            self.df.to_json(orient='table', date_format='epoch')

        # others work
        self.df.to_json(orient='table', date_format='iso')
        self.df.to_json(orient='table')

    @pytest.mark.parametrize('kind', [pd.Series, pd.Index])
    def test_convert_pandas_type_to_json_field_int(self, kind):
        data = [1, 2, 3]
        result = convert_pandas_type_to_json_field(kind(data, name='name'))
        expected = {"name": "name", "type": "integer"}
        assert result == expected

    @pytest.mark.parametrize('kind', [pd.Series, pd.Index])
    def test_convert_pandas_type_to_json_field_float(self, kind):
        data = [1., 2., 3.]
        result = convert_pandas_type_to_json_field(kind(data, name='name'))
        expected = {"name": "name", "type": "number"}
        assert result == expected

    @pytest.mark.parametrize('dt_args,extra_exp', [({}, {}),
                                                   ({
                                                       'utc': True
                                                   }, {
                                                       'tz': 'UTC'
                                                   })])
    @pytest.mark.parametrize('wrapper', [None, pd.Series])
    def test_convert_pandas_type_to_json_field_datetime(
            self, dt_args, extra_exp, wrapper):
        data = [1., 2., 3.]
        data = pd.to_datetime(data, **dt_args)
        if wrapper is pd.Series:
            data = pd.Series(data, name='values')
        result = convert_pandas_type_to_json_field(data)
        expected = {"name": "values", "type": 'datetime'}
        expected.update(extra_exp)
        assert result == expected

    def test_convert_pandas_type_to_json_period_range(self):
        arr = pd.period_range('2016', freq='A-DEC', periods=4)
        result = convert_pandas_type_to_json_field(arr)
        expected = {"name": "values", "type": 'datetime', "freq": "A-DEC"}
        assert result == expected

    @pytest.mark.parametrize('kind', [pd.Categorical, pd.CategoricalIndex])
    @pytest.mark.parametrize('ordered', [True, False])
    def test_convert_pandas_type_to_json_field_categorical(
            self, kind, ordered):
        data = ['a', 'b', 'c']
        if kind is pd.Categorical:
            arr = pd.Series(kind(data, ordered=ordered), name='cats')
        elif kind is pd.CategoricalIndex:
            arr = kind(data, ordered=ordered, name='cats')

        result = convert_pandas_type_to_json_field(arr)
        expected = {
            "name": "cats",
            "type": "any",
            "constraints": {
                "enum": data
            },
            "ordered": ordered
        }
        assert result == expected

    @pytest.mark.parametrize(
        "inp,exp",
        [({
            'type': 'integer'
        }, 'int64'), ({
            'type': 'number'
        }, 'float64'), ({
            'type': 'boolean'
        }, 'bool'), ({
            'type': 'duration'
        }, 'timedelta64'), ({
            'type': 'datetime'
        }, 'datetime64[ns]'),
         ({
             'type': 'datetime',
             'tz': 'US/Hawaii'
         }, 'datetime64[ns, US/Hawaii]'), ({
             'type': 'any'
         }, 'object'),
         ({
             'type': 'any',
             'constraints': {
                 'enum': ['a', 'b', 'c']
             },
             'ordered': False
         }, CategoricalDtype(categories=['a', 'b', 'c'], ordered=False)),
         ({
             'type': 'any',
             'constraints': {
                 'enum': ['a', 'b', 'c']
             },
             'ordered': True
         }, CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)),
         ({
             'type': 'string'
         }, 'object')])
    def test_convert_json_field_to_pandas_type(self, inp, exp):
        field = {'name': 'foo'}
        field.update(inp)
        assert convert_json_field_to_pandas_type(field) == exp

    @pytest.mark.parametrize("inp", ["geopoint", "geojson", "fake_type"])
    def test_convert_json_field_to_pandas_type_raises(self, inp):
        field = {'type': inp}
        with tm.assert_raises_regex(
                ValueError, "Unsupported or invalid field "
                "type: {}".format(inp)):
            convert_json_field_to_pandas_type(field)

    def test_categorical(self):
        s = pd.Series(pd.Categorical(['a', 'b', 'a']))
        s.index.name = 'idx'
        result = s.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        fields = [{
            'name': 'idx',
            'type': 'integer'
        }, {
            'constraints': {
                'enum': ['a', 'b']
            },
            'name': 'values',
            'ordered': False,
            'type': 'any'
        }]

        expected = OrderedDict([('schema', {
            'fields': fields,
            'primaryKey': ['idx']
        }),
                                ('data', [
                                    OrderedDict([('idx', 0), ('values', 'a')]),
                                    OrderedDict([('idx', 1), ('values', 'b')]),
                                    OrderedDict([('idx', 2), ('values', 'a')])
                                ])])
        assert result == expected

    @pytest.mark.parametrize(
        'idx,nm,prop',
        [(pd.Index([1]), 'index', 'name'),
         (pd.Index([1], name='myname'), 'myname', 'name'),
         (pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')
                                      ]), ['level_0', 'level_1'], 'names'),
         (pd.MultiIndex.from_product(
             [('a', 'b'),
              ('c', 'd')], names=['n1', 'n2']), ['n1', 'n2'], 'names'),
         (pd.MultiIndex.from_product(
             [('a', 'b'),
              ('c', 'd')], names=['n1', None]), ['n1', 'level_1'], 'names')])
    def test_set_names_unset(self, idx, nm, prop):
        data = pd.Series(1, idx)
        result = set_default_names(data)
        assert getattr(result.index, prop) == nm

    def test_timestamp_in_columns(self):
        df = pd.DataFrame(
            [[1, 2]],
            columns=[pd.Timestamp('2016'),
                     pd.Timedelta(10, unit='s')])
        result = df.to_json(orient="table")
        js = json.loads(result)
        assert js['schema']['fields'][1]['name'] == 1451606400000
        assert js['schema']['fields'][2]['name'] == 10000

    @pytest.mark.parametrize('case', [
        pd.Series([1], index=pd.Index([1], name='a'), name='a'),
        pd.DataFrame({"A": [1]}, index=pd.Index([1], name="A")),
        pd.DataFrame({"A": [1]},
                     index=pd.MultiIndex.from_arrays([['a'], [1]],
                                                     names=["A", "a"]))
    ])
    def test_overlapping_names(self, case):
        with tm.assert_raises_regex(ValueError, 'Overlapping'):
            case.to_json(orient='table')
Ejemplo n.º 60
0
def dive(data: pandas.DataFrame) -> HTML:
    # Element ID MUST be unique
    elem_id = _generate_element_id()
    json_str = data.to_json(orient='records')
    return HTML(FACETS_DIVE_TEMPLATE.format(elem_id=elem_id, json_str=json_str))