Exemple #1
0
    def test_expand_empty(self):
        data_test = """{ "ID": "0", "SUBVAL": [] }
{ "ID": "1", "SUBVAL": [ {"ID_SUB":"0"}, {"ID_SUB":"1"}, {"ID_SUB":"2"} ] }
"""
        data_expected = """{ "ID": "1", "ID_SUB": "0" }
{ "ID": "1", "ID_SUB": "1" }
{ "ID": "1", "ID_SUB": "2" }
"""

        df = records.load_jsonl(
            inpt.from_str(data_test),
            [
                records.SchemaField("ID"),
                records.SchemaField("SUBVAL"),
            ],
        )

        df = records.expand_multivalued(df, {
            "ID_SUB": ["SUBVAL", None, "ID_SUB"],
        })

        df_expected = records.load_jsonl(
            inpt.from_str(data_expected),
            [
                records.SchemaField("ID"),
                records.SchemaField("ID_SUB"),
            ],
        )

        pandas.testing.assert_frame_equal(df_expected, df)
Exemple #2
0
    def test_basic_no_drop(self):
        data_test = """{ "ID": "0", "SUBVAL": [ "0", "1", "2" ] }
{ "ID": "1", "SUBVAL": [ "0", "1", "2" ] }
"""
        data_expected = """{ "ID": "0", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "0" }
{ "ID": "0", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "1" }
{ "ID": "0", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "2" }
{ "ID": "1", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "0" }
{ "ID": "1", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "1" }
{ "ID": "1", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "2" }
"""

        df = records.load_jsonl(
            inpt.from_str(data_test),
            [
                records.SchemaField("ID"),
                records.SchemaField("SUBVAL"),
            ],
        )

        df = records.expand_multivalued(df, {
            "ID_SUB": ["SUBVAL", None],
        },
                                        drop_mv=False)

        df_expected = records.load_jsonl(
            inpt.from_str(data_expected),
            [
                records.SchemaField("ID"),
                records.SchemaField("SUBVAL"),
                records.SchemaField("ID_SUB"),
            ],
        )

        pandas.testing.assert_frame_equal(df_expected, df)
Exemple #3
0
    def test_load_flatten_error(self):
        """Check that loading with ``flatten_mv`` fails on a two-entry list.

        The ``B_MV`` value wraps a ``B_MS`` list holding two entries; loading
        that record with ``records.flatten_mv`` as the field transform is
        expected to raise ``ValueError``.
        """
        inpt_str = '{"A":"0","B_MV":[{"B_MS":[{"B":"1"},{"B":"2"}]}],"C":"2"}'

        with self.assertRaises(ValueError):
            # The return value is deliberately not bound: only the raised
            # exception matters here.
            records.load_jsonl(
                inpt.from_str(inpt_str),
                (
                    records.SchemaField("A"),
                    records.SchemaField("B_MV", transform=records.flatten_mv),
                    records.SchemaField("C"),
                ),
            )
Exemple #4
0
    def test_load_csv_basic(self):
        """Load a one-row CSV and check the cells of the schema columns.

        The CSV text also carries a ``D`` column that the schema does not
        list; only ``A``, ``B`` and ``C`` are checked.
        """
        csv_text = "A,B,C,D\n0,1,2,3"

        stream = inpt.from_str(csv_text)
        schema = [records.SchemaField(name) for name in ("A", "B", "C")]
        frame = records.load_csv(stream, schema)

        # Values are compared as strings, in column order.
        for column, expected in (("A", "0"), ("B", "1"), ("C", "2")):
            self.assertEqual(frame.at[0, column], expected)
Exemple #5
0
    def test_load_flatten(self):
        """Load a record whose ``B_MV`` field goes through ``flatten_mv``.

        ``B_MV`` wraps a ``B_MS`` list with a single ``{"B": "1"}`` entry; the
        loaded cell is expected to be the plain string ``"1"``.
        """
        jsonl_text = '{"A":"0","B_MV":[{"B_MS":[{"B":"1"}]}],"C":"2"}'

        stream = inpt.from_str(jsonl_text)
        schema = (
            records.SchemaField("A"),
            records.SchemaField("B_MV", transform=records.flatten_mv),
            records.SchemaField("C"),
        )
        frame = records.load_jsonl(stream, schema)

        expected_cells = {"A": "0", "B_MV": "1", "C": "2"}
        for column, value in expected_cells.items():
            self.assertEqual(frame.at[0, column], value)
Exemple #6
0
    def test_load_jsonl_basic(self):
        """Load a single flat JSONL record and check each cell of row 0."""
        jsonl_text = '{"A":"0","B":"1","C":"2"}'

        stream = inpt.from_str(jsonl_text)
        schema = [records.SchemaField(name) for name in ("A", "B", "C")]
        frame = records.load_jsonl(stream, schema)

        # Values are compared as strings, in column order.
        for column, expected in (("A", "0"), ("B", "1"), ("C", "2")):
            self.assertEqual(frame.at[0, column], expected)
Exemple #7
0
    def test_load_jsonl_transform(self):
        """Load a field whose ``transform`` is a list of callables.

        The asserted cell, ``"PREFIX-TEST-SUFFIX"``, is what results from
        applying the three callables to ``"test"`` in list order.
        """
        jsonl_text = '{"A":"test"}'

        stream = inpt.from_str(jsonl_text)
        # Append a suffix, prepend a prefix, then upper-case.
        pipeline = [
            lambda v: f"{v}-suffix",
            lambda v: f"prefix-{v}",
            lambda v: v.upper(),
        ]
        schema = (records.SchemaField("A", transform=pipeline),)
        frame = records.load_jsonl(stream, schema)

        self.assertEqual(frame.at[0, "A"], "PREFIX-TEST-SUFFIX")