Exemple #1
0
    def test_expand_empty(self):
        data_test = """{ "ID": "0", "SUBVAL": [] }
{ "ID": "1", "SUBVAL": [ {"ID_SUB":"0"}, {"ID_SUB":"1"}, {"ID_SUB":"2"} ] }
"""
        data_expected = """{ "ID": "1", "ID_SUB": "0" }
{ "ID": "1", "ID_SUB": "1" }
{ "ID": "1", "ID_SUB": "2" }
"""

        df = records.load_jsonl(
            inpt.from_str(data_test),
            [
                records.SchemaField("ID"),
                records.SchemaField("SUBVAL"),
            ],
        )

        df = records.expand_multivalued(df, {
            "ID_SUB": ["SUBVAL", None, "ID_SUB"],
        })

        df_expected = records.load_jsonl(
            inpt.from_str(data_expected),
            [
                records.SchemaField("ID"),
                records.SchemaField("ID_SUB"),
            ],
        )

        pandas.testing.assert_frame_equal(df_expected, df)
Exemple #2
0
    def test_basic_no_drop(self):
        data_test = """{ "ID": "0", "SUBVAL": [ "0", "1", "2" ] }
{ "ID": "1", "SUBVAL": [ "0", "1", "2" ] }
"""
        data_expected = """{ "ID": "0", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "0" }
{ "ID": "0", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "1" }
{ "ID": "0", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "2" }
{ "ID": "1", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "0" }
{ "ID": "1", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "1" }
{ "ID": "1", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "2" }
"""

        df = records.load_jsonl(
            inpt.from_str(data_test),
            [
                records.SchemaField("ID"),
                records.SchemaField("SUBVAL"),
            ],
        )

        df = records.expand_multivalued(df, {
            "ID_SUB": ["SUBVAL", None],
        },
                                        drop_mv=False)

        df_expected = records.load_jsonl(
            inpt.from_str(data_expected),
            [
                records.SchemaField("ID"),
                records.SchemaField("SUBVAL"),
                records.SchemaField("ID_SUB"),
            ],
        )

        pandas.testing.assert_frame_equal(df_expected, df)
Exemple #3
0
    def test_vtt_strict(self):
        df = pandas.DataFrame({"A": range(10), "B": range(10)}, dtype=str)

        STR_IN = """old-val,new-val
0,10
1,9
2,8
3,7
4,6
5,5
6,4
7,3
8,2
9,1
10,0"""

        vt = value_translator.ValueTranslator()
        vt.add_vtt(
            "B",
            value_translator.load_from_csv(inpt.from_str(STR_IN), strict=True))
        vt.translate(df)

        pandas.testing.assert_frame_equal(
            pandas.DataFrame({
                "A": range(10),
                "B": range(10, 0, -1)
            },
                             dtype=str),
            df,
        )
Exemple #4
0
    def test_load_flatten_error(self):
        # B_MV nests two {"B": ...} objects under B_MS; loading it with the
        # flatten_mv transform is expected to raise ValueError.
        inpt_str = '{"A":"0","B_MV":[{"B_MS":[{"B":"1"},{"B":"2"}]}],"C":"2"}'

        with self.assertRaises(ValueError):
            # The call is expected to raise, so its result is deliberately
            # not bound to a name (the old `df = ...` was never read).
            records.load_jsonl(
                inpt.from_str(inpt_str),
                (
                    records.SchemaField("A"),
                    records.SchemaField("B_MV", transform=records.flatten_mv),
                    records.SchemaField("C"),
                ),
            )
Exemple #5
0
    def test_load_jsonl_transform(self):
        # A list of transforms is given for field A; the asserted result
        # ("PREFIX-TEST-SUFFIX") corresponds to applying them in list order:
        # append suffix, prepend prefix, upper-case.
        raw_line = '{"A":"test"}'

        source = inpt.from_str(raw_line)
        transform_chain = [
            lambda v: f"{v}-suffix",
            lambda v: f"prefix-{v}",
            lambda v: v.upper(),
        ]
        schema = (records.SchemaField("A", transform=transform_chain), )
        frame = records.load_jsonl(source, schema)

        self.assertEqual(frame.at[0, "A"], "PREFIX-TEST-SUFFIX")
Exemple #6
0
    def test_load_csv_basic(self):
        # The CSV has four columns (A-D) but the schema only names A, B and C;
        # the first data row of each schema column is checked as a string.
        csv_text = "A,B,C,D\n0,1,2,3"

        source = inpt.from_str(csv_text)
        schema = [records.SchemaField(name) for name in ("A", "B", "C")]
        frame = records.load_csv(source, schema)

        expected_cells = (("A", "0"), ("B", "1"), ("C", "2"))
        for column, value in expected_cells:
            self.assertEqual(frame.at[0, column], value)
Exemple #7
0
    def test_load_flatten(self):
        # B_MV wraps a single {"B": "1"} in two list/object levels; with the
        # flatten_mv transform the loaded cell is expected to be plain "1".
        raw_line = '{"A":"0","B_MV":[{"B_MS":[{"B":"1"}]}],"C":"2"}'

        source = inpt.from_str(raw_line)
        schema = (
            records.SchemaField("A"),
            records.SchemaField("B_MV", transform=records.flatten_mv),
            records.SchemaField("C"),
        )
        frame = records.load_jsonl(source, schema)

        expected_cells = (("A", "0"), ("B_MV", "1"), ("C", "2"))
        for column, value in expected_cells:
            self.assertEqual(frame.at[0, column], value)
Exemple #8
0
    def test_load_jsonl_basic(self):
        # A single JSON line with three string fields is loaded and each
        # field is checked in row 0 of the resulting frame.
        raw_line = '{"A":"0","B":"1","C":"2"}'

        source = inpt.from_str(raw_line)
        schema = [records.SchemaField(name) for name in ("A", "B", "C")]
        frame = records.load_jsonl(source, schema)

        expected_cells = (("A", "0"), ("B", "1"), ("C", "2"))
        for column, value in expected_cells:
            self.assertEqual(frame.at[0, column], value)
Exemple #9
0
    def test_from_str(self):
        df = pandas.DataFrame({"A": range(10), "B": range(10)}, dtype=str)

        STR_IN = """old-val,new-val
0,10
1,9
2,8
3,7
4,6"""

        vt = value_translator.ValueTranslator()
        vt.add_vtt("B", value_translator.load_from_csv(inpt.from_str(STR_IN)))
        vt.translate(df)

        pandas.testing.assert_frame_equal(
            pandas.DataFrame(
                {
                    "A": range(10),
                    "B": list(range(10, 5, -1)) + list(range(5, 10))
                },
                dtype=str,
            ),
            df,
        )
 def test_from_str(self):
     # Round-trip: text wrapped by inpt.from_str must read back unchanged
     # through the handle returned by open().
     text = "This is a test"
     source = inpt.from_str(text)
     with source.open() as handle:
         self.assertEqual(handle.read(), text)