def test_expand_empty(self):
    """Check expansion when one record has an empty multivalued field.

    Two records are loaded: ID "0" carries an empty ``SUBVAL`` list and
    ID "1" carries three sub-records. After ``expand_multivalued`` the
    frame is expected to contain only the three rows derived from ID "1",
    with columns ``ID`` and ``ID_SUB``.
    """
    # JSON Lines fixtures: exactly one JSON document per physical line,
    # spelled with explicit "\n" so source indentation cannot leak into
    # the data.
    data_test = (
        '{ "ID": "0", "SUBVAL": [] }\n'
        '{ "ID": "1", "SUBVAL": '
        '[ {"ID_SUB":"0"}, {"ID_SUB":"1"}, {"ID_SUB":"2"} ] }\n'
    )
    data_expected = (
        '{ "ID": "1", "ID_SUB": "0" }\n'
        '{ "ID": "1", "ID_SUB": "1" }\n'
        '{ "ID": "1", "ID_SUB": "2" }\n'
    )

    df = records.load_jsonl(
        inpt.from_str(data_test),
        [
            records.SchemaField("ID"),
            records.SchemaField("SUBVAL"),
        ],
    )
    # NOTE(review): the None path element presumably means "every element
    # of SUBVAL" -- confirm against expand_multivalued.
    df = records.expand_multivalued(
        df,
        {
            "ID_SUB": ["SUBVAL", None, "ID_SUB"],
        },
    )

    df_expected = records.load_jsonl(
        inpt.from_str(data_expected),
        [
            records.SchemaField("ID"),
            records.SchemaField("ID_SUB"),
        ],
    )
    pandas.testing.assert_frame_equal(df_expected, df)
def test_basic_no_drop(self):
    """Check expansion with ``drop_mv=False``.

    Each of the two input records holds a three-element ``SUBVAL`` list.
    The expected frame has one row per (record, element) pair, six in
    total, and still carries the original ``SUBVAL`` list next to the new
    ``ID_SUB`` column.
    """
    # JSON Lines fixtures: exactly one JSON document per physical line,
    # spelled with explicit "\n" so source indentation cannot leak into
    # the data.
    data_test = (
        '{ "ID": "0", "SUBVAL": [ "0", "1", "2" ] }\n'
        '{ "ID": "1", "SUBVAL": [ "0", "1", "2" ] }\n'
    )
    data_expected = (
        '{ "ID": "0", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "0" }\n'
        '{ "ID": "0", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "1" }\n'
        '{ "ID": "0", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "2" }\n'
        '{ "ID": "1", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "0" }\n'
        '{ "ID": "1", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "1" }\n'
        '{ "ID": "1", "SUBVAL": [ "0", "1", "2" ], "ID_SUB": "2" }\n'
    )

    df = records.load_jsonl(
        inpt.from_str(data_test),
        [
            records.SchemaField("ID"),
            records.SchemaField("SUBVAL"),
        ],
    )
    # NOTE(review): the None path element presumably means "every element
    # of SUBVAL" -- confirm against expand_multivalued.
    df = records.expand_multivalued(
        df,
        {
            "ID_SUB": ["SUBVAL", None],
        },
        drop_mv=False,
    )

    df_expected = records.load_jsonl(
        inpt.from_str(data_expected),
        [
            records.SchemaField("ID"),
            records.SchemaField("SUBVAL"),
            records.SchemaField("ID_SUB"),
        ],
    )
    pandas.testing.assert_frame_equal(df_expected, df)
def test_load_flatten_error(self):
    """Check that loading fails when a flattened field has two entries.

    ``B_MV`` is declared with ``transform=records.flatten_mv`` and its
    nested ``B_MS`` list holds two ``B`` values; the test expects
    ``load_jsonl`` to raise ``ValueError`` for this record.
    """
    inpt_str = '{"A":"0","B_MV":[{"B_MS":[{"B":"1"},{"B":"2"}]}],"C":"2"}'

    # Build the input and schema outside the assertion so that only the
    # load itself is allowed to satisfy the expected ValueError.
    source = inpt.from_str(inpt_str)
    schema = (
        records.SchemaField("A"),
        records.SchemaField("B_MV", transform=records.flatten_mv),
        records.SchemaField("C"),
    )

    # The returned frame is irrelevant here, so it is not bound to a name.
    with self.assertRaises(ValueError):
        records.load_jsonl(source, schema)
def test_load_jsonl_transform(self):
    """Check that a list of transforms is applied to a loaded field.

    The three callables append a suffix, prepend a prefix and upper-case
    the value; the test expects ``"test"`` to be loaded as
    ``"PREFIX-TEST-SUFFIX"``.
    """
    raw_record = '{"A":"test"}'
    pipeline = [
        lambda v: f"{v}-suffix",
        lambda v: f"prefix-{v}",
        lambda v: v.upper(),
    ]

    source = inpt.from_str(raw_record)
    schema = (records.SchemaField("A", transform=pipeline),)
    frame = records.load_jsonl(source, schema)

    self.assertEqual(frame.at[0, "A"], "PREFIX-TEST-SUFFIX")
def test_load_flatten(self):
    """Check that ``flatten_mv`` reduces a single nested entry to a value.

    ``B_MV`` wraps one ``B_MS`` list holding one ``B`` value; the test
    expects the loaded ``B_MV`` cell to be ``"1"`` while the plain fields
    ``A`` and ``C`` keep their values.
    """
    raw_record = '{"A":"0","B_MV":[{"B_MS":[{"B":"1"}]}],"C":"2"}'

    source = inpt.from_str(raw_record)
    schema = (
        records.SchemaField("A"),
        records.SchemaField("B_MV", transform=records.flatten_mv),
        records.SchemaField("C"),
    )
    frame = records.load_jsonl(source, schema)

    # Checked in the same order as before; the first mismatch fails the test.
    for column, expected in (("A", "0"), ("B_MV", "1"), ("C", "2")):
        self.assertEqual(frame.at[0, column], expected)
def test_load_jsonl_basic(self):
    """Check that a flat record loads with each field in its own column.

    A single record with fields ``A``, ``B`` and ``C`` is loaded and the
    first row is expected to hold ``"0"``, ``"1"`` and ``"2"``.
    """
    raw_record = '{"A":"0","B":"1","C":"2"}'

    source = inpt.from_str(raw_record)
    schema = [
        records.SchemaField("A"),
        records.SchemaField("B"),
        records.SchemaField("C"),
    ]
    frame = records.load_jsonl(source, schema)

    # Checked in the same order as before; the first mismatch fails the test.
    for column, expected in (("A", "0"), ("B", "1"), ("C", "2")):
        self.assertEqual(frame.at[0, column], expected)