def _test_table_merge(self, how_mem: str, how_pandas: str):
    """
    Check `table_merge_mem` against `table_merge_pandas` on three small tables.

    Three fixture tables sharing a `col1` column are exported to CSV files inside a
    temporary directory. The CSV files are merged on `col1` with `table_merge_mem`, the
    dataframes are merged on `col1` with `table_merge_pandas`, and both results (as CSV
    files) are handed to `self._compare_tables_equal`.

    Arguments:
        how_mem: Value forwarded as `how` to `table_merge_mem`.
        how_pandas: Value forwarded as `how` to `table_merge_pandas`.
    """
    # First table: two rows for each `col1` value
    left_rows = (("a", "1"), ("a", "2"), ("b", "3"), ("b", "4"), ("c", "5"), ("c", "6"))
    frame_a = DataFrame.from_records(
        [{"col1": key, "col2": value} for key, value in left_rows]
    )

    # Second and third tables: one row for each `col1` value
    frame_b = DataFrame.from_records(
        [
            {"col1": key, "col3": value}
            for key, value in (("a", "foo"), ("b", "bar"), ("c", "baz"))
        ]
    )
    frame_c = DataFrame.from_records(
        [
            {"col1": key, "col4": value}
            for key, value in (("a", "apple"), ("b", "banana"), ("c", "orange"))
        ]
    )
    frames = [frame_a, frame_b, frame_c]

    with TemporaryDirectory() as tmp_name:
        root = Path(tmp_name)

        # Write each fixture to its own CSV file; those paths are the merge inputs
        csv_inputs = [root / name for name in ("test.1.csv", "test.2.csv", "test.3.csv")]
        for frame, csv_path in zip(frames, csv_inputs):
            export_csv(frame, csv_path)

        expected_csv = root / "output.1.csv"
        actual_csv = root / "output.2.csv"

        # Reference result comes from the pandas-based merge of the dataframes
        reference = table_merge_pandas(frames, on=["col1"], how=how_pandas)
        export_csv(reference, path=expected_csv)

        # Result under test comes from merging the CSV files
        table_merge_mem(csv_inputs, actual_csv, on=["col1"], how=how_mem)

        self._compare_tables_equal(expected_csv, actual_csv)
def _test_table_merge(self, how_sqlite: str, how_pandas: str):
    """
    Check `table_merge_sql` against `table_merge_pandas` on three small tables.

    Three fixture tables sharing a `col1` column are loaded into a SQLite database created
    inside a temporary directory. They are merged on `col1` twice with `table_merge_sql`:
    once consuming the returned records directly, and once writing into a new table which
    is then read back with `table_select_all`. Each result is compared with the output of
    `table_merge_pandas` via `self._compare_dataframes_equal`.

    Arguments:
        how_sqlite: Value forwarded as `how` to `table_merge_sql`.
        how_pandas: Value forwarded as `how` to `table_merge_pandas`.
    """
    # First table: two rows for each `col1` value
    left_rows = (("a", "1"), ("a", "2"), ("b", "3"), ("b", "4"), ("c", "5"), ("c", "6"))
    frame_a = DataFrame.from_records(
        [{"col1": key, "col2": value} for key, value in left_rows]
    )

    # Second and third tables: one row for each `col1` value
    frame_b = DataFrame.from_records(
        [
            {"col1": key, "col3": value}
            for key, value in (("a", "foo"), ("b", "bar"), ("c", "baz"))
        ]
    )
    frame_c = DataFrame.from_records(
        [
            {"col1": key, "col4": value}
            for key, value in (("a", "apple"), ("b", "banana"), ("c", "orange"))
        ]
    )

    # (table name, column schema, source dataframe) for every input table
    fixtures = [
        ("_1", {"col1": "TEXT", "col2": "TEXT"}, frame_a),
        ("_2", {"col1": "TEXT", "col3": "TEXT"}, frame_b),
        ("_3", {"col1": "TEXT", "col4": "TEXT"}, frame_c),
    ]
    table_names = [name for name, _, _ in fixtures]

    with TemporaryDirectory() as tmp_name:
        db_path = Path(tmp_name) / "tmp.sqlite"

        with create_sqlite_database(db_file=db_path) as conn:
            # Three separate passes: create every table, then fill them, then check them
            for name, schema, _ in fixtures:
                table_create(conn, name, schema)
            for name, _, frame in fixtures:
                table_import_from_records(conn, name, _dataframe_records_iterator(frame))
            for name in table_names:
                self._check_table_not_empty(conn, name)

            expected = table_merge_pandas(
                [frame_a, frame_b, frame_c], on=["col1"], how=how_pandas
            )

            # Merge and consume the records returned by the call
            merged_records = table_merge_sql(
                conn, list(table_names), on=["col1"], how=how_sqlite
            )
            streamed = DataFrame.from_records(merged_records)
            self._compare_dataframes_equal(streamed, expected)

            # Merge into a table, then read that table back
            merged_name = "_merged"
            table_merge_sql(
                conn,
                list(table_names),
                on=["col1"],
                how=how_sqlite,
                into_table=merged_name,
            )
            stored = DataFrame.from_records(table_select_all(conn, merged_name))
            self._compare_dataframes_equal(stored, expected)