def _test_table_merge(self, how_mem: str, how_pandas: str):
        """Check that the CSV-file merge agrees with the pandas merge.

        Three small tables sharing the key column ``col1`` are exported to CSV
        files inside a temporary directory. They are merged once with
        ``table_merge_pandas`` (on the in-memory frames, result exported to
        CSV) and once with ``table_merge_mem`` (on the CSV files). The two
        output files are then handed to ``self._compare_tables_equal``.

        Args:
            how_mem: Value forwarded as ``how`` to ``table_merge_mem``.
            how_pandas: Value forwarded as ``how`` to ``table_merge_pandas``.
        """
        # Two rows per key, so the merge has to fan out the lookup tables.
        left_rows = [
            ("a", "1"),
            ("a", "2"),
            ("b", "3"),
            ("b", "4"),
            ("c", "5"),
            ("c", "6"),
        ]
        col3_rows = [("a", "foo"), ("b", "bar"), ("c", "baz")]
        col4_rows = [("a", "apple"), ("b", "banana"), ("c", "orange")]

        frames = [
            DataFrame.from_records(
                [{"col1": key, value_column: value} for key, value in rows]
            )
            for value_column, rows in (
                ("col2", left_rows),
                ("col3", col3_rows),
                ("col4", col4_rows),
            )
        ]

        with TemporaryDirectory() as tmp_name:
            root = Path(tmp_name)

            # Write every input frame to its own CSV file.
            input_paths = [
                root / file_name
                for file_name in ("test.1.csv", "test.2.csv", "test.3.csv")
            ]
            for frame, csv_path in zip(frames, input_paths):
                export_csv(frame, csv_path)

            expected_path = root / "output.1.csv"
            actual_path = root / "output.2.csv"

            # Reference result: merge the frames with pandas and export it.
            reference = table_merge_pandas(
                list(frames), on=["col1"], how=how_pandas
            )
            export_csv(reference, path=expected_path)

            # Result under test: merge the CSV files directly.
            table_merge_mem(
                list(input_paths), actual_path, on=["col1"], how=how_mem
            )

            self._compare_tables_equal(expected_path, actual_path)
Exemple #2
0
    def _test_table_merge(self, how_sqlite: str, how_pandas: str):
        """Check that the SQLite merge agrees with the pandas merge.

        Three small tables sharing the key column ``col1`` are created and
        populated in a temporary SQLite database. ``table_merge_sql`` is then
        exercised twice: once with its return value loaded into a DataFrame,
        and once with ``into_table`` set, reading the merged table back via
        ``table_select_all``. Both results are compared against the output of
        ``table_merge_pandas`` on the same data.

        Args:
            how_sqlite: Value forwarded as ``how`` to ``table_merge_sql``.
            how_pandas: Value forwarded as ``how`` to ``table_merge_pandas``.
        """
        # (table name, value column, rows of (col1, value)).
        fixtures = [
            (
                "_1",
                "col2",
                [
                    ("a", "1"),
                    ("a", "2"),
                    ("b", "3"),
                    ("b", "4"),
                    ("c", "5"),
                    ("c", "6"),
                ],
            ),
            ("_2", "col3", [("a", "foo"), ("b", "bar"), ("c", "baz")]),
            ("_3", "col4", [("a", "apple"), ("b", "banana"), ("c", "orange")]),
        ]

        # Table name -> DataFrame holding that table's rows, in fixture order.
        frames = {
            table: DataFrame.from_records(
                [{"col1": key, value_column: value} for key, value in rows]
            )
            for table, value_column, rows in fixtures
        }
        table_names = list(frames)

        with TemporaryDirectory() as tmp_name:
            db_path = Path(tmp_name) / "tmp.sqlite"
            with create_sqlite_database(db_file=db_path) as conn:
                # Create all tables first, then fill them, then sanity-check.
                for table, value_column, _ in fixtures:
                    table_create(
                        conn, table, {"col1": "TEXT", value_column: "TEXT"}
                    )
                for table, frame in frames.items():
                    table_import_from_records(
                        conn, table, _dataframe_records_iterator(frame)
                    )
                for table in table_names:
                    self._check_table_not_empty(conn, table)

                expected = table_merge_pandas(
                    list(frames.values()), on=["col1"], how=how_pandas
                )

                # Merge and consume the returned records directly.
                merged_records = table_merge_sql(
                    conn,
                    list(table_names),
                    on=["col1"],
                    how=how_sqlite,
                )
                from_iterable = DataFrame.from_records(merged_records)
                self._compare_dataframes_equal(from_iterable, expected)

                # Merge into a named table, then read that table back.
                merged_table = "_merged"
                table_merge_sql(
                    conn,
                    list(table_names),
                    on=["col1"],
                    how=how_sqlite,
                    into_table=merged_table,
                )
                from_table = DataFrame.from_records(
                    table_select_all(conn, merged_table)
                )
                self._compare_dataframes_equal(from_table, expected)