Exemple #1
0
    def _test_join_all(self, how: str) -> None:
        """Run `_test_join_pair` for every ordered pair of distinct CSV fixtures.

        The fixtures are the `*.csv` files under `SRC / "test" / "data"`. For
        each (left, right) pair with different file names, the join columns
        are chosen from the presence of a "date" column:

        * right has no "date" column -> join on ["key"]
        * both have a "date" column  -> join on ["key", "date"]
        * only right has "date"      -> the pair is skipped

        Each qualifying pair is checked exactly once.

        Args:
            how: Forwarded unchanged to `_test_join_pair` (presumably the
                join type -- confirm against that helper).
        """

        # Create a custom function used to read tables casting to the expected schema
        read_table_ = partial(read_table, schema=SCHEMA, low_memory=False)

        # List the fixtures once; both loops walk the same set of files
        fixtures = [*(SRC / "test" / "data").glob("*.csv")]

        for left in pbar(fixtures, leave=False):
            # The left table only changes in the outer loop, so inspect its
            # columns here instead of re-reading it for every right table
            left_has_date = "date" in read_table_(left).columns

            for right in pbar(fixtures, leave=False):
                if left.name == right.name:
                    continue

                if "date" not in read_table_(right).columns:
                    join_on = ["key"]
                elif left_has_date:
                    join_on = ["key", "date"]
                else:
                    # A dated right table is not joined onto an undated left
                    # table; no check is run for this combination
                    continue

                self._test_join_pair(read_table_, SCHEMA, left, right,
                                     join_on, how)
Exemple #2
0
    def test_convert_csv_to_json_records(self):
        """Round-trip every CSV fixture through JSON with both converters.

        For each of `_convert_csv_to_json_records_fast` and
        `_convert_csv_to_json_records_slow`, every `*.csv` file under
        `SRC / "test" / "data"` is converted to a JSON file in a temporary
        directory, loaded back into a DataFrame from its "data" and "columns"
        entries, exported to CSV with `export_csv`, and compared line by line
        against the original file.
        """
        # Imported locally so this method does not depend on a file-level import
        from itertools import zip_longest

        for json_convert_method in (
                _convert_csv_to_json_records_fast,
                _convert_csv_to_json_records_slow,
        ):
            with TemporaryDirectory() as tmp:
                workdir = Path(tmp)

                for csv_file in pbar([*(SRC / "test" / "data").glob("*.csv")],
                                     leave=False):
                    # Swap only the extension; a plain str.replace would also
                    # rewrite any "csv" appearing elsewhere in the file name
                    json_output = workdir / csv_file.with_suffix(".json").name
                    json_convert_method(SCHEMA, csv_file, json_output)

                    with json_output.open("r") as fd:
                        json_obj = json.load(fd)
                    json_df = DataFrame(data=json_obj["data"],
                                        columns=json_obj["columns"])

                    csv_test_file = workdir / csv_file.name
                    export_csv(json_df, csv_test_file, schema=SCHEMA)

                    # zip_longest pads the shorter side with None, so a
                    # truncated or empty round-trip fails the comparison
                    # instead of being silently cut off as zip() would do
                    for line1, line2 in zip_longest(read_lines(csv_file),
                                                    read_lines(csv_test_file)):
                        self.assertEqual(line1, line2)
Exemple #3
0
    def test_convert_csv_to_json_records(self):
        """Round-trip every CSV fixture through JSON with both converters.

        Each converter turns a fixture from `SRC / "test" / "data"` into a
        JSON file inside a temporary working directory. The JSON is loaded
        back into a DataFrame from its "data" and "columns" entries, exported
        to CSV again, and the result is handed to `_compare_tables_equal`
        together with the original fixture.
        """
        converters = (
            _convert_csv_to_json_records_fast,
            _convert_csv_to_json_records_slow,
        )

        for json_convert_method in converters:
            with temporary_directory() as workdir:
                fixtures = list((SRC / "test" / "data").glob("*.csv"))

                for source_csv in pbar(fixtures, leave=False):
                    # CSV -> JSON records
                    records_path = workdir / source_csv.name.replace("csv", "json")
                    json_convert_method(SCHEMA, source_csv, records_path)

                    # JSON records -> DataFrame
                    with records_path.open("r") as handle:
                        payload = json.load(handle)
                    frame = DataFrame(data=payload["data"], columns=payload["columns"])

                    # DataFrame -> CSV, written next to the JSON file
                    roundtrip_csv = workdir / records_path.name.replace("json", "csv")
                    export_csv(frame, roundtrip_csv, schema=SCHEMA)

                    _compare_tables_equal(self, source_csv, roundtrip_csv)
Exemple #4
0
    def _test_join_all(self, how_mem: str, how_pandas: str) -> None:
        """Run `_test_join_pair` between the index table and every other fixture.

        The left table is always `SRC / "test" / "data" / "index.csv"`; the
        right tables are the remaining `*.csv` files in the same directory.
        The join columns are chosen from the presence of a "date" column:

        * right has no "date" column -> join on ["key"]
        * both have a "date" column  -> join on ["key", "date"]
        * only right has "date"      -> the pair is skipped

        Each qualifying pair is checked exactly once.

        Args:
            how_mem: Forwarded unchanged to `_test_join_pair`.
            how_pandas: Forwarded unchanged to `_test_join_pair`.
                NOTE(review): the names suggest the join modes for two
                different join implementations -- confirm against the helper.
        """

        # Create a custom function used to read tables casting to the expected schema
        read_table_ = partial(read_table, schema=SCHEMA, low_memory=False)

        # Test joining the index table with every other table
        left = SRC / "test" / "data" / "index.csv"

        # The left table never changes, so inspect its columns only once
        left_has_date = "date" in get_table_columns(left)

        for right in pbar([*(SRC / "test" / "data").glob("*.csv")], leave=False):
            if left.name == right.name:
                continue

            if "date" not in get_table_columns(right):
                join_on = ["key"]
            elif left_has_date:
                join_on = ["key", "date"]
            else:
                # A dated right table is not joined onto an undated left
                # table; no check is run for this combination
                continue

            self._test_join_pair(
                read_table_, SCHEMA, left, right, join_on, how_mem, how_pandas
            )