def _test_join_all(self, how: str) -> None:
    """Run the pairwise join test over every ordered pair of distinct test tables.

    Every ``*.csv`` file under ``SRC / "test" / "data"`` is used both as the
    left and as the right side of a join. For each pair the join keys are
    chosen from the presence of a ``date`` column:

    * right table has no ``date`` column -> join on ``["key"]``;
    * both tables have a ``date`` column -> join on ``["key", "date"]``;
    * only the right table has a ``date`` column -> the pair is skipped.

    Args:
        how: Join type, forwarded unchanged to ``self._test_join_pair``.
    """
    # Create a custom function used to read tables casting to the expected schema
    read_table_ = partial(read_table, schema=SCHEMA, low_memory=False)

    # List the tables once and inspect each table's columns a single time,
    # instead of re-reading both tables for every one of the N^2 pairs.
    table_paths = [*(SRC / "test" / "data").glob("*.csv")]
    has_date = {path: "date" in read_table_(path).columns for path in table_paths}

    for left in pbar(table_paths, leave=False):
        for right in pbar(table_paths, leave=False):
            # Never join a table with itself
            if left.name == right.name:
                continue

            # Pick the join keys exactly once per pair. Previously the
            # key-only join ran twice when only the left table had a date.
            if not has_date[right]:
                join_on = ["key"]
            elif has_date[left]:
                join_on = ["key", "date"]
            else:
                # Right table is dated but left is not: no join is tested
                continue

            self._test_join_pair(read_table_, SCHEMA, left, right, join_on, how)
def test_convert_csv_to_json_records(self) -> None:
    """Round-trip every test CSV through JSON records and compare line by line.

    For each converter (fast and slow) and each ``*.csv`` file under
    ``SRC / "test" / "data"``:

    1. convert the CSV into a JSON file inside a temporary directory;
    2. load the JSON and rebuild a ``DataFrame`` from its ``"data"`` and
       ``"columns"`` entries;
    3. export that ``DataFrame`` back to CSV using ``SCHEMA``;
    4. assert that the exported file matches the original line for line,
       including having the same number of lines.
    """
    # Local import so this method does not depend on a file-level import
    from itertools import zip_longest

    # The set of input tables is the same for both converters
    csv_files = [*(SRC / "test" / "data").glob("*.csv")]

    for json_convert_method in (
        _convert_csv_to_json_records_fast,
        _convert_csv_to_json_records_slow,
    ):
        with TemporaryDirectory() as workdir_name:
            workdir = Path(workdir_name)

            for csv_file in pbar(csv_files, leave=False):
                # Swap only the extension; str.replace would also rewrite any
                # "csv" occurring elsewhere in the file name.
                json_output = workdir / csv_file.with_suffix(".json").name
                json_convert_method(SCHEMA, csv_file, json_output)

                with json_output.open("r") as fd:
                    json_obj = json.load(fd)
                json_df = DataFrame(data=json_obj["data"], columns=json_obj["columns"])

                csv_test_file = json_output.with_suffix(".csv")
                export_csv(json_df, csv_test_file, schema=SCHEMA)

                # zip() would stop at the shorter file and hide missing or
                # extra rows; zip_longest pads with None so a length mismatch
                # fails the equality assertion.
                for line1, line2 in zip_longest(
                    read_lines(csv_file), read_lines(csv_test_file)
                ):
                    self.assertEqual(line1, line2)
def test_convert_csv_to_json_records(self):
    """Check that CSV -> JSON records -> CSV reproduces each test table.

    Both JSON converters (fast and slow) are exercised against every
    ``*.csv`` file under ``SRC / "test" / "data"``. The JSON output is loaded,
    turned back into a ``DataFrame`` from its ``"data"`` and ``"columns"``
    entries, exported to CSV with ``SCHEMA``, and compared to the original
    file via ``_compare_tables_equal``.
    """
    converters = (
        _convert_csv_to_json_records_fast,
        _convert_csv_to_json_records_slow,
    )

    for convert in converters:
        # workdir is combined with "/" below, so it is presumably a Path
        with temporary_directory() as workdir:
            source_tables = [*(SRC / "test" / "data").glob("*.csv")]

            for csv_file in pbar(source_tables, leave=False):
                # Step 1: CSV -> JSON inside the scratch directory
                json_name = csv_file.name.replace("csv", "json")
                json_output = workdir / json_name
                convert(SCHEMA, csv_file, json_output)

                # Step 2: JSON -> DataFrame
                with json_output.open("r") as fd:
                    payload = json.load(fd)
                records = payload["data"]
                header = payload["columns"]
                json_df = DataFrame(data=records, columns=header)

                # Step 3: DataFrame -> CSV, then compare with the source table
                csv_name = json_output.name.replace("json", "csv")
                csv_test_file = workdir / csv_name
                export_csv(json_df, csv_test_file, schema=SCHEMA)

                _compare_tables_equal(self, csv_file, csv_test_file)
def _test_join_all(self, how_mem: str, how_pandas: str) -> None:
    """Run the join test between the index table and every other test table.

    ``index.csv`` under ``SRC / "test" / "data"`` is always the left side;
    every other ``*.csv`` file in that directory is used as the right side.
    The join keys are chosen from the presence of a ``date`` column:

    * right table has no ``date`` column -> join on ``["key"]``;
    * both tables have a ``date`` column -> join on ``["key", "date"]``;
    * only the right table has a ``date`` column -> the pair is skipped.

    Args:
        how_mem: Forwarded unchanged to ``self._test_join_pair``.
        how_pandas: Forwarded unchanged to ``self._test_join_pair``.
    """
    # Create a custom function used to read tables casting to the expected schema
    read_table_ = partial(read_table, schema=SCHEMA, low_memory=False)

    # Test joining the index table with every other table
    data_dir = SRC / "test" / "data"
    left = data_dir / "index.csv"

    # The left table never changes, so inspect its columns only once
    left_has_date = "date" in get_table_columns(left)

    for right in pbar([*data_dir.glob("*.csv")], leave=False):
        # Never join the index table with itself
        if left.name == right.name:
            continue

        # Pick the join keys exactly once per pair. Previously the key-only
        # join ran twice when only the left table had a date column.
        if "date" not in get_table_columns(right):
            join_on = ["key"]
        elif left_has_date:
            join_on = ["key", "date"]
        else:
            # Right table is dated but left is not: no join is tested
            continue

        self._test_join_pair(
            read_table_, SCHEMA, left, right, join_on, how_mem, how_pandas
        )