def test_specific_cols():
    """Build a pandas-mode diff of two local CSVs limited to selected columns.

    Passes ``cols=["id", "name"]`` to ``Diffino`` and requires
    ``build_diff()`` to return a truthy value.
    """
    selected_columns = ["id", "name"]
    differ = Diffino(
        mode="pandas",
        left="/tmp/one.csv",
        right="/tmp/two.csv",
        cols=selected_columns,
    )
    assert differ.build_diff()
def test_specific_cols():
    """Check that a column-restricted pandas diff yields a truthy result.

    Only the ``id`` and ``name`` columns are requested via ``cols``.
    """
    options = {
        "mode": "pandas",
        "left": "/tmp/one.csv",
        "right": "/tmp/two.csv",
        "cols": ["id", "name"],
    }
    outcome = Diffino(**options).build_diff()
    assert outcome
def test_output_csv_pandas():
    """Run a pandas-mode diff with a local CSV ``output`` target.

    This test is a placeholder: after ``build_diff()`` it always raises
    ``Exception("Finish test!")``, so it fails unconditionally.
    """
    Diffino(
        mode="pandas",
        left="/tmp/one.csv",
        right="/tmp/two.csv",
        output="/tmp/diff.csv",
    ).build_diff()
    # Deliberate failure marker; no verification of the output exists yet.
    raise Exception("Finish test!")
def test_convert_numeric():
    """Build a pandas-mode diff with ``convert_numeric=False``.

    Requires ``build_diff()`` to return a truthy value.
    """
    left_path = "/tmp/one_specific_cols.csv"
    right_path = "/tmp/two_specific_cols.csv"
    differ = Diffino(
        mode="pandas",
        left=left_path,
        right=right_path,
        convert_numeric=False,
    )
    assert differ.build_diff()
def test_output_xlsx_pandas():
    """Run a pandas-mode diff with a local Excel (``.xlsx``) ``output`` target.

    This test is a placeholder: after ``build_diff()`` it always raises
    ``Exception("Finish test!")``, so it fails unconditionally.
    """
    diff = Diffino(
        mode="pandas",
        left="/tmp/one.csv",
        right="/tmp/two.csv",
        # Extension was misspelled ".xslx"; the test name targets xlsx output.
        output="/tmp/diff.xlsx",
    )
    diff.build_diff()
    # Deliberate failure marker; no verification of the output exists yet.
    raise Exception("Finish test!")
def test_output_in_s3_pandas():
    """Run a pandas-mode diff whose ``output`` is an ``s3://`` JSON location.

    This test is a placeholder: after ``build_diff()`` it always raises
    ``Exception("Finish test!")``, so it fails unconditionally.
    """
    s3_target = "s3://fake-bucket/diff.json"
    differ = Diffino(
        mode="pandas",
        left="/tmp/one.csv",
        right="/tmp/two.csv",
        output=s3_target,
    )
    differ.build_diff()
    # Deliberate failure marker; no verification of the output exists yet.
    raise Exception("Finish test!")
def _create_diff(
    self,
    target_dir,
    left_csv="sample_left.csv",
    right_csv="sample_right.csv",
    to_console=False,
    cols=None,
    output_only_diffs=False,
):
    """Build a diff of two CSV fixtures stored next to this test module.

    Args:
        target_dir: Directory in which the output file paths are built.
        left_csv: File name of the left input, relative to this module's
            directory.
        right_csv: File name of the right input, relative to this module's
            directory.
        to_console: When true, ``output=False`` is passed to ``Diffino``
            instead of a file path.
        cols: Forwarded unchanged to ``Diffino`` as ``cols``.
        output_only_diffs: Forwarded unchanged to ``Diffino``.

    Returns:
        A tuple ``(output_location, output_left, output_right, rows_count)``
        where ``output_location`` is ``False`` for console output, the two
        ``output_*`` entries are the ``output_not_in_left.csv`` /
        ``output_not_in_right.csv`` paths under ``target_dir``, and
        ``rows_count`` is whatever ``build_diff()`` returned.

    Raises:
        AssertionError: If file output was requested (neither ``to_console``
            nor ``output_only_diffs`` set) and either ``output_not_in_*``
            file is missing after the diff.
    """
    output_location = (
        False if to_console else os.path.join(target_dir, "output.csv")
    )
    output_left = os.path.join(target_dir, "output_not_in_left.csv")
    output_right = os.path.join(target_dir, "output_not_in_right.csv")

    # Input fixtures are resolved relative to this module, not the CWD.
    fixtures_dir = os.path.dirname(__file__)
    location_left = os.path.join(fixtures_dir, left_csv)
    location_right = os.path.join(fixtures_dir, right_csv)

    diffino = Diffino(
        left=location_left,
        right=location_right,
        output=output_location,
        cols=cols,
        output_only_diffs=output_only_diffs,
    )
    rows_count = diffino.build_diff()

    # The per-side files are only checked in plain file-output mode.
    if not to_console and not output_only_diffs:
        assert os.path.isfile(output_left)
        assert os.path.isfile(output_right)
    return output_location, output_left, output_right, rows_count
def test_convert_numeric():
    """Check that a pandas diff with numeric conversion disabled is truthy.

    ``convert_numeric=False`` is passed straight to ``Diffino``.
    """
    options = {
        "mode": "pandas",
        "left": "/tmp/one_specific_cols.csv",
        "right": "/tmp/two_specific_cols.csv",
        "convert_numeric": False,
    }
    outcome = Diffino(**options).build_diff()
    assert outcome
def test_output_in_s3_pandas():
    """Run a pandas diff of two local CSVs with an S3 JSON output location.

    Placeholder test: it unconditionally raises ``Exception("Finish test!")``
    once ``build_diff()`` has returned.
    """
    Diffino(
        mode="pandas",
        left="/tmp/one.csv",
        right="/tmp/two.csv",
        output="s3://fake-bucket/diff.json",
    ).build_diff()
    # Deliberate failure marker; no verification of the output exists yet.
    raise Exception("Finish test!")
def _create_diff(self, target_dir, left_csv='sample_left.csv',
                 right_csv='sample_right.csv', to_console=False, cols=None):
    """Build a diff of two CSV fixtures stored next to this test module.

    Args:
        target_dir: Directory in which the output file paths are built.
        left_csv: File name of the left input, relative to this module's
            directory.
        right_csv: File name of the right input, relative to this module's
            directory.
        to_console: When true, ``output=False`` is passed to ``Diffino``
            instead of a file path, and the output files are not checked.
        cols: Forwarded unchanged to ``Diffino`` as ``cols``.

    Returns:
        A tuple ``(output_location, output_left, output_right)`` where
        ``output_location`` is ``False`` for console output and the other
        two entries are the ``output_not_in_left.csv`` /
        ``output_not_in_right.csv`` paths under ``target_dir``.

    Raises:
        AssertionError: If ``to_console`` is false and either
            ``output_not_in_*`` file is missing after the diff.
    """
    output_location = False if to_console else os.path.join(target_dir, 'output.csv')
    output_left = os.path.join(target_dir, 'output_not_in_left.csv')
    output_right = os.path.join(target_dir, 'output_not_in_right.csv')

    # Input fixtures are resolved relative to this module, not the CWD.
    fixtures_dir = os.path.dirname(__file__)
    location_left = os.path.join(fixtures_dir, left_csv)
    location_right = os.path.join(fixtures_dir, right_csv)

    diffino = Diffino(left=location_left, right=location_right,
                      output=output_location, cols=cols)
    diffino.build_diff()

    if not to_console:
        assert os.path.isfile(output_left)
        assert os.path.isfile(output_right)
    return output_location, output_left, output_right
def test_diffino_s3_support(self, tmpdir):
    """Diff two identical CSV objects stored in S3 and check the S3 outputs.

    The same CSV body is uploaded under two keys, ``Diffino`` is run with
    ``s3://`` locations for left, right and output, and both
    ``output_not_in_left.csv`` and ``output_not_in_right.csv`` in the bucket
    are required to contain only the header line.

    Args:
        tmpdir: Not used by this test (presumably pytest's ``tmpdir``
            fixture, kept for signature compatibility).
    """
    conn = boto3.resource("s3")
    # We need to create the bucket since this is all in Moto's 'virtual' AWS account
    bucket = "britedata-diff"
    conn.create_bucket(Bucket=bucket)

    s3 = boto3.client("s3")
    key_current = "current.csv"
    key_new = "new.csv"
    # Header and data row must be on separate lines: the expected outputs
    # below consist of exactly this header. The explicit "\n" keeps the
    # fixture intact even if the file's whitespace is reflowed.
    value = (
        u"address,state,zip,name,id\n"
        u"eleven st,CA,66611,name eleven,11"
    )
    s3.put_object(Bucket=bucket, Key=key_current, Body=value)
    s3.put_object(Bucket=bucket, Key=key_new, Body=value)

    location_left = "s3://" + bucket + "/" + key_current
    location_right = "s3://" + bucket + "/" + key_new
    output_location = "s3://" + bucket + "/output.csv"

    diffino = Diffino(
        left=location_left, right=location_right, output=output_location
    )
    diffino.build_diff()

    def read_object(key):
        # Fetch an object from the test bucket and decode its body as UTF-8.
        return conn.Object(bucket, key).get()["Body"].read().decode("utf-8")

    body_left = read_object("output_not_in_left.csv")
    body_right = read_object("output_not_in_right.csv")

    # Identical inputs: each "not in" file holds just the header.
    expected_result = u"address,state,zip,name,id\n"
    assert body_left == expected_result
    assert body_right == expected_result
def test_single_file_csv_local_md5():
    """Build an md5-mode diff of two local CSV paths.

    Requires ``build_diff()`` to return a truthy value.
    """
    left_path, right_path = "/tmp/one.csv", "/tmp/two.csv"
    differ = Diffino(mode="md5", left=left_path, right=right_path)
    assert differ.build_diff()
def test_single_file_excel_local_pandas():
    """Build a pandas-mode diff of two local ``.xlsx`` paths.

    Requires ``build_diff()`` to return a truthy value.
    """
    left_path, right_path = '/tmp/one.xlsx', '/tmp/two.xlsx'
    differ = Diffino(mode='pandas', left=left_path, right=right_path)
    assert differ.build_diff()
def test_output_json_md5():
    """Run an md5-mode diff with a local JSON ``output`` target.

    This test is a placeholder: after ``build_diff()`` it always raises
    ``Exception("Finish test!")``, so it fails unconditionally.
    """
    json_target = "/tmp/diff.json"
    differ = Diffino(
        mode="md5",
        left="/tmp/one.csv",
        right="/tmp/two.csv",
        output=json_target,
    )
    differ.build_diff()
    # Deliberate failure marker; no verification of the output exists yet.
    raise Exception("Finish test!")
def test_multiple_files_dir_md5():
    """Build an md5-mode diff with extension-less local paths as inputs.

    ``/tmp/one`` and ``/tmp/two`` are presumably directories (per the test
    name). Requires ``build_diff()`` to return a truthy value.
    """
    left_dir, right_dir = "/tmp/one", "/tmp/two"
    outcome = Diffino(mode="md5", left=left_dir, right=right_dir).build_diff()
    assert outcome
def test_multiple_files_s3_pandas():
    """Build a pandas-mode diff with two ``s3://`` prefixes as inputs.

    Requires ``build_diff()`` to return a truthy value.
    """
    left_prefix = 's3://fake-bucket/one'
    right_prefix = 's3://fake-bucket/two'
    differ = Diffino(mode='pandas', left=left_prefix, right=right_prefix)
    assert differ.build_diff()
def test_multiple_files_zip_pandas():
    """Build a pandas-mode diff of two local ``.zip`` paths.

    Requires ``build_diff()`` to return a truthy value.
    """
    left_archive, right_archive = "/tmp/one.zip", "/tmp/two.zip"
    differ = Diffino(mode="pandas", left=left_archive, right=right_archive)
    assert differ.build_diff()
def test_multiple_files_zip_pandas():
    """Check that a pandas diff of two zip archives yields a truthy result."""
    options = {
        'mode': 'pandas',
        'left': '/tmp/one.zip',
        'right': '/tmp/two.zip',
    }
    outcome = Diffino(**options).build_diff()
    assert outcome
def test_single_file_csv_s3_pandas():
    """Build a pandas-mode diff of two CSV objects addressed by ``s3://`` URLs.

    Requires ``build_diff()`` to return a truthy value.
    """
    left_url = 's3://fake-bucket/one.csv'
    right_url = 's3://fake-bucket/two.csv'
    differ = Diffino(mode='pandas', left=left_url, right=right_url)
    assert differ.build_diff()
def test_single_file_excel_s3_md5():
    """Build an md5-mode diff of two ``.xlsx`` objects addressed by S3 URLs.

    Requires ``build_diff()`` to return a truthy value.
    """
    left_url = "s3://fake-bucket/one.xlsx"
    right_url = "s3://fake-bucket/two.xlsx"
    outcome = Diffino(mode="md5", left=left_url, right=right_url).build_diff()
    assert outcome
def test_single_file_excel_local_pandas():
    """Check that a pandas diff of two local Excel files is truthy."""
    options = {
        "mode": "pandas",
        "left": "/tmp/one.xlsx",
        "right": "/tmp/two.xlsx",
    }
    outcome = Diffino(**options).build_diff()
    assert outcome
def test_multiple_files_s3_pandas():
    """Check that a pandas diff of two S3 prefixes yields a truthy result."""
    options = {
        "mode": "pandas",
        "left": "s3://fake-bucket/one",
        "right": "s3://fake-bucket/two",
    }
    outcome = Diffino(**options).build_diff()
    assert outcome