Exemple #1
0
def test_specific_cols():
    """Build a pandas-mode diff with ``cols=["id", "name"]`` and expect a truthy result."""
    options = {
        "mode": "pandas",
        "left": "/tmp/one.csv",
        "right": "/tmp/two.csv",
        "cols": ["id", "name"],
    }
    differ = Diffino(**options)
    outcome = differ.build_diff()
    assert outcome
Exemple #2
0
def test_specific_cols():
    """Pass an explicit ``cols`` list to a pandas-mode Diffino and check the diff result is truthy."""
    selected_columns = ['id', 'name']
    differ = Diffino(
        mode='pandas',
        left='/tmp/one.csv',
        right='/tmp/two.csv',
        cols=selected_columns,
    )
    outcome = differ.build_diff()
    assert outcome
Exemple #3
0
def test_output_csv_pandas():
    """Build a pandas-mode diff with ``output`` set to ``/tmp/diff.csv``.

    Unfinished placeholder: once the diff has been built the test always
    raises, so it fails until real assertions replace the final statement.
    """
    settings = dict(
        mode="pandas",
        left="/tmp/one.csv",
        right="/tmp/two.csv",
        output="/tmp/diff.csv",
    )
    Diffino(**settings).build_diff()
    raise Exception("Finish test!")
Exemple #4
0
def test_convert_numeric():
    """Build a pandas-mode diff with ``convert_numeric=False`` and expect a truthy result."""
    left_path = '/tmp/one_specific_cols.csv'
    right_path = '/tmp/two_specific_cols.csv'
    differ = Diffino(mode='pandas', left=left_path, right=right_path,
                     convert_numeric=False)
    outcome = differ.build_diff()
    assert outcome
Exemple #5
0
def test_output_xlsx_pandas():
    """Build a pandas-mode diff with ``output`` set to an ``.xlsx`` path.

    Unfinished placeholder: once the diff has been built the test always
    raises, so it fails until real assertions replace the final statement.
    """
    # The extension must be spelled ".xlsx" (not ".xslx") to match the
    # Excel format this test is named after.
    diff = Diffino(mode='pandas',
                   left='/tmp/one.csv',
                   right='/tmp/two.csv',
                   output='/tmp/diff.xlsx')
    diff.build_diff()
    raise Exception('Finish test!')
Exemple #6
0
def test_output_in_s3_pandas():
    """Build a pandas-mode diff with ``output`` set to an ``s3://`` URL.

    Unfinished placeholder: once the diff has been built the test always
    raises, so it fails until real assertions replace the final statement.
    """
    settings = {
        'mode': 'pandas',
        'left': '/tmp/one.csv',
        'right': '/tmp/two.csv',
        'output': 's3://fake-bucket/diff.json',
    }
    differ = Diffino(**settings)
    differ.build_diff()
    raise Exception('Finish test!')
Exemple #7
0
    def _create_diff(
        self,
        target_dir,
        left_csv="sample_left.csv",
        right_csv="sample_right.csv",
        to_console=False,
        cols=None,
        output_only_diffs=False,
    ):
        """Run Diffino on two CSV files that live next to this module.

        Args:
            target_dir: Directory used to compose the output file paths.
            left_csv: File name of the left input, joined onto this file's
                directory.
            right_csv: File name of the right input, joined onto this file's
                directory.
            to_console: When true, ``output=False`` is passed to Diffino
                instead of a file path.
            cols: Forwarded unchanged as Diffino's ``cols`` argument.
            output_only_diffs: Forwarded unchanged to Diffino; when true the
                existence checks on the two "not in" files are skipped.

        Returns:
            A tuple ``(output_location, output_left, output_right,
            rows_count)`` where ``rows_count`` is whatever ``build_diff()``
            returned.
        """
        output_location = (
            False if to_console else os.path.join(target_dir, "output.csv")
        )
        output_left = os.path.join(target_dir, "output_not_in_left.csv")
        output_right = os.path.join(target_dir, "output_not_in_right.csv")

        # Input fixtures are resolved relative to this file, not the CWD.
        fixtures_dir = os.path.dirname(__file__)
        location_left = os.path.join(fixtures_dir, left_csv)
        location_right = os.path.join(fixtures_dir, right_csv)
        diffino = Diffino(
            left=location_left,
            right=location_right,
            output=output_location,
            cols=cols,
            output_only_diffs=output_only_diffs,
        )

        rows_count = diffino.build_diff()

        # The two "not in" files are only checked when output goes to disk
        # and output_only_diffs is off.
        if not to_console and not output_only_diffs:
            assert os.path.isfile(output_left)
            assert os.path.isfile(output_right)
        return output_location, output_left, output_right, rows_count
Exemple #8
0
def test_convert_numeric():
    """Pass ``convert_numeric=False`` to a pandas-mode Diffino and check the diff result is truthy."""
    options = {
        "mode": "pandas",
        "left": "/tmp/one_specific_cols.csv",
        "right": "/tmp/two_specific_cols.csv",
        "convert_numeric": False,
    }
    differ = Diffino(**options)
    outcome = differ.build_diff()
    assert outcome
Exemple #9
0
def test_output_in_s3_pandas():
    """Build a pandas-mode diff whose ``output`` argument is an ``s3://`` URL.

    Unfinished placeholder: once the diff has been built the test always
    raises, so it fails until real assertions replace the final statement.
    """
    destination = "s3://fake-bucket/diff.json"
    differ = Diffino(
        mode="pandas", left="/tmp/one.csv", right="/tmp/two.csv", output=destination
    )
    differ.build_diff()
    raise Exception("Finish test!")
Exemple #10
0
  def _create_diff(self, target_dir, left_csv='sample_left.csv', right_csv='sample_right.csv',
                  to_console=False, cols=None):
    """Run Diffino on two CSV files that live next to this module.

    Args:
      target_dir: Directory used to compose the output file paths.
      left_csv: File name of the left input, joined onto this file's directory.
      right_csv: File name of the right input, joined onto this file's directory.
      to_console: When true, ``output=False`` is passed to Diffino instead of a
        file path, and the output-file existence checks are skipped.
      cols: Forwarded unchanged as Diffino's ``cols`` argument.

    Returns:
      A tuple ``(output_location, output_left, output_right)``.
    """
    output_location = False if to_console else os.path.join(target_dir, 'output.csv')
    output_left = os.path.join(target_dir, 'output_not_in_left.csv')
    output_right = os.path.join(target_dir, 'output_not_in_right.csv')

    # Input fixtures are resolved relative to this file, not the CWD.
    fixtures_dir = os.path.dirname(__file__)
    location_left = os.path.join(fixtures_dir, left_csv)
    location_right = os.path.join(fixtures_dir, right_csv)
    diffino = Diffino(left=location_left, right=location_right, output=output_location, cols=cols)

    diffino.build_diff()

    if not to_console:
      assert os.path.isfile(output_left)
      assert os.path.isfile(output_right)
    return output_location, output_left, output_right
Exemple #11
0
    def test_diffino_s3_support(self, tmpdir):
        conn = boto3.resource("s3")
        # We need to create the bucket since this is all in Moto's 'virtual' AWS account
        bucket = "britedata-diff"
        conn.create_bucket(Bucket=bucket)
        s3 = boto3.client("s3")

        key_current = "current.csv"
        key_new = "new.csv"
        value = u"""address,state,zip,name,id
eleven st,CA,66611,name eleven,11"""
        s3.put_object(Bucket=bucket, Key=key_current, Body=value)
        s3.put_object(Bucket=bucket, Key=key_new, Body=value)

        location_left = "s3://" + bucket + "/" + key_current
        location_right = "s3://" + bucket + "/" + key_new
        output_location = "s3://" + bucket + "/output.csv"
        diffino = Diffino(
            left=location_left, right=location_right, output=output_location
        )
        diffino.build_diff()

        body_left = (
            conn.Object(bucket, "output_not_in_left.csv")
            .get()["Body"]
            .read()
            .decode("utf-8")
        )
        body_right = (
            conn.Object(bucket, "output_not_in_right.csv")
            .get()["Body"]
            .read()
            .decode("utf-8")
        )

        expected_result = u"""address,state,zip,name,id\n"""
        assert body_left == expected_result
        assert body_right == expected_result
Exemple #12
0
def test_single_file_csv_local_md5():
    """Build an md5-mode diff of two local CSV paths and expect a truthy result."""
    options = {"mode": "md5", "left": "/tmp/one.csv", "right": "/tmp/two.csv"}
    differ = Diffino(**options)
    outcome = differ.build_diff()
    assert outcome
Exemple #13
0
def test_single_file_excel_local_pandas():
    """Build a pandas-mode diff of two local ``.xlsx`` paths and expect a truthy result."""
    left_path = '/tmp/one.xlsx'
    right_path = '/tmp/two.xlsx'
    differ = Diffino(mode='pandas', left=left_path, right=right_path)
    outcome = differ.build_diff()
    assert outcome
Exemple #14
0
def test_output_json_md5():
    """Build an md5-mode diff with ``output`` set to ``/tmp/diff.json``.

    Unfinished placeholder: once the diff has been built the test always
    raises, so it fails until real assertions replace the final statement.
    """
    settings = {
        "mode": "md5",
        "left": "/tmp/one.csv",
        "right": "/tmp/two.csv",
        "output": "/tmp/diff.json",
    }
    differ = Diffino(**settings)
    differ.build_diff()
    raise Exception("Finish test!")
Exemple #15
0
def test_multiple_files_dir_md5():
    """Build an md5-mode diff of ``/tmp/one`` against ``/tmp/two`` and expect a truthy result."""
    left_location, right_location = "/tmp/one", "/tmp/two"
    differ = Diffino(mode="md5", left=left_location, right=right_location)
    outcome = differ.build_diff()
    assert outcome
Exemple #16
0
def test_multiple_files_s3_pandas():
    """Build a pandas-mode diff of two ``s3://fake-bucket`` locations and expect a truthy result."""
    options = {
        'mode': 'pandas',
        'left': 's3://fake-bucket/one',
        'right': 's3://fake-bucket/two',
    }
    differ = Diffino(**options)
    outcome = differ.build_diff()
    assert outcome
Exemple #17
0
def test_multiple_files_zip_pandas():
    """Build a pandas-mode diff of two local ``.zip`` paths and expect a truthy result."""
    options = dict(mode="pandas", left="/tmp/one.zip", right="/tmp/two.zip")
    differ = Diffino(**options)
    outcome = differ.build_diff()
    assert outcome
Exemple #18
0
def test_multiple_files_zip_pandas():
    """Pass two local ``.zip`` paths to a pandas-mode Diffino and check the diff result is truthy."""
    left_archive = '/tmp/one.zip'
    right_archive = '/tmp/two.zip'
    differ = Diffino(
        mode='pandas',
        left=left_archive,
        right=right_archive,
    )
    outcome = differ.build_diff()
    assert outcome
Exemple #19
0
def test_single_file_csv_s3_pandas():
    """Build a pandas-mode diff of two ``s3://fake-bucket`` CSV URLs and expect a truthy result."""
    left_url = 's3://fake-bucket/one.csv'
    right_url = 's3://fake-bucket/two.csv'
    differ = Diffino(mode='pandas', left=left_url, right=right_url)
    outcome = differ.build_diff()
    assert outcome
Exemple #20
0
def test_single_file_excel_s3_md5():
    """Build an md5-mode diff of two ``s3://fake-bucket`` ``.xlsx`` URLs and expect a truthy result."""
    options = {
        "mode": "md5",
        "left": "s3://fake-bucket/one.xlsx",
        "right": "s3://fake-bucket/two.xlsx",
    }
    differ = Diffino(**options)
    outcome = differ.build_diff()
    assert outcome
Exemple #21
0
def test_single_file_excel_local_pandas():
    """Pass two local ``.xlsx`` paths to a pandas-mode Diffino and check the diff result is truthy."""
    options = dict(mode="pandas", left="/tmp/one.xlsx", right="/tmp/two.xlsx")
    differ = Diffino(**options)
    outcome = differ.build_diff()
    assert outcome
Exemple #22
0
def test_multiple_files_s3_pandas():
    """Pass two ``s3://fake-bucket`` locations to a pandas-mode Diffino and check the diff result is truthy."""
    left_location = "s3://fake-bucket/one"
    right_location = "s3://fake-bucket/two"
    differ = Diffino(
        mode="pandas",
        left=left_location,
        right=right_location,
    )
    outcome = differ.build_diff()
    assert outcome