def test_it_generates_query_with_simple_and_composite_matches():
    resp = make_query(
        {
            "Database": "amazonreviews",
            "Table": "amazon_reviews_parquet",
            "Columns": [
                {"Column": "a.b.c", "Type": "Simple",},
                {
                    "Columns": ["user.first_name", "user.last_name"],
                    "Type": "Composite",
                },
            ],
            "DataMapperId": "dm_1234",
            "JobId": "job_1234567890",
        },
    )
    assert escape_resp(resp) == escape_resp(
        """
            SELECT DISTINCT t."$path"
            FROM "amazonreviews"."amazon_reviews_parquet" t,
                "s3f2_manifests_database"."s3f2_manifests_table" m
            WHERE m."jobid"='job_1234567890'
            AND m."datamapperid"='dm_1234' AND

            ((cast(t."a"."b"."c" as varchar)=m."queryablematchid" AND m."queryablecolumns"='a.b.c') OR

            (concat(t."user"."first_name", '_S3F2COMP_', t."user"."last_name")=m."queryablematchid" AND
            m."queryablecolumns"='user.first_name_S3F2COMP_user.last_name'))
        """
    )
def test_it_generates_query_with_multiple_partitions():
    resp = make_query(
        {
            "Database": "amazonreviews",
            "Table": "amazon_reviews_parquet",
            "Columns": [{"Column": "customer_id", "Type": "Simple",}],
            "PartitionKeys": [
                {"Key": "product_category", "Value": "Books"},
                {"Key": "published", "Value": "2019"},
            ],
            "DataMapperId": "dm_1234",
            "JobId": "job_1234567890",
        }
    )
    assert escape_resp(resp) == escape_resp(
        """
            SELECT DISTINCT t."$path"
            FROM "amazonreviews"."amazon_reviews_parquet" t,
                "s3f2_manifests_database"."s3f2_manifests_table" m
            WHERE m."jobid"='job_1234567890'
            AND m."datamapperid"='dm_1234' AND

            ((cast(t."customer_id" as varchar)=m."queryablematchid" AND m."queryablecolumns"='customer_id'))

            AND "product_category" = 'Books' AND "published" = '2019'
        """
    )
def test_it_generates_query_with_multiple_columns():
    resp = make_query(
        {
            "Database": "amazonreviews",
            "Table": "amazon_reviews_parquet",
            "Columns": [
                {"Column": "a", "Type": "Simple"},
                {"Column": "b", "Type": "Simple"},
            ],
            "DataMapperId": "dm_1234",
            "JobId": "job_1234567890",
        }
    )
    assert escape_resp(resp) == escape_resp(
        """
            SELECT DISTINCT t."$path"
            FROM "amazonreviews"."amazon_reviews_parquet" t,
                "s3f2_manifests_database"."s3f2_manifests_table" m
            WHERE m."jobid"='job_1234567890'
            AND m."datamapperid"='dm_1234' AND

            ((cast(t."a" as varchar)=m."queryablematchid" AND m."queryablecolumns"='a') OR
            (cast(t."b" as varchar)=m."queryablematchid" AND m."queryablecolumns"='b'))
        """
    )
예제 #4
0
def test_it_generates_query_with_composite_matches():
    resp = make_query({
        "Database":
        "amazonreviews",
        "Table":
        "amazon_reviews_parquet",
        "Columns": [
            {
                "Columns": ["user.first_name", "user.last_name"],
                "MatchIds": [["John", "Doe"], ["Jane", "Doe"]],
                "Type": "Composite",
            },
            {
                "Columns": ["user.age", "user.last_name"],
                "MatchIds": [[28, "Smith"]],
                "Type": "Composite",
            },
            {
                "Columns": ["user.userid"],
                "MatchIds": [["123456"]],
                "Type": "Composite",
            },
        ],
    })

    assert (
        escape_resp(resp) == 'SELECT DISTINCT "$path" '
        'FROM "amazonreviews"."amazon_reviews_parquet" '
        'WHERE (concat("user"."first_name", \'_S3F2COMP_\', "user"."last_name") '
        "in ('John_S3F2COMP_Doe', 'Jane_S3F2COMP_Doe') OR "
        'concat("user"."age", \'_S3F2COMP_\', "user"."last_name") '
        "in ('28_S3F2COMP_Smith') OR "
        '"user"."userid" in (\'123456\'))')
def test_it_generates_query_with_multiple_partitions():
    resp = make_query({
        "Database":
        "amazonreviews",
        "Table":
        "amazon_reviews_parquet",
        "Columns": [{
            "Column": "customer_id",
            "MatchIds": ["123456", "456789"]
        }],
        "PartitionKeys": [
            {
                "Key": "product_category",
                "Value": "Books"
            },
            {
                "Key": "published",
                "Value": "2019"
            },
        ],
    })

    assert (escape_resp(resp) == 'SELECT DISTINCT "$path" '
            'FROM "amazonreviews"."amazon_reviews_parquet" '
            "WHERE (\"customer_id\" in ('123456', '456789')) "
            "AND \"product_category\" = 'Books' "
            "AND \"published\" = '2019'")
예제 #6
0
def test_it_generates_query_with_simple_and_composite_matches():
    resp = make_query({
        "Database":
        "amazonreviews",
        "Table":
        "amazon_reviews_parquet",
        "Columns": [
            {
                "Column": "a.b.c",
                "MatchIds": ["a123456", "b123456"],
                "Type": "Simple",
            },
            {
                "Columns": ["user.first_name", "user.last_name"],
                "MatchIds": [["John", "Doe"], ["Jane", "Doe"]],
                "Type": "Composite",
            },
        ],
    })

    assert (
        escape_resp(resp) == 'SELECT DISTINCT "$path" '
        'FROM "amazonreviews"."amazon_reviews_parquet" '
        'WHERE ("a"."b"."c" in (\'a123456\', \'b123456\') '
        'OR concat("user"."first_name", \'_S3F2COMP_\', "user"."last_name") '
        "in ('John_S3F2COMP_Doe', 'Jane_S3F2COMP_Doe'))")
예제 #7
0
def test_it_generates_query_with_multiple_columns():
    resp = make_query({
        "Database":
        "amazonreviews",
        "Table":
        "amazon_reviews_parquet",
        "Columns": [
            {
                "Column": "a",
                "MatchIds": ["a123456", "b123456"],
                "Type": "Simple"
            },
            {
                "Column": "b",
                "MatchIds": ["a456789", "b456789"],
                "Type": "Simple"
            },
        ],
    })

    assert (
        escape_resp(resp) == 'SELECT DISTINCT "$path" '
        'FROM "amazonreviews"."amazon_reviews_parquet" '
        "WHERE (\"a\" in ('a123456', 'b123456') OR \"b\" in ('a456789', 'b456789'))"
    )
def test_it_generates_query_with_columns_of_complex_type():
    resp = make_query({
        "Database":
        "amazonreviews",
        "Table":
        "amazon_reviews_parquet",
        "Columns": [{
            "Column": "a.b.c",
            "MatchIds": ["a123456", "b123456"]
        }],
    })

    assert (escape_resp(resp) == 'SELECT DISTINCT "$path" '
            'FROM "amazonreviews"."amazon_reviews_parquet" '
            'WHERE ("a"."b"."c" in (\'a123456\', \'b123456\'))')
def test_it_generates_query_without_partition():
    resp = make_query({
        "Database":
        "amazonreviews",
        "Table":
        "amazon_reviews_parquet",
        "Columns": [{
            "Column": "customer_id",
            "MatchIds": ["123456", "456789"]
        }],
    })

    assert (escape_resp(resp) == 'SELECT DISTINCT "$path" '
            'FROM "amazonreviews"."amazon_reviews_parquet" '
            "WHERE (\"customer_id\" in ('123456', '456789'))")
def test_it_generates_query_without_partition():
    resp = make_query({
        "Database":
        "amazonreviews",
        "Table":
        "amazon_reviews_parquet",
        "Columns": [{
            "Column": "customer_id",
            "MatchIds": ["123456", "456789"]
        }]
    })

    assert "SELECT DISTINCT \"$path\" " \
           "FROM \"amazonreviews\".\"amazon_reviews_parquet\" " \
           "WHERE (\"customer_id\" in ('123456', '456789'))" == re.sub("[\x00-\x20]+", " ", resp.strip())
예제 #11
0
def test_it_generates_query_with_partition_and_int_column():
    resp = make_query({
        "Database":
        "amazonreviews",
        "Table":
        "amazon_reviews_parquet",
        "Columns": [{
            "Column": "customer_id",
            "MatchIds": [123456, 456789]
        }],
        "PartitionKeys": [{
            "Key": "product_category",
            "Value": "Books"
        }],
    })

    assert (escape_resp(resp) == 'SELECT DISTINCT "$path" '
            'FROM "amazonreviews"."amazon_reviews_parquet" '
            'WHERE ("customer_id" in (123456, 456789)) '
            "AND \"product_category\" = 'Books'")
def test_it_generates_query_with_partition():
    resp = make_query({
        "Database":
        "amazonreviews",
        "Table":
        "amazon_reviews_parquet",
        "Columns": [{
            "Column": "customer_id",
            "MatchIds": ["123456", "456789"]
        }],
        "PartitionKeys": [{
            "Key": "product_category",
            "Value": "Books"
        }]
    })

    assert "SELECT DISTINCT \"$path\" " \
           "FROM \"amazonreviews\".\"amazon_reviews_parquet\" " \
           "WHERE (\"customer_id\" in ('123456', '456789')) " \
           "AND \"product_category\" = 'Books'" == re.sub("[\x00-\x20]+", " ", resp.strip())
예제 #13
0
def test_it_generates_query_with_int_partition():
    resp = make_query({
        "Database":
        "amazonreviews",
        "Table":
        "amazon_reviews_parquet",
        "Columns": [{
            "Column": "customer_id",
            "MatchIds": ["123456", "456789"],
            "Type": "Simple",
        }],
        "PartitionKeys": [{
            "Key": "year",
            "Value": 2010
        }],
    })

    assert (escape_resp(resp) == 'SELECT DISTINCT "$path" '
            'FROM "amazonreviews"."amazon_reviews_parquet" '
            "WHERE (\"customer_id\" in ('123456', '456789')) "
            'AND "year" = 2010')
def test_it_generates_query_with_multiple_columns():
    resp = make_query({
        "Database":
        "amazonreviews",
        "Table":
        "amazon_reviews_parquet",
        "Columns": [
            {
                "Column": "a",
                "MatchIds": ["a123456", "b123456"]
            },
            {
                "Column": "b",
                "MatchIds": ["a456789", "b456789"]
            },
        ]
    })

    assert "SELECT DISTINCT \"$path\" " \
           "FROM \"amazonreviews\".\"amazon_reviews_parquet\" " \
           "WHERE (\"a\" in ('a123456', 'b123456') OR \"b\" in ('a456789', 'b456789'))" == re.sub("[\x00-\x20]+", " ",
                                                                                                  resp.strip())
def test_it_generates_query_with_int_partition():
    resp = make_query(
        {
            "Database": "amazonreviews",
            "Table": "amazon_reviews_parquet",
            "Columns": [{"Column": "customer_id", "Type": "Simple",}],
            "PartitionKeys": [{"Key": "year", "Value": 2010}],
            "DataMapperId": "dm_1234",
            "JobId": "job_1234567890",
        }
    )
    assert escape_resp(resp) == escape_resp(
        """
            SELECT DISTINCT t."$path"
            FROM "amazonreviews"."amazon_reviews_parquet" t,
                "s3f2_manifests_database"."s3f2_manifests_table" m
            WHERE m."jobid"='job_1234567890'
            AND m."datamapperid"='dm_1234' AND

            ((cast(t."customer_id" as varchar)=m."queryablematchid" AND m."queryablecolumns"='customer_id'))

            AND "year" = 2010
        """
    )