Beispiel #1
0
def get_supplier_solutions():
    """Build the clan of supplier solutions from the Turtle file supplier.ttl.

    The steps below mirror this SPARQL query:
        SELECT
            ?suppkey, ?nationkey
        FROM
            supplier
        WHERE {
            ?supplier <tpch:suppkey> ?suppkey .
            ?supplier <tpch:nationkey> ?nationkey .
        }

    Each stage is reported to a ``FunctionTimer`` (short output format).

    :return: The result of joining both basic graph patterns on 'supplier' and projecting the
        lefts 'nationkey' and 'suppkey'.
    """
    stopwatch = FunctionTimer()

    graph = rdf.import_graph("supplier.ttl")
    stopwatch.lap("suppliers", short=True)

    # First basic graph pattern: keep the triples whose predicate is 'tpch:suppkey' ...
    suppkey_pattern = clans.from_dict({"p": rdflib.URIRef("tpch:suppkey")})
    suppkey_triples = clans.superstrict(graph, suppkey_pattern)
    # ... then name the subject 'supplier' (used for the join) and the object 'suppkey' (output).
    suppkey_renaming = clans.from_dict({"supplier": "s", "suppkey": "o"})
    suppkey_rows = clans.compose(suppkey_triples, suppkey_renaming)

    # Second basic graph pattern: same procedure for the predicate 'tpch:nationkey', naming the
    # subject 'supplier' and the object 'nationkey'.
    nationkey_pattern = clans.from_dict({"p": rdflib.URIRef("tpch:nationkey")})
    nationkey_triples = clans.superstrict(graph, nationkey_pattern)
    nationkey_renaming = clans.from_dict({"supplier": "s", "nationkey": "o"})
    nationkey_rows = clans.compose(nationkey_triples, nationkey_renaming)

    # Join both patterns on the shared 'supplier' left, then keep only the requested columns.
    joined = clans.functional_cross_union(suppkey_rows, nationkey_rows)
    result = clans.project(joined, "nationkey", "suppkey")
    stopwatch.end("supplier_solutions", short=True)

    return result
Beispiel #2
0
def get_nations(regionname):
    """Load regions.xml and return a clan with the nations of the region named ``regionname``.

    The steps mirror this XQuery statement (shown for the region "MIDDLE EAST"), with the
    resulting XML converted into a clan:
        for $x in doc("regions.xml")/regions/region[name="MIDDLE EAST"]/nation
            return <nation>{$x/nationkey}<nationname>{data($x/name)}</nationname></nation>

    Each stage is reported to a ``FunctionTimer`` (short output format).

    :param regionname: The value of the 'name' left that selects the region.
    :return: A clan with the lefts 'nationkey' and 'nationname' for every nation of that region.
    """
    stopwatch = FunctionTimer()

    # Import the XML document. Multiplicity and sequence are not used; the data doesn't need them.
    document = xml.import_xml('regions.xml', convert_numerics=True)
    stopwatch.lap('regions', short=True)

    # Descend to the clan in which every row is one region.
    root = document('regions')
    region_rows = root['region']
    stopwatch.lap('regions_clan', short=True)

    # Keep only the region whose 'name' is `regionname`.
    name_filter = clans.from_dict({'name': regionname})
    selected_region = clans.superstrict(region_rows, name_filter)
    stopwatch.lap('target_region', short=True)

    # Pull out the 'nation' lefts, giving a clan in which every row is one nation's data.
    nation_rows = selected_region['nation']
    stopwatch.lap('nations_clan', short=True)

    # Keep 'nationkey' as is and expose 'name' as 'nationname'; 'comment' is dropped because it
    # is not listed.
    renaming = clans.from_dict({'nationkey': 'nationkey', 'nationname': 'name'})
    result = clans.compose(nation_rows, renaming)
    stopwatch.end('nations', short=True)

    return result
Beispiel #3
0
def get_nations(regionname):
    """Return the nations of one region from regions.xml as a clan.

    This is the clan-based counterpart of the following XQuery statement (written here for the
    region "MIDDLE EAST"; this function filters on ``regionname`` instead):
        for $x in doc("regions.xml")/regions/region[name="MIDDLE EAST"]/nation
            return <nation>{$x/nationkey}<nationname>{data($x/name)}</nationname></nation>

    Every intermediate step is timed with a ``FunctionTimer`` using short output.

    :param regionname: Name of the region whose nations are wanted.
    :return: A clan whose relations carry the lefts 'nationkey' and 'nationname'.
    """
    lap_timer = FunctionTimer()

    # Read the XML file. (No multiplicity or sequence handling; the data doesn't require it.)
    xml_data = xml.import_xml("regions.xml", convert_numerics=True)
    lap_timer.lap("regions", short=True)

    # One row per region.
    all_regions = xml_data("regions")["region"]
    lap_timer.lap("regions_clan", short=True)

    # Restrict to the region of interest, i.e. the one whose 'name' equals `regionname`.
    wanted_name = clans.from_dict({"name": regionname})
    region_of_interest = clans.superstrict(all_regions, wanted_name)
    lap_timer.lap("target_region", short=True)

    # One row per nation of that region.
    region_nations = region_of_interest["nation"]
    lap_timer.lap("nations_clan", short=True)

    # Project 'nationkey' and 'name', renaming the latter to 'nationname' ('comment' is removed).
    projection = clans.from_dict({"nationkey": "nationkey", "nationname": "name"})
    nation_result = clans.compose(region_nations, projection)
    lap_timer.end("nations", short=True)

    return nation_result
Beispiel #4
0
def get_supplier_solutions():
    """Return the supplier/nation key pairs found in the Turtle file supplier.ttl as a clan.

    Equivalent SPARQL query:
        SELECT
            ?suppkey, ?nationkey
        FROM
            supplier
        WHERE {
            ?supplier <tpch:suppkey> ?suppkey .
            ?supplier <tpch:nationkey> ?nationkey .
        }

    Every intermediate step is timed with a ``FunctionTimer`` using short output.

    :return: The join of both triple patterns on 'supplier', projected onto 'nationkey' and
        'suppkey'.
    """
    lap_timer = FunctionTimer()

    triples = rdf.import_graph('supplier.ttl')
    lap_timer.lap('suppliers', short=True)

    # Triples with the predicate 'tpch:suppkey'.
    has_suppkey = clans.from_dict({'p': rdflib.URIRef('tpch:suppkey')})
    suppkey_matches = clans.superstrict(triples, has_suppkey)
    # Subject becomes 'supplier' (join column), object becomes 'suppkey' (output column).
    to_suppkey_columns = clans.from_dict({'supplier': 's', 'suppkey': 'o'})
    suppkey_solutions = clans.compose(suppkey_matches, to_suppkey_columns)

    # Triples with the predicate 'tpch:nationkey'.
    has_nationkey = clans.from_dict({'p': rdflib.URIRef('tpch:nationkey')})
    nationkey_matches = clans.superstrict(triples, has_nationkey)
    # Subject becomes 'supplier' (join column), object becomes 'nationkey' (output column).
    to_nationkey_columns = clans.from_dict({'supplier': 's', 'nationkey': 'o'})
    nationkey_solutions = clans.compose(nationkey_matches, to_nationkey_columns)

    # Join on 'supplier' and keep just the two key columns.
    joined_solutions = clans.cross_functional_union(suppkey_solutions, nationkey_solutions)
    solutions = clans.project(joined_solutions, 'nationkey', 'suppkey')
    lap_timer.end('supplier_solutions', short=True)

    return solutions
Beispiel #5
0
def match_and_project(graph: 'PP( AxA )', pattern: dict=None, projection: dict=None):
    """Select the relations of ``graph`` that contain every member of ``pattern`` and rename
    their lefts as described by ``projection``.

    :param graph: An absolute clan (checked with an assertion).
    :param pattern: A dictionary mapping lefts (keys) to the rights (values) that must match.
        ``None`` is treated as an empty dictionary.
    :param projection: A dictionary mapping existing left names (keys) to their new names
        (values). ``None`` is treated as an empty dictionary.
    :return: The result of composing the matched relations with the transposed projection clan.
    """
    assert _clans.is_member(graph)
    pattern = {} if pattern is None else pattern
    projection = {} if projection is None else projection

    matched = pattern_match(graph, pattern)
    # ``projection`` maps existing name -> new name; the clan built from it is transposed before
    # it is used as the right-hand side of the composition.
    renaming = _clans.from_dict(projection)
    renaming_transposed = _clans.transpose(renaming)
    return _clans.compose(matched, renaming_transposed, _checked=False)
Beispiel #6
0
def match_and_project(graph: 'PP(A x A)',
                      pattern: dict = None,
                      projection: dict = None):
    r"""Filter ``graph`` down to the relations that contain all members of ``pattern``, then
    rename their :term:`left`\s according to ``projection``.

    :param graph: An absolute :term:`clan` (checked with an assertion).
    :param pattern: A dictionary whose keys are the :term:`left`\s and whose values are the
        :term:`right`\s to match. ``None`` is treated as an empty dictionary.
    :param projection: A dictionary whose keys are the existing names of the :term:`left`\s to
        be renamed and whose values are the new names. ``None`` is treated as an empty
        dictionary.
    :return: The composition of the matched relations with the transposed projection clan.
    """
    assert _clans.is_member(graph)
    pattern = pattern if pattern is not None else {}
    projection = projection if projection is not None else {}

    hits = pattern_match(graph, pattern)
    # The projection clan is built as existing name -> new name and transposed before it is
    # used as the right-hand side of the composition.
    name_map = _clans.transpose(_clans.from_dict(projection))
    renamed = _clans.compose(hits, name_map, _checked=False)
    return renamed
Beispiel #7
0
# Script fragment: demonstrates projection and renaming of lefts by composing clans.
# salutation_records_clan and earth_records_clan are defined earlier in the script (not shown).
print("salutation_records_clan:", salutation_records_clan)
print("earth_records_clan:", earth_records_clan)

# By choosing an appropriate right-hand argument, our extended composition operation from earlier can
# model projection.
words_langs_clan = Set(Set(Couplet('word', 'word'), Couplet('language', 'language')))
print("words_langs_clan:", words_langs_clan)

# The relations.diag and clans.diag utility functions create a "diagonal" relation or clan,
# respectively, with simpler syntax. The assertion checks that the clan built by hand above is
# equal to the one clans.diag produces.
assert words_langs_clan == clans.diag('word', 'language')

# Since the meaning of each set of records ('salutation') is invariant among the relations in
# salutation_records_clan, we can drop those Couplets. Note that the cardinality of the resulting
# clan is the same, but each relation now contains only two Couplets.
salutation_words_n_langs_clan = clans.compose(salutation_records_clan, words_langs_clan)
print("salutation_words_n_langs_clan:", salutation_words_n_langs_clan)

# However, we can take this one step further and "rename" the 'word' attribute to something more
# specific by replacing the value 'word' with 'salutation' everywhere we find it as the left of a
# Couplet. By doing this, we both compress the information in each relation and also set our data up
# for later processing.
# In the right-hand clan, the new name ('salutation') is the left and the existing name ('word')
# is the right of the Couplet; 'language' maps to itself and so is kept unchanged.
salutations_n_langs_clan = clans.compose(salutation_words_n_langs_clan,
                                         Set(Set(Couplet("salutation", "word"),
                                                 Couplet("language", "language"))))
print("salutations_n_langs_clan:", salutations_n_langs_clan)

# We'll do the same for earth_records_clan, but do the projection and "rename" all in one
# composition operation.
earths_n_langs_clan = clans.compose(earth_records_clan,
                                    Set(Set(Couplet("earth", "word"),
Beispiel #8
0
# Script fragment: demonstrates projection and renaming of lefts by composing clans.
# earth_records_clan and salutation_records_clan are defined earlier in the script (not shown).
print("earth_records_clan:", earth_records_clan)

# By choosing an appropriate right-hand argument, our extended composition operation from earlier
# can model projection.
words_langs_clan = Set(
    Set(Couplet('word', 'word'), Couplet('language', 'language')))
print("words_langs_clan:", words_langs_clan)

# The relations.diag and clans.diag utility functions create a "diagonal" relation or clan,
# respectively, with simpler syntax. The assertion checks that the clan built by hand above is
# equal to the one clans.diag produces.
assert words_langs_clan == clans.diag('word', 'language')

# Since the meaning of each set of records ('salutation') is invariant among the relations in
# salutation_records_clan, we can drop those Couplets. Note that the cardinality of the resulting
# clan is the same, but each relation now contains only two Couplets.
salutation_words_n_langs_clan = clans.compose(salutation_records_clan,
                                              words_langs_clan)
print("salutation_words_n_langs_clan:", salutation_words_n_langs_clan)

# However, we can take this one step further and "rename" the 'word' attribute to something more
# specific by replacing the value 'word' with 'salutation' everywhere we find it as the left of a
# Couplet. By doing this, we both compress the information in each relation and also set our data up
# for later processing.
# In the right-hand clan, the new name ('salutation') is the left and the existing name ('word')
# is the right of the Couplet; 'language' maps to itself and so is kept unchanged.
salutations_n_langs_clan = clans.compose(
    salutation_words_n_langs_clan,
    Set(Set(Couplet("salutation", "word"), Couplet("language", "language"))))
print("salutations_n_langs_clan:", salutations_n_langs_clan)

# We'll do the same for earth_records_clan, but do the projection and "rename" all in one
# composition operation.
earths_n_langs_clan = clans.compose(
    earth_records_clan,