def get_supplier_solutions():
    """Query the Turtle file supplier.ttl for the supplier keys and nation keys of all suppliers.

    The algebraic steps below are the equivalent of the following SPARQL query:

        SELECT ?suppkey, ?nationkey
        FROM supplier
        WHERE {
            ?supplier <tpch:suppkey> ?suppkey .
            ?supplier <tpch:nationkey> ?nationkey .
        }

    :return: The join of both graph patterns on 'supplier', projected onto 'nationkey' and
        'suppkey'.
    """
    timer = FunctionTimer()
    terse = True

    graph = rdf.import_graph("supplier.ttl")
    timer.lap("suppliers", short=terse)

    # First graph pattern: keep only the triples whose predicate is 'suppkey'...
    suppkey_pattern = clans.from_dict({"p": rdflib.URIRef("tpch:suppkey")})
    suppkey_triples = clans.superstrict(graph, suppkey_pattern)
    # ...then name the subject 'supplier' (the join column) and the object 'suppkey' (an output
    # column).
    suppkey_renames = clans.from_dict({"supplier": "s", "suppkey": "o"})
    suppkey_rows = clans.compose(suppkey_triples, suppkey_renames)

    # Second graph pattern: the same two steps for the predicate 'nationkey'.
    nationkey_pattern = clans.from_dict({"p": rdflib.URIRef("tpch:nationkey")})
    nationkey_triples = clans.superstrict(graph, nationkey_pattern)
    nationkey_renames = clans.from_dict({"supplier": "s", "nationkey": "o"})
    nationkey_rows = clans.compose(nationkey_triples, nationkey_renames)

    # Join both patterns on 'supplier', then keep only the columns the query selects.
    joined = clans.functional_cross_union(suppkey_rows, nationkey_rows)
    solutions = clans.project(joined, "nationkey", "suppkey")
    timer.end("supplier_solutions", short=terse)

    return solutions
def get_nations(regionname):
    """Read regions.xml and return the nations of the region whose name is ``regionname``.

    This is the equivalent of the following XQuery statement (shown here for the region
    "MIDDLE EAST"; the code matches against ``regionname`` instead), with the resulting XML
    converted into a clan:

        for $x in doc("regions.xml")/regions/region[name="MIDDLE EAST"]/nation
        return <nation>{$x/nationkey}<nationname>{data($x/name)}</nationname></nation>

    :param regionname: The value the 'name' of the wanted region must have.
    :return: One row per nation of that region, reduced to 'nationkey' and 'nationname'.
    """
    timer = FunctionTimer()
    terse = True

    # Import the XML document. Multiplicity and sequence are not requested; the data doesn't
    # need them.
    document = xml.import_xml('regions.xml', convert_numerics=True)
    timer.lap('regions', short=terse)

    # Descend to a clan that has one row per region.
    all_regions = document('regions')['region']
    timer.lap('regions_clan', short=terse)

    # Keep only the region whose 'name' equals `regionname`.
    region_filter = clans.from_dict({'name': regionname})
    selected_region = clans.superstrict(all_regions, region_filter)
    timer.lap('target_region', short=terse)

    # Pull the 'nation' lefts out of the selected region; every row is now one nation's data.
    nation_rows = selected_region['nation']
    timer.lap('nations_clan', short=terse)

    # Keep 'nationkey' as is and expose 'name' as 'nationname'; anything else (like 'comment')
    # is dropped because it isn't listed.
    renames = clans.from_dict({'nationkey': 'nationkey', 'nationname': 'name'})
    result = clans.compose(nation_rows, renames)
    timer.end('nations', short=terse)

    return result
def get_nations(regionname):
    """Return the nations listed in regions.xml under the region named ``regionname``.

    The steps mirror the following XQuery statement, with the literal "MIDDLE EAST" replaced by
    ``regionname`` and the resulting XML converted into a clan:

        for $x in doc("regions.xml")/regions/region[name="MIDDLE EAST"]/nation
        return <nation>{$x/nationkey}<nationname>{data($x/name)}</nationname></nation>

    :param regionname: The 'name' of the region whose nations are wanted.
    :return: A row per nation of the region, holding only 'nationkey' and 'nationname'.
    """
    stopwatch = FunctionTimer()
    brief = True

    # Step 1: load the document (without multiplicity or sequence; the data doesn't require it).
    xml_data = xml.import_xml("regions.xml", convert_numerics=True)
    stopwatch.lap("regions", short=brief)

    # Step 2: one row per region.
    region_rows = xml_data("regions")["region"]
    stopwatch.lap("regions_clan", short=brief)

    # Step 3: restrict to the rows whose 'name' is `regionname`.
    name_matcher = clans.from_dict({"name": regionname})
    matching_region = clans.superstrict(region_rows, name_matcher)
    stopwatch.lap("target_region", short=brief)

    # Step 4: one row per nation of the matching region.
    nation_rows = matching_region["nation"]
    stopwatch.lap("nations_clan", short=brief)

    # Step 5: project 'nationkey' and 'name', exposing the latter as 'nationname' (this removes
    # 'comment').
    column_map = clans.from_dict({"nationkey": "nationkey", "nationname": "name"})
    nations_of_region = clans.compose(nation_rows, column_map)
    stopwatch.end("nations", short=brief)

    return nations_of_region
def get_supplier_solutions():
    """Return the supplier key and nation key of every supplier in the Turtle file supplier.ttl.

    Equivalent SPARQL query:

        SELECT ?suppkey, ?nationkey
        FROM supplier
        WHERE {
            ?supplier <tpch:suppkey> ?suppkey .
            ?supplier <tpch:nationkey> ?nationkey .
        }

    :return: Both graph patterns joined on 'supplier' and projected onto 'nationkey' and
        'suppkey'.
    """
    timer = FunctionTimer()
    short_prints = True

    suppliers = rdf.import_graph('supplier.ttl')
    timer.lap('suppliers', short=short_prints)

    def triples_with_predicate(predicate_uri, object_name):
        # Select the triples whose predicate is `predicate_uri`, then name the subject 'supplier'
        # (for the later join) and the object `object_name` (the name needed in the output).
        matches = clans.superstrict(
            suppliers, clans.from_dict({'p': rdflib.URIRef(predicate_uri)}))
        return clans.compose(
            matches, clans.from_dict({'supplier': 's', object_name: 'o'}))

    # One basic graph pattern per line of the WHERE clause.
    bgp_suppkey = triples_with_predicate('tpch:suppkey', 'suppkey')
    bgp_nationkey = triples_with_predicate('tpch:nationkey', 'nationkey')

    # Join the two patterns on 'supplier' and keep only the selected columns.
    joined = clans.cross_functional_union(bgp_suppkey, bgp_nationkey)
    supplier_solutions = clans.project(joined, 'nationkey', 'suppkey')
    timer.end('supplier_solutions', short=short_prints)

    return supplier_solutions
def match_and_project(graph: 'PP( AxA )', pattern: dict=None, projection: dict=None):
    """Select the relations of ``graph`` that contain every member of ``pattern`` and rename
    their lefts as described by ``projection``.

    :param graph: An absolute clan. Membership is verified with an ``assert``.
    :param pattern: A dictionary mapping lefts (keys) to the rights (values) they must have in a
        relation for it to match. ``None`` is treated like an empty dictionary.
    :param projection: A dictionary mapping existing left names (keys) to their new names
        (values). ``None`` is treated like an empty dictionary.
    :return: The result of composing the matching relations with the renaming clan.
    """
    assert _clans.is_member(graph)

    matching_relations = pattern_match(graph, {} if pattern is None else pattern)

    # from_dict() is given old-name -> new-name entries; the composition below needs them the
    # other way around, hence the transpose.
    rename_clan = _clans.from_dict({} if projection is None else projection)
    return _clans.compose(matching_relations, _clans.transpose(rename_clan), _checked=False)
def match_and_project(graph: 'PP(A x A)', pattern: dict = None, projection: dict = None):
    r"""Find every relation in ``graph`` that contains all members of ``pattern`` and rename the
    :term:`left`\s of the matches according to ``projection``.

    :param graph: An absolute :term:`clan`. Membership is verified with an ``assert``.
    :param pattern: A dictionary whose keys are the :term:`left`\s and whose values are the
        :term:`right`\s that a relation must contain to match. Defaults to an empty dictionary.
    :param projection: A dictionary whose keys are the existing names of the :term:`left`\s to be
        renamed and whose values are their new names. Defaults to an empty dictionary.
    :return: The composition of the matching relations with the renaming clan.
    """
    assert _clans.is_member(graph)

    # Resolve the omitted-argument defaults first so both steps below see real dictionaries.
    pattern = {} if pattern is None else pattern
    projection = {} if projection is None else projection

    matched = pattern_match(graph, pattern)
    # The dictionary maps old name -> new name; transposing turns it into the control clan that
    # the composition expects.
    rename_ctrl = _clans.transpose(_clans.from_dict(projection))
    return _clans.compose(matched, rename_ctrl, _checked=False)
print("salutation_records_clan:", salutation_records_clan) print("earth_records_clan:", earth_records_clan) # By choosing an appropriate right-hand argument, our extended composition operation from earlier can # model projection. words_langs_clan = Set(Set(Couplet('word', 'word'), Couplet('language', 'language'))) print("words_langs_clan:", words_langs_clan) # The relations.diag and clans.diag utility functions create a "diagonal" relation or clan, # respectively, with simpler syntax. assert words_langs_clan == clans.diag('word', 'language') # Since the meaning of each set of records ('salutation') is invariant among the relations in # salutation_records_clan, we can drop those Couplets. Note that the cardinality of the resulting # clan is the same, but each relation now contains only two Couplets. salutation_words_n_langs_clan = clans.compose(salutation_records_clan, words_langs_clan) print("salutation_words_n_langs_clan:", salutation_words_n_langs_clan) # However, we can take this one step further and "rename" the 'word' attribute to something more # specific by replacing the value 'word' with 'salutation' everywhere we find it as the left of a # Couplet. By doing this, we both compress the information in each relation and also set our data up # for later processing. salutations_n_langs_clan = clans.compose(salutation_words_n_langs_clan, Set(Set(Couplet("salutation", "word"), Couplet("language", "language")))) print("salutations_n_langs_clan:", salutations_n_langs_clan) # We'll do the same for earth_records_clan, but do the projection and "rename" all in one # composition operation. earths_n_langs_clan = clans.compose(earth_records_clan, Set(Set(Couplet("earth", "word"),
print("earth_records_clan:", earth_records_clan) # By choosing an appropriate right-hand argument, our extended composition operation from earlier # can model projection. words_langs_clan = Set( Set(Couplet('word', 'word'), Couplet('language', 'language'))) print("words_langs_clan:", words_langs_clan) # The relations.diag and clans.diag utility functions create a "diagonal" relation or clan, # respectively, with simpler syntax. assert words_langs_clan == clans.diag('word', 'language') # Since the meaning of each set of records ('salutation') is invariant among the relations in # salutation_records_clan, we can drop those Couplets. Note that the cardinality of the resulting # clan is the same, but each relation now contains only two Couplets. salutation_words_n_langs_clan = clans.compose(salutation_records_clan, words_langs_clan) print("salutation_words_n_langs_clan:", salutation_words_n_langs_clan) # However, we can take this one step further and "rename" the 'word' attribute to something more # specific by replacing the value 'word' with 'salutation' everywhere we find it as the left of a # Couplet. By doing this, we both compress the information in each relation and also set our data up # for later processing. salutations_n_langs_clan = clans.compose( salutation_words_n_langs_clan, Set(Set(Couplet("salutation", "word"), Couplet("language", "language")))) print("salutations_n_langs_clan:", salutations_n_langs_clan) # We'll do the same for earth_records_clan, but do the projection and "rename" all in one # composition operation. earths_n_langs_clan = clans.compose( earth_records_clan,