def __init__(self, dataset):
    """
    Bind the converter to a dataset and seed a fresh query model with the
    graph URIs and prefixes of the dataset's graph.

    :param dataset: object exposing ``graph.graphs`` (``None`` or an object
        with ``values()``) and ``graph.graph_prefixes`` (object with
        ``values()``)
    """
    self.dataset = dataset
    self.query_model = QueryModel()

    source_graph = self.dataset.graph

    # register the graph URIs; a graph without URIs contributes an empty list
    if source_graph.graphs is None:
        graph_uris = []
    else:
        graph_uris = source_graph.graphs.values()
    self.query_model.add_graphs(graph_uris)

    # register every prefix mapping known to the graph
    for prefix_map in source_graph.graph_prefixes.values():
        self.query_model.add_prefixes(prefix_map)
def __join_grouped_grouped(self, query_model1, query_model2):
    """
    Join two grouped query models under a newly created outer query model.

    Both inputs are passed through ``QueryModel.clean_inner_qm`` before they
    are nested, so they are modified by this call.

    :param query_model1: query model of the first (left) dataset
    :param query_model2: query model of the second (right) dataset
    :return: the outer query model holding both inputs according to
        ``self.join_type``
    """
    outer_qm = JoinOperator.__create_outer_quer_model(query_model1, query_model2)
    # the from clause has to be captured before the inner models are cleaned
    outer_qm.from_clause = copy.copy(query_model1.from_clause)
    for inner_qm in (query_model1, query_model2):
        QueryModel.clean_inner_qm(inner_qm)

    if self.join_type == JoinType.InnerJoin:
        # both sides are mandatory subqueries
        outer_qm.add_subquery(query_model1)
        outer_qm.add_subquery(query_model2)
        return outer_qm

    if self.join_type == JoinType.LeftOuterJoin:
        required_qm, optional_qm = query_model1, query_model2
    elif self.join_type == JoinType.RightOuterJoin:
        required_qm, optional_qm = query_model2, query_model1
    else:
        # full outer join: give query_model1 the from clause back and
        # delegate to the union based implementation
        query_model1.from_clause = copy.copy(outer_qm.from_clause)
        return JoinOperator._outer_join(outer_qm, query_model1, query_model2)

    # one-sided outer join: one mandatory subquery, one optional subquery
    outer_qm.add_subquery(required_qm)
    outer_qm.add_optional_subquery(optional_qm)
    return outer_qm
def __wrap_in_subquery(query_model):
    """
    Create a new outer query model that mirrors the variables, from clause,
    select columns, offset, limit and order clause of ``query_model``.

    ``query_model`` is only read. Its variables, from clause, select columns
    and order clause are shallow-copied; offset and limit are assigned as is.
    Nothing is nested into the returned model here.

    :param query_model: the query model to mirror
    :return: the new outer query model
    """
    wrapper_qm = QueryModel()
    # (attribute name, shallow-copy the value?) in assignment order
    mirrored_attributes = (
        ("variables", True),
        ("from_clause", True),
        ("select_columns", True),
        ("offset", False),
        ("limit", False),
        ("order_clause", True),
    )
    for attribute, needs_copy in mirrored_attributes:
        value = getattr(query_model, attribute)
        setattr(wrapper_qm, attribute, copy.copy(value) if needs_copy else value)
    return wrapper_qm
def _outer_join(joined_query_model, query_model1, query_model2):
    """
    Build a full outer join as the union of two one-sided optional joins:
    (query_model1 OPTIONAL query_model2) UNION (query_model2 OPTIONAL
    query_model1).

    :param joined_query_model: the outer query model that receives the two
        union arms; its from clause is overwritten with a copy of
        ``query_model1.from_clause``
    :param query_model1: query model of the first dataset
    :param query_model2: query model of the second dataset
    :return: ``joined_query_model`` with both union arms added
    """
    joined_query_model.from_clause = copy.copy(query_model1.from_clause)

    # Work on deep copies so that cleaning the nested models does not touch
    # the models handed in by the caller. The same two copies are shared by
    # both union arms.
    query_model1_copy = copy.deepcopy(query_model1)
    query_model2_copy = copy.deepcopy(query_model2)

    # first arm: query_model1 with query_model2 as an optional subquery
    new_query_model1 = JoinOperator.__wrap_in_subquery(query_model1)
    QueryModel.clean_inner_qm(query_model2_copy)
    QueryModel.clean_inner_qm(query_model1_copy)
    new_query_model1.add_subquery(query_model1_copy)
    new_query_model1.add_optional_subquery(query_model2_copy)

    # second arm: query_model2 with query_model1 as an optional subquery
    new_query_model2 = JoinOperator.__wrap_in_subquery(query_model2)
    new_query_model2.add_subquery(query_model2_copy)
    new_query_model2.add_optional_subquery(query_model1_copy)

    joined_query_model.add_unions(new_query_model1)
    joined_query_model.add_unions(new_query_model2)
    return joined_query_model
class Queue2QueryModelConverter(object):
    """
    Builds a query model by replaying the query queue of a dataset.
    """

    def __init__(self, dataset):
        """
        Seed a fresh query model with the graph URIs and prefixes of the
        dataset's graph.

        :param dataset: the dataset whose query queue will be converted
        """
        self.dataset = dataset
        self.query_model = QueryModel()

        source_graph = self.dataset.graph

        # register the graph URIs; no URIs means an empty list
        if source_graph.graphs is None:
            self.query_model.add_graphs([])
        else:
            self.query_model.add_graphs(source_graph.graphs.values())

        # register every prefix mapping known to the graph
        for prefix_map in source_graph.graph_prefixes.values():
            self.query_model.add_prefixes(prefix_map)

    def to_query_model(self):
        """
        Replay the dataset's query queue and hand back the resulting model.

        :return: the query model after all queued nodes have been visited
        """
        self.traverse_dataset()
        return self.query_model

    def traverse_dataset(self):
        """
        Replay the query queue of ``self.dataset`` into ``self.query_model``.
        """
        self.__traverse_dataset(self.dataset)

    def __traverse_dataset(self, ds):
        """
        Visit every node queued on ``ds``, threading the query model through
        the visits. The parent of a grouped dataset is replayed first.

        :param ds: the dataset whose queue is replayed
        """
        if ds.type() == "GroupedDataset":
            # a grouped dataset builds on its parent, so replay that first
            self.__traverse_dataset(ds.parent_dataset)

        pending_nodes = ds.query_queue.queue
        model = self.query_model
        for step in pending_nodes:
            # only the query model returned by the visit is carried forward
            _, model, _ = step.visit_node(model, ds, step)
        self.query_model = model
def _outer_join(query_model1, query_model2):
    """
    Build an outer query model whose pattern is the union of the two given
    query models.

    The outer model takes its prefixes, variables, from clause, select
    columns, offset, limit and order clause from ``query_model1`` only.
    Both inputs are then passed through ``QueryModel.clean_inner_qm`` and
    added as union members, so they are modified by this call.

    :param query_model1: first union member; also the source of the outer
        model's settings
    :param query_model2: second union member
    :return: the new outer query model
    """
    union_qm = QueryModel()

    # mirror query_model1 before it is cleaned below;
    # (attribute name, shallow-copy the value?) in assignment order
    mirrored_attributes = (
        ("prefixes", True),
        ("variables", True),
        ("from_clause", True),
        ("select_columns", True),
        ("offset", False),
        ("limit", False),
        ("order_clause", True),
    )
    for attribute, needs_copy in mirrored_attributes:
        value = getattr(query_model1, attribute)
        setattr(union_qm, attribute, copy.copy(value) if needs_copy else value)

    members = (query_model1, query_model2)
    for member in members:
        QueryModel.clean_inner_qm(member)
    for member in members:
        union_qm.add_unions(member)
    return union_qm
def __join_grouped_grouped(self, query_model1, query_model2):
    """
    Join two grouped query models under a newly created outer query model.

    The outer model combines the prefixes, variables, select columns and
    order clauses of both inputs, takes the from clause of ``query_model1``,
    the smaller of the two offsets and the larger of the two limits.

    Both inputs are modified: ``query_model1.order_clause`` is extended in
    place with ``query_model2.order_clause`` and both models are passed
    through ``QueryModel.clean_inner_qm`` before being nested.

    :param query_model1: query model of the first (left) dataset
    :param query_model2: query model of the second (right) dataset
    :return: the outer query model holding both inputs according to
        ``self.join_type``
    """
    outer_qm = QueryModel()

    outer_qm.prefixes = copy.copy(query_model1.prefixes)
    outer_qm.add_prefixes(query_model2.prefixes)

    all_variables = query_model1.variables.union(query_model2.variables)
    outer_qm.variables = copy.copy(all_variables)

    outer_qm.from_clause = copy.copy(query_model1.from_clause)

    all_selected = query_model1.select_columns.union(query_model2.select_columns)
    outer_qm.select_columns = copy.copy(all_selected)

    outer_qm.offset = min(query_model1.offset, query_model2.offset)
    outer_qm.limit = max(query_model1.limit, query_model2.limit)

    # NOTE: this extends query_model1's own order clause in place
    query_model1.order_clause.update(query_model2.order_clause)
    outer_qm.order_clause = copy.copy(query_model1.order_clause)

    for inner_qm in (query_model1, query_model2):
        QueryModel.clean_inner_qm(inner_qm)

    if self.join_type == JoinType.InnerJoin:
        # both sides are mandatory subqueries
        outer_qm.add_subquery(query_model1)
        outer_qm.add_subquery(query_model2)
        return outer_qm

    if self.join_type == JoinType.LeftOuterJoin:
        required_qm, optional_qm = query_model1, query_model2
    elif self.join_type == JoinType.RightOuterJoin:
        required_qm, optional_qm = query_model2, query_model1
    else:
        # full outer join: both sides become union members
        outer_qm.add_unions(query_model1)
        outer_qm.add_unions(query_model2)
        return outer_qm

    # one-sided outer join: one mandatory subquery, one optional subquery
    outer_qm.add_subquery(required_qm)
    outer_qm.add_optional_subquery(optional_qm)
    return outer_qm
def __join_expandable_grouped(self, query_model1, query_model2, expandable_order=1):
    """
    Join two query models according to ``self.join_type``, nesting
    ``query_model2`` into ``query_model1`` where possible.

    :param query_model1: first query model; it receives ``query_model2`` as
        a (possibly optional) subquery in the inner and the matching
        one-sided outer join cases
    :param query_model2: second query model
    :param expandable_order: 1 or 2; combined with the join type it decides
        whether ``query_model2`` can simply be made optional inside
        ``query_model1`` or whether a new outer query model is required
    :return: ``query_model1`` or a newly created outer query model
    :raises Exception: if the join type / ``expandable_order`` combination
        matches none of the handled cases
    """
    if self.join_type == JoinType.OuterJoin:
        return JoinOperator._outer_join(query_model1, query_model2)

    if self.join_type == JoinType.InnerJoin:
        # query_model2 becomes a mandatory subquery of query_model1
        QueryModel.clean_inner_qm(query_model2)
        query_model1.add_subquery(query_model2)
        return query_model1

    left_join = self.join_type == JoinType.LeftOuterJoin
    right_join = self.join_type == JoinType.RightOuterJoin

    if (expandable_order == 1 and left_join) or (expandable_order == 2 and right_join):
        # query_model2 becomes an optional subquery of query_model1
        QueryModel.clean_inner_qm(query_model2)
        query_model1.add_optional_subquery(query_model2)
        return query_model1

    if (expandable_order == 2 and left_join) or (expandable_order == 1 and right_join):
        # a new outer query is needed: query_model1 goes in as the optional
        # subquery and query_model2 as the mandatory one
        outer_qm = QueryModel()
        # mirror query_model1 before it is cleaned below;
        # (attribute name, shallow-copy the value?) in assignment order
        mirrored_attributes = (
            ("prefixes", True),
            ("variables", True),
            ("from_clause", True),
            ("select_columns", True),
            ("offset", False),
            ("limit", False),
            ("order_clause", True),
        )
        for attribute, needs_copy in mirrored_attributes:
            value = getattr(query_model1, attribute)
            setattr(outer_qm, attribute, copy.copy(value) if needs_copy else value)

        QueryModel.clean_inner_qm(query_model1)
        QueryModel.clean_inner_qm(query_model2)
        outer_qm.add_optional_subquery(query_model1)
        outer_qm.add_subquery(query_model2)
        return outer_qm

    raise Exception("Undefined case")
def __create_outer_quer_model(query_model1, query_model2):
    """
    Create an outer query model that combines the prefixes, offset, limit,
    order clause, select columns and variables of the two query models.
    The from clause of neither model is copied.

    Select columns are combined as follows:

    * both models have select columns: the union of both select column sets
    * only one model has select columns: its select columns united with the
      variables of the other model
    * neither has select columns: the outer model's select columns are left
      untouched

    :param query_model1: first query model (only read)
    :param query_model2: second query model (only read)
    :return: an outer query model
    """
    query_model = QueryModel()
    query_model.add_prefixes(query_model1.prefixes)
    query_model.add_prefixes(query_model2.prefixes)
    # smallest offset and largest limit of the two models
    query_model.set_offset(min(query_model1.offset, query_model2.offset))
    query_model.set_limit(max(query_model1.limit, query_model2.limit))
    query_model.add_order_columns(query_model1.order_clause)
    query_model.add_order_columns(query_model2.order_clause)  # TODO: WHY do this here?

    # union the select columns; the first test must look at BOTH models,
    # otherwise the "only query_model1 selects" case below is unreachable
    # and query_model2's variables are dropped from the projection
    first_selects = len(query_model1.select_columns) > 0
    second_selects = len(query_model2.select_columns) > 0
    if first_selects and second_selects:
        query_model.select_columns = query_model1.select_columns.union(
            query_model2.select_columns)
    elif first_selects:
        query_model.select_columns = query_model1.select_columns.union(
            query_model2.variables)
    elif second_selects:
        query_model.select_columns = query_model1.variables.union(
            query_model2.select_columns)

    # union the variables
    query_model.variables = query_model1.variables.union(query_model2.variables)
    return query_model
def __join_expandable_grouped(self, query_model1, query_model2, expandable_order=1):
    """
    Join the query model of an expandable dataset with the query model of a
    grouped dataset according to ``self.join_type``.

    :param query_model1: query model of the first (left) dataset
    :param query_model2: query model of the second (right) dataset
    :param expandable_order: 1 if ``query_model1`` belongs to the expandable
        dataset; any other value selects ``query_model2``
    :return: the query model representing the join
    :raises Exception: if the join type / ``expandable_order`` combination
        matches none of the handled cases
    """
    if self.join_type == JoinType.OuterJoin:
        outer_qm = JoinOperator.__create_outer_quer_model(query_model1, query_model2)
        return JoinOperator._outer_join(outer_qm, query_model1, query_model2)

    # decide once which side is expandable and which one is grouped
    if expandable_order == 1:
        expandable_qm, grouped_qm = query_model1, query_model2
    else:
        expandable_qm, grouped_qm = query_model2, query_model1

    if self.join_type == JoinType.InnerJoin:
        # the grouped model becomes a mandatory subquery of the expandable one
        parent_qm = JoinOperator.__make_parent(expandable_qm, grouped_qm)
        QueryModel.clean_inner_qm(grouped_qm)
        parent_qm.add_subquery(grouped_qm)
        return parent_qm

    left_join = self.join_type == JoinType.LeftOuterJoin
    right_join = self.join_type == JoinType.RightOuterJoin

    if (expandable_order == 1 and left_join) or (expandable_order == 2 and right_join):
        # the grouped model becomes an optional subquery of the expandable one
        parent_qm = JoinOperator.__make_parent(expandable_qm, grouped_qm)
        QueryModel.clean_inner_qm(grouped_qm)
        parent_qm.add_optional_subquery(grouped_qm)
        return parent_qm

    if (expandable_order == 2 and left_join) or (expandable_order == 1 and right_join):
        # a new outer query is needed: the grouped model goes in as the
        # mandatory subquery and the expandable model as the optional one
        outer_qm = JoinOperator.__wrap_in_subquery(expandable_qm)
        outer_qm = JoinOperator.__make_parent(outer_qm, grouped_qm)
        QueryModel.clean_inner_qm(expandable_qm)
        QueryModel.clean_inner_qm(grouped_qm)
        outer_qm.add_optional_subquery(expandable_qm)
        outer_qm.add_subquery(grouped_qm)
        return outer_qm

    raise Exception("Undefined case of expandable grouped join")
from rdfframes.query_builder.querymodel import QueryModel
from orderedset import OrderedSet

if __name__ == '__main__':
    # Manual script: builds a query model over the twitter graph with a
    # grouped subquery nested inside it.

    # inner query: group tweets by "tweep", count distinct tweets per group
    # and keep the groups whose count satisfies both having conditions
    subquery = QueryModel()
    subquery.add_triple("tweet", "sioc:has_creater", "tweep")
    subquery.add_group_columns(OrderedSet(["tweep"]))
    subquery.add_aggregate_pair("tweet", "COUNT", "tweet_count", "distinct")
    for bound in ("< 300", "> 250"):
        subquery.add_having_condition("tweet_count", bound)
    subquery.add_select_column("tweep")

    # outer query: prefixes and graph first, then the subquery and triples
    twitterquery = QueryModel()
    prefixes = dict([
        ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
        ("sioc", "http://rdfs.org/sioc/ns#"),
        ("sioct", "http://rdfs.org/sioc/types#"),
        ("to", "http://twitter.com/ontology/"),
        ("dcterms", "http://purl.org/dc/terms/"),
        ("xsd", "http://www.example.org/"),
        ("foaf", "http://xmlns.com/foaf/0.1/"),
    ])
    twitterquery.add_prefixes(prefixes)
    twitterquery.add_graphs(["http://twitter.com/"])
    twitterquery.add_variable("tweep")
    twitterquery.add_subquery(subquery)
    twitterquery.add_triple("tweet", "sioc:has_creater", "tweep")
    twitterquery.add_triple("tweet", " sioc:content", "text")