def get_start_query(subj, pred, obj, table_name, fetch_size=500):
    """
        Build the SQL text and the parameters used to begin scanning a triple pattern.

        The shape reported by ``get_kind`` selects both the WHERE clause and the
        ORDER BY clause. Placeholders use the ``?`` style.

        Args:
          - subj: Subject of the triple pattern
          - pred: Predicate of the triple pattern
          - obj: Object of the triple pattern
          - table_name: Name of the table to read; it is interpolated directly into the SQL text
          - fetch_size: Not read by this function

        Returns:
          A ``(query, params)`` pair. ``params`` is an empty list when no term is bound.

        Raises:
          Exception: if ``get_kind`` reports a shape that is not handled here.
    """
    shape = get_kind(subj, pred, obj)
    select_clause = "SELECT * FROM {} ".format(table_name)

    # The three sort orders shared by the pattern shapes below.
    spo_order = "ORDER BY subject, predicate, object"
    pos_order = "ORDER BY predicate, object, subject"
    osp_order = "ORDER BY object, subject, predicate"

    # shape -> (text appended to the SELECT clause, parameters)
    plans = {
        'spo': ("WHERE subject = ? AND predicate = ? AND object = ? " + spo_order,
                (subj, pred, obj)),
        # The leading space is part of the original text for this shape.
        '???': (' ' + pos_order, []),
        's??': ("WHERE subject = ? " + spo_order, [subj]),
        'sp?': ("WHERE subject = ? AND predicate = ? " + spo_order, (subj, pred)),
        '?p?': ("WHERE predicate = ? " + pos_order, [pred]),
        '?po': ("WHERE predicate = ? AND object = ? " + pos_order, (pred, obj)),
        's?o': ("WHERE subject = ? AND object = ? " + osp_order, (subj, obj)),
        '??o': ("WHERE object = ? " + osp_order, [obj]),
    }
    if shape not in plans:
        raise Exception("Unkown pattern type: {}".format(shape))

    tail, params = plans[shape]
    return select_clause + tail, params
def get_resume_query(subj: str, pred: str, obj: str, last_read: Tuple[str, str, str, datetime, datetime], table_name: str, symbol: str = ">=") -> Tuple[str, str]:
    """Build the SQL query that resumes the scan of a triple pattern.

    The query uses keyset pagination: a row-wise comparison against the last
    row read, over the same columns as the ORDER BY clause. Objects are
    compared and sorted through ``md5(object)``.

    Args:
      * subj: Subject of the triple pattern.
      * pred: Predicate of the triple pattern.
      * obj: Object of the triple pattern.
      * last_read: The row to resume from, unpacked as
        (subject, predicate, object, insert_t, delete_t).
      * table_name: Name of the SQL table to scan; interpolated directly into the SQL text.
      * symbol: Comparison operator used for the keyset pagination. Defaults to ">=".

    Returns:
      A tuple with the SQL text and its parameters (a 5-tuple of values),
      or ``(None, None)`` when all three terms are bound.

    Raises:
      Exception: if ``get_kind`` reports a shape that is not handled here.
    """
    prev_s, prev_p, prev_o, prev_insert_t, prev_delete_t = last_read
    shape = get_kind(subj, pred, obj)

    # No resume query is produced for a fully bound pattern.
    if shape == 'spo':
        return None, None

    spo_order = "ORDER BY subject, predicate, md5(object), insert_t, delete_t"
    pos_order = "ORDER BY predicate, md5(object), subject, insert_t, delete_t"
    osp_order = "ORDER BY md5(object), subject, predicate, insert_t, delete_t"

    # Parameter tuples, in the column order of each index layout.
    spo_values = (prev_s, prev_p, prev_o, prev_insert_t, prev_delete_t)
    pos_values = (prev_p, prev_o, prev_s, prev_insert_t, prev_delete_t)

    # shape -> (text appended to the SELECT clause, parameters)
    plans = {
        '???': (
            f"WHERE (subject, predicate, md5(object), insert_t, delete_t) {symbol} "
            "(%s, %s, md5(%s), %s, %s) " + spo_order,
            spo_values,
        ),
        's??': (
            f"WHERE subject = %s AND (predicate, md5(object), insert_t, delete_t) {symbol} "
            "(%s, md5(%s), %s, %s) " + spo_order,
            spo_values,
        ),
        'sp?': (
            f"WHERE subject = %s AND predicate = %s AND (md5(object), insert_t, delete_t) {symbol} "
            "(md5(%s), %s, %s) " + spo_order,
            spo_values,
        ),
        '?p?': (
            f"WHERE predicate = %s AND (md5(object), subject, insert_t, delete_t) {symbol} "
            "(md5(%s), %s, %s, %s) " + pos_order,
            pos_values,
        ),
        # The line break after "predicate " is part of the original SQL text.
        '?po': (
            "WHERE predicate \n= %s AND md5(object) = md5(%s) AND "
            f"(subject, insert_t, delete_t) {symbol} (%s, %s, %s) " + pos_order,
            pos_values,
        ),
        's?o': (
            "WHERE subject = %s AND md5(object) = md5(%s) AND "
            f"(predicate, insert_t, delete_t) {symbol} (%s, %s, %s) " + osp_order,
            (prev_s, prev_o, prev_p, prev_insert_t, prev_delete_t),
        ),
        '??o': (
            "WHERE md5(object) = md5(%s) AND "
            f"(subject, predicate, insert_t, delete_t) {symbol} (%s, %s, %s, %s) " + osp_order,
            (prev_o, prev_s, prev_p, prev_insert_t, prev_delete_t),
        ),
    }
    if shape not in plans:
        raise Exception(f"Unkown pattern type: {shape}")

    tail, params = plans[shape]
    return f"SELECT * FROM {table_name} " + tail, params
def _estimate_cardinality(self, subject, predicate, obj) -> int:
    """
        Estimate the cardinality of a triple pattern from the index statistics held on the instance.

        The pattern shape reported by ``get_kind`` selects one entry of
        ``self._spo_index_stats``, ``self._pos_index_stats`` or ``self._osp_index_stats``.

        Args:
          - subject ``string`` - Subject of the triple pattern
          - predicate ``string`` - Predicate of the triple pattern
          - obj ``string`` - Object of the triple pattern

        Returns:
          The value stored in the statistics entry that matches the pattern shape.

        Raises:
          Exception: if ``get_kind`` reports a shape that is not handled here.
    """
    # shape -> (name of the statistics attribute, key inside it).
    # Attributes are resolved only after the shape is known, so only one is read.
    stat_location = {
        'spo': ('_spo_index_stats', 'same_spo_row_count'),
        '???': ('_spo_index_stats', 'row_count'),
        's??': ('_spo_index_stats', 'same_s_row_count'),
        'sp?': ('_spo_index_stats', 'same_sp_row_count'),
        '?p?': ('_pos_index_stats', 'same_p_row_count'),
        '?po': ('_pos_index_stats', 'same_po_row_count'),
        's?o': ('_osp_index_stats', 'same_os_row_count'),
        '??o': ('_osp_index_stats', 'same_o_row_count'),
    }
    shape = get_kind(subject, predicate, obj)
    if shape not in stat_location:
        raise Exception(f"Unkown pattern type: {shape}")

    stats_attr, stat_key = stat_location[shape]
    return getattr(self, stats_attr)[stat_key]
def get_start_query(subj, pred, obj, table_name):
    """
        Get a prepared SQL query which starts scanning for a triple pattern
        and the parameters used to execute it.

        The pattern shape reported by ``get_kind`` selects the WHERE clause and the
        ORDER BY clause. Placeholders use the ``?`` style.

        Args:
          - subj: Subject of the triple pattern
          - pred: Predicate of the triple pattern
          - obj: Object of the triple pattern
          - table_name: Name of the table to read; it is interpolated directly into the SQL text

        Returns:
          A ``(query, params)`` pair. ``params`` is an empty list when no term is bound.

        Raises:
          Exception: if ``get_kind`` reports a shape that is not handled here.
    """
    kind = get_kind(subj, pred, obj)
    query = f"SELECT * FROM {table_name} "
    if kind == 'spo':
        query += "WHERE subject = ? AND predicate = ? AND object = ? ORDER BY subject, predicate, object"
        return query, (subj, pred, obj)
    elif kind == '???':
        query += "ORDER BY subject, predicate, object"
        return query, []
    elif kind == 's??':
        query += "WHERE subject = ? ORDER BY subject, predicate, object"
        return query, [subj]
    elif kind == 'sp?':
        query += "WHERE subject = ? AND predicate = ? ORDER BY subject, predicate, object"
        return query, (subj, pred)
    elif kind == '?p?':
        query += "WHERE predicate = ? ORDER BY predicate, object, subject"
        return query, [pred]
    elif kind == '?po':
        query += "WHERE predicate = ? AND object = ? ORDER BY predicate, object, subject"
        return query, (pred, obj)
    elif kind == 's?o':
        # Fixed: the previous text had a dangling "AND" right before ORDER BY,
        # which made the statement syntactically invalid.
        query += "WHERE object = ? AND subject = ? ORDER BY object, subject, predicate"
        return query, (obj, subj)
    elif kind == '??o':
        query += "WHERE object = ? ORDER BY object, subject, predicate"
        return query, [obj]
    else:
        raise Exception(f"Unkown pattern type: {kind}")
def pattern_shape_estimate(subject: str, predicate: str, obj: str) -> int:
    """Rank a triple pattern by its shape, following the heuristics from [1].

    [1] Tsialiamanis et al., "Heuristics-based Query Optimisation for SPARQL", in EDBT 2012.

    Args:
      * subject: Subject of the triple pattern.
      * predicate: Predicate of the triple pattern.
      * obj: Object of the triple pattern.

    Returns:
      The ordering number of the triple pattern: 1 to 7 for the shapes listed
      below, and 8 for any other shape reported by ``get_kind``.
    """
    shape_rank = {
        'spo': 1,
        's?o': 2,
        '?po': 3,
        'sp?': 4,
        '??o': 5,
        's??': 6,
        '?p?': 7,
    }
    return shape_rank.get(get_kind(subject, predicate, obj), 8)
def get_resume_query(subj, pred, obj, last_read, table_name, symbol=">="):
    """
        Build the SQL text and the parameters used to resume scanning a triple pattern.

        The SELECT joins ``table_name`` three times with the ``catalog`` table to return
        the ``value`` matching each of the subject, predicate and object ids. The WHERE
        clause compares against the last row read, using ``?`` placeholders.

        Args:
          - subj: Subject of the triple pattern
          - pred: Predicate of the triple pattern
          - obj: Object of the triple pattern
          - last_read: The row to resume from, unpacked as (subject, predicate, object)
          - table_name: Name of the table to read; it is interpolated directly into the SQL text
          - symbol: Comparison operator placed between the columns and the placeholders

        Returns:
          A ``(query, params)`` pair, or ``(None, [])`` when all three terms are bound.

        Raises:
          Exception: if ``get_kind`` reports a shape that is not handled here.
    """
    prev_s, prev_p, prev_o = last_read
    shape = get_kind(subj, pred, obj)
    select_clause = (
        f"SELECT cs.value, cp.value, co.value FROM {table_name} "
        "INNER JOIN catalog AS cs ON subject = cs.id "
        "INNER JOIN catalog AS cp ON predicate = cp.id "
        "INNER JOIN catalog AS co ON object = co.id "
    )

    # No resume query is produced for a fully bound pattern.
    if shape == 'spo':
        return None, []

    spo_order = "ORDER BY subject, predicate, object"
    pos_order = "ORDER BY predicate, object, subject"
    osp_order = "ORDER BY object, subject, predicate"

    # Parameter tuples, in the column order of each index layout.
    spo_values = (prev_s, prev_p, prev_o)
    pos_values = (prev_p, prev_o, prev_s)
    osp_values = (prev_o, prev_s, prev_p)

    # shape -> (text appended to the SELECT clause, parameters)
    plans = {
        '???': (f"WHERE (subject, predicate, object) {symbol} (?, ?, ?) " + spo_order,
                spo_values),
        's??': (f"WHERE subject = ? AND (predicate, object) {symbol} (?, ?) " + spo_order,
                spo_values),
        # This shape uses a bare "?" on the right-hand side, unlike the others.
        'sp?': (f"WHERE subject = ? AND predicate = ? AND (object) {symbol} ? " + spo_order,
                spo_values),
        '?p?': (f"WHERE predicate = ? AND (object, subject) {symbol} (?, ?) " + pos_order,
                pos_values),
        '?po': (f"WHERE predicate = ? AND object = ? AND (subject) {symbol} (?) " + pos_order,
                pos_values),
        's?o': (f"WHERE object = ? AND subject = ? AND (predicate) {symbol} (?) " + osp_order,
                osp_values),
        # The line break before ORDER BY is part of the original SQL text.
        '??o': (f"WHERE object = ? AND (subject, predicate) {symbol} (?, ?) \n" + osp_order,
                osp_values),
    }
    if shape not in plans:
        raise Exception(f"Unkown pattern type: {shape}")

    tail, params = plans[shape]
    return select_clause + tail, params
def get_resume_query(subj: str, pred: str, obj: str, last_read: Tuple[str, str, str], table_name: str, symbol: str = ">=") -> Tuple[str, str]:
    """Build the SQL query that resumes the scan of a triple pattern.

    The query uses keyset pagination: a row-wise comparison against the last
    row read, over the same columns as the ORDER BY clause. Placeholders use
    the ``%s`` style.

    Args:
      * subj: Subject of the triple pattern.
      * pred: Predicate of the triple pattern.
      * obj: Object of the triple pattern.
      * last_read: The row to resume from, unpacked as (subject, predicate, object).
      * table_name: Name of the SQL table to scan; interpolated directly into the SQL text.
      * symbol: Comparison operator used for the keyset pagination. Defaults to ">=".

    Returns:
      A tuple with the SQL text and its parameters (a 3-tuple of values),
      or ``(None, None)`` when all three terms are bound.

    Raises:
      Exception: if ``get_kind`` reports a shape that is not handled here.
    """
    prev_s, prev_p, prev_o = last_read
    shape = get_kind(subj, pred, obj)

    # No resume query is produced for a fully bound pattern.
    if shape == 'spo':
        return None, None

    spo_order = "ORDER BY subject, predicate, object"
    pos_order = "ORDER BY predicate, object, subject"
    osp_order = "ORDER BY object, subject, predicate"

    # Parameter tuples, in the column order of each index layout.
    spo_values = (prev_s, prev_p, prev_o)
    pos_values = (prev_p, prev_o, prev_s)

    # shape -> (text appended to the SELECT clause, parameters)
    plans = {
        # The unbound pattern is scanned in (predicate, object, subject) order.
        '???': (f"WHERE (predicate, object, subject) {symbol} (%s, %s, %s) " + pos_order,
                pos_values),
        's??': (f"WHERE subject = %s AND (predicate, object) {symbol} (%s, %s) " + spo_order,
                spo_values),
        'sp?': (f"WHERE subject = %s AND predicate = %s AND (object) {symbol} (%s) " + spo_order,
                spo_values),
        '?p?': (f"WHERE predicate = %s AND (object, subject) {symbol} (%s, %s) " + pos_order,
                pos_values),
        '?po': (f"WHERE predicate = %s AND object = %s AND (subject) {symbol} (%s) " + pos_order,
                pos_values),
        's?o': (f"WHERE subject = %s AND object = %s AND (predicate) {symbol} (%s) " + osp_order,
                (prev_s, prev_o, prev_p)),
        '??o': (f"WHERE object = %s AND (subject, predicate) {symbol} (%s, %s) " + osp_order,
                (prev_o, prev_s, prev_p)),
    }
    if shape not in plans:
        raise Exception(f"Unkown pattern type: {shape}")

    tail, params = plans[shape]
    return f"SELECT * FROM {table_name} " + tail, params
def get_start_query(subj: str, pred: str, obj: str, table_name: str) -> Tuple[str, List[str]]:
    """Build the SQL query that starts the scan of a triple pattern.

    Objects are matched and sorted through ``md5(object)``. Placeholders use
    the ``%s`` style.

    Args:
      * subj: Subject of the triple pattern.
      * pred: Predicate of the triple pattern.
      * obj: Object of the triple pattern.
      * table_name: Name of the SQL table to scan; interpolated directly into the SQL text.

    Returns:
      A tuple with the SQL text and its parameters. The parameters are
      ``None`` when no term of the pattern is bound.

    Raises:
      Exception: if ``get_kind`` reports a shape that is not handled here.
    """
    shape = get_kind(subj, pred, obj)
    select_clause = f"SELECT * FROM {table_name} "

    spo_order = "ORDER BY subject, predicate, md5(object)"
    pos_order = "ORDER BY predicate, md5(object), subject"
    osp_order = "ORDER BY md5(object), subject, predicate"

    # shape -> (text appended to the SELECT clause, parameters)
    plans = {
        'spo': ("WHERE subject = %s AND predicate = %s AND md5(object) = md5(%s) " + spo_order,
                (subj, pred, obj)),
        '???': (spo_order, None),
        's??': ("WHERE subject = %s " + spo_order, [subj]),
        'sp?': ("WHERE subject = %s AND predicate = %s " + spo_order, (subj, pred)),
        '?p?': ("WHERE predicate = %s " + pos_order, [pred]),
        '?po': ("WHERE predicate = %s AND md5(object) = md5(%s) " + pos_order, (pred, obj)),
        's?o': ("WHERE md5(object) = md5(%s) AND subject = %s " + osp_order, (obj, subj)),
        '??o': ("WHERE md5(object) = md5(%s) " + osp_order, [obj]),
    }
    if shape not in plans:
        raise Exception(f"Unkown pattern type: {shape}")

    tail, params = plans[shape]
    return select_clause + tail, params
def get_resume_query(subj, pred, obj, last_read, table_name, fetch_size=500, symbol=">="):
    """
        Build the SQL text and the parameters used to resume scanning a triple pattern.

        The WHERE clause compares the sort columns against the last row read,
        using ``%s`` placeholders and the comparison operator given by ``symbol``.

        Args:
          - subj: Subject of the triple pattern
          - pred: Predicate of the triple pattern
          - obj: Object of the triple pattern
          - last_read: The row to resume from, unpacked as (subject, predicate, object)
          - table_name: Name of the table to read; it is interpolated directly into the SQL text
          - fetch_size: Not read by this function
          - symbol: Comparison operator placed between the columns and the placeholders

        Returns:
          A ``(query, params)`` pair, or ``(None, None)`` when all three terms are bound.

        Raises:
          Exception: if ``get_kind`` reports a shape that is not handled here.
    """
    prev_s, prev_p, prev_o = last_read
    shape = get_kind(subj, pred, obj)
    select_clause = "SELECT * FROM {} ".format(table_name)

    # No resume query is produced for a fully bound pattern.
    if shape == 'spo':
        return None, None

    # Parameter tuples, in the column order of each index layout.
    spo_values = (prev_s, prev_p, prev_o)
    pos_values = (prev_p, prev_o, prev_s)

    # shape -> (template whose "{}" receives the comparison operator, parameters)
    plans = {
        # The unbound pattern is scanned in (predicate, object, subject) order.
        '???': ("WHERE (predicate, object, subject) {} (%s, %s, %s) "
                "ORDER BY predicate, object, subject",
                pos_values),
        's??': ("WHERE subject = %s AND (predicate, object) {} (%s, %s) "
                "ORDER BY subject, predicate, object",
                spo_values),
        'sp?': ("WHERE subject = %s AND predicate = %s AND (object) {} (%s) "
                "ORDER BY subject, predicate, object",
                spo_values),
        '?p?': ("WHERE predicate = %s AND (object, subject) {} (%s, %s) "
                "ORDER BY predicate, object, subject",
                pos_values),
        '?po': ("WHERE predicate = %s AND object = %s AND (subject) {} (%s) "
                "ORDER BY predicate, object, subject",
                pos_values),
        's?o': ("WHERE subject = %s AND object = %s AND (predicate) {} (%s) "
                "ORDER BY object, subject, predicate",
                (prev_s, prev_o, prev_p)),
        '??o': ("WHERE object = %s AND (subject, predicate) {} (%s, %s) "
                "ORDER BY object, subject, predicate",
                (prev_o, prev_s, prev_p)),
    }
    if shape not in plans:
        raise Exception("Unkown pattern type: {}".format(shape))

    template, params = plans[shape]
    return select_clause + template.format(symbol), params
def get_start_query(subj, pred, obj, table_name):
    """
        Build the SQL text and the parameters used to begin scanning a triple pattern.

        The SELECT joins ``table_name`` three times with the ``catalog`` table to return
        the ``value`` matching each of the subject, predicate and object ids. Placeholders
        use the ``?`` style.

        Args:
          - subj: Subject of the triple pattern
          - pred: Predicate of the triple pattern
          - obj: Object of the triple pattern
          - table_name: Name of the table to read; it is interpolated directly into the SQL text

        Returns:
          A ``(query, params)`` pair. ``params`` is an empty list when no term is bound.

        Raises:
          Exception: if ``get_kind`` reports a shape that is not handled here.
    """
    shape = get_kind(subj, pred, obj)
    select_clause = (
        f"SELECT cs.value, cp.value, co.value FROM {table_name} "
        "INNER JOIN catalog AS cs ON subject = cs.id "
        "INNER JOIN catalog AS cp ON predicate = cp.id "
        "INNER JOIN catalog AS co ON object = co.id "
    )

    spo_order = "ORDER BY subject, predicate, object"
    pos_order = "ORDER BY predicate, object, subject"
    osp_order = "ORDER BY object, subject, predicate"

    # shape -> (text appended to the SELECT clause, parameters)
    plans = {
        'spo': ("WHERE subject = ? AND predicate = ? AND object = ? " + spo_order,
                (subj, pred, obj)),
        '???': (spo_order, []),
        's??': ("WHERE subject = ? " + spo_order, [subj]),
        'sp?': ("WHERE subject = ? AND predicate = ? " + spo_order, (subj, pred)),
        '?p?': ("WHERE predicate = ? " + pos_order, [pred]),
        '?po': ("WHERE predicate = ? AND object = ? " + pos_order, (pred, obj)),
        's?o': ("WHERE object = ? AND subject = ? " + osp_order, (obj, subj)),
        '??o': ("WHERE object = ? " + osp_order, [obj]),
    }
    if shape not in plans:
        raise Exception(f"Unkown pattern type: {shape}")

    tail, params = plans[shape]
    return select_clause + tail, params
def resume_triples(connection, last_read, s, p, o):
    """Pick the table used to resume the evaluation of a triple pattern.

    Args:
      * connection: Object exposing a ``table(name)`` method.
      * last_read: Value returned unchanged alongside the table.
      * s: Subject of the triple pattern.
      * p: Predicate of the triple pattern.
      * o: Object of the triple pattern.

    Returns:
      A ``(table, last_read)`` pair, where ``table`` is the result of
      ``connection.table`` for 'spo', 'pos' or 'osp'.

    Raises:
      Exception: if ``get_kind`` reports a shape that is not handled here.
    """
    # shape -> name of the table to open
    table_for_shape = {
        '???': 'spo',
        'spo': 'spo',
        's??': 'spo',
        'sp?': 'spo',
        '?p?': 'pos',
        '?po': 'pos',
        's?o': 'osp',
        '??o': 'osp',
    }
    shape = get_kind(s, p, o)
    if shape not in table_for_shape:
        raise Exception(f"Unkown pattern type: {shape}")
    return connection.table(table_for_shape[shape]), last_read
def get_start_query(subj: str, pred: str, obj: str, table_name: str) -> Tuple[str, List[str]]:
    """Build the SQL query that starts the scan of a triple pattern.

    Every query sorts on the three triple columns followed by ``insert_t``
    and ``delete_t``. Placeholders use the ``%s`` style.

    Args:
      * subj: Subject of the triple pattern.
      * pred: Predicate of the triple pattern.
      * obj: Object of the triple pattern.
      * table_name: Name of the SQL table to scan; interpolated directly into the SQL text.

    Returns:
      A tuple with the SQL text and its parameters. The parameters are
      ``None`` when no term of the pattern is bound.

    Raises:
      Exception: if ``get_kind`` reports a shape that is not handled here.
    """
    shape = get_kind(subj, pred, obj)
    select_clause = f"SELECT * FROM {table_name} "

    spo_order = "ORDER BY subject, predicate, object, insert_t, delete_t"
    pos_order = "ORDER BY predicate, object, subject, insert_t, delete_t"
    osp_order = "ORDER BY object, subject, predicate, insert_t, delete_t"

    # shape -> (text appended to the SELECT clause, parameters)
    plans = {
        'spo': ("WHERE subject = %s AND predicate = %s AND object = %s " + spo_order,
                (subj, pred, obj)),
        # The leading space is part of the original text for this shape.
        '???': (' ' + spo_order, None),
        's??': ("WHERE subject = %s " + spo_order, [subj]),
        'sp?': ("WHERE subject = %s AND predicate = %s " + spo_order, (subj, pred)),
        '?p?': ("WHERE predicate = %s " + pos_order, [pred]),
        '?po': ("WHERE predicate = %s AND object = %s " + pos_order, (pred, obj)),
        's?o': ("WHERE subject = %s AND object = %s " + osp_order, (subj, obj)),
        '??o': ("WHERE object = %s " + osp_order, [obj]),
    }
    if shape not in plans:
        raise Exception(f"Unkown pattern type: {shape}")

    tail, params = plans[shape]
    return select_clause + tail, params
def find_triples(connection, s, p, o):
    """Pick the table (SPO, POS or OSP) and the start key used to evaluate a triple pattern.

    Args:
      * connection: Object exposing a ``table(name)`` method.
      * s: Subject of the triple pattern.
      * p: Predicate of the triple pattern.
      * o: Object of the triple pattern.

    Returns:
      A ``(table, start_key)`` pair. ``start_key`` is the empty string for the
      fully unbound pattern; otherwise it is built by ``build_row_key`` from
      the terms, given in the column order of the selected table.

    Raises:
      Exception: if ``get_kind`` reports a shape that is not handled here.
    """
    # shape -> (table name, terms passed to build_row_key, or None for no key)
    layouts = {
        'spo': ('spo', (s, p, o)),
        's??': ('spo', (s, p, o)),
        'sp?': ('spo', (s, p, o)),
        '???': ('spo', None),
        '?p?': ('pos', (p, o, s)),
        '?po': ('pos', (p, o, s)),
        's?o': ('osp', (o, s, p)),
        '??o': ('osp', (o, s, p)),
    }
    shape = get_kind(s, p, o)
    if shape not in layouts:
        raise Exception(f"Unkown pattern type: {shape}")

    table_name, key_terms = layouts[shape]
    # The table is opened before the key is built, as in the original sequence.
    table = connection.table(table_name)
    start_key = '' if key_terms is None else build_row_key(*key_terms)
    return table, start_key
def pattern_shape_estimate(subject, predicate, obj):
    """
        Rank a triple pattern by its shape, according to the heuristics from
        Tsialiamanis et al., 'Heuristics-based Query Optimisation for SPARQL', in EDBT 2012.

        Returns the 1-based position of the shape in the ordering below,
        or 8 when ``get_kind`` reports any other shape.
    """
    # Shapes listed from rank 1 to rank 7.
    ranked_shapes = ('spo', 's?o', '?po', 'sp?', '??o', 's??', '?p?')
    shape = get_kind(subject, predicate, obj)
    if shape in ranked_shapes:
        return ranked_shapes.index(shape) + 1
    return 8
def _estimate_cardinality(self, subject, predicate, obj):
    """
        Estimate the cardinality of a triple pattern from the index statistics held on the instance.

        Each term that is neither ``None`` nor a variable is first converted to an integer,
        taken from the text after the first ``_`` in the term. The pattern shape reported by
        ``get_kind`` for the converted terms then selects one entry of
        ``self._spo_index_stats``, ``self._pos_index_stats`` or ``self._osp_index_stats``.

        Args:
          - subject ``string`` - Subject of the triple pattern
          - predicate ``string`` - Predicate of the triple pattern
          - obj ``string`` - Object of the triple pattern

        Returns:
          The value stored in the statistics entry that matches the pattern shape.

        Raises:
          Exception: if ``get_kind`` reports a shape that is not handled here.
    """
    def as_identifier(term):
        # None and variables carry no identifier.
        if term is None or is_variable(term):
            return None
        # The integer sits in the second "_"-separated field of the term.
        return int(term.split('_')[1])

    s = as_identifier(subject)
    p = as_identifier(predicate)
    o = as_identifier(obj)

    # shape -> (name of the statistics attribute, key inside it).
    # Attributes are resolved only after the shape is known, so only one is read.
    stat_location = {
        'spo': ('_spo_index_stats', 'same_spo_row_count'),
        '???': ('_spo_index_stats', 'row_count'),
        's??': ('_spo_index_stats', 'same_s_row_count'),
        'sp?': ('_spo_index_stats', 'same_sp_row_count'),
        '?p?': ('_pos_index_stats', 'same_p_row_count'),
        '?po': ('_pos_index_stats', 'same_po_row_count'),
        's?o': ('_osp_index_stats', 'same_os_row_count'),
        '??o': ('_osp_index_stats', 'same_o_row_count'),
    }
    shape = get_kind(s, p, o)
    if shape not in stat_location:
        raise Exception("Unkown pattern type: {}".format(shape))

    stats_attr, stat_key = stat_location[shape]
    return getattr(self, stats_attr)[stat_key]