def get_direct_predicate_value(subj, pred, additional_lookup=None):
    """Fetch object/value entries for facts matching (subj, pred).

    Looks in ``toflerdb_eternity`` first and falls back to
    ``toflerdb_ontology`` when no rows match.  Results are then augmented
    with any matching entries from ``additional_lookup``.

    :param subj: a single subject id or a list of subject ids
    :param pred: predicate to match
    :param additional_lookup: optional {subject: {predicate: [values]}}
        mapping merged into the result (deduplicated)
    :returns: list of values (object column preferred over value column)
    """
    # Mutable default argument ({}) replaced with a None sentinel so the
    # dict is not shared across calls.
    if additional_lookup is None:
        additional_lookup = {}
    if not subj:
        return []
    if not isinstance(subj, list):
        subj = [subj]
    or_clauses = ' OR '.join(['subject = %s' for _ in subj])
    query = """
        SELECT subject, predicate, object, value FROM toflerdb_eternity
        WHERE predicate = %s AND """
    query += '(' + or_clauses + ')'
    query_data = tuple([pred] + subj)
    response = Common.execute_query(query, query_data)
    if len(response) == 0:
        # nothing in eternity: fall back to the ontology table
        query = """
            SELECT subject, predicate, object, value FROM toflerdb_ontology
            WHERE predicate = %s AND """
        query += '(' + or_clauses + ')'
        response = Common.execute_query(query, query_data)
    new_values = []
    for res in response:
        # the payload lives in either `object` or `value`
        val = res['object']
        if val is None:
            val = res['value']
        new_values.append(val)
    for s in subj:
        if s in additional_lookup and pred in additional_lookup[s]:
            new_values += [
                x for x in additional_lookup[s][pred]
                if x not in new_values
            ]
    return new_values
def reboot(self):
    """Rebuild the ES index and SQL tables, then seed the base ontology."""
    self.create_es_mappings()
    self.create_sql_tables()
    self.log("Inserting Base Ontology ...")
    # Minimal bootstrap triples required before any ontology file can load.
    seed_triples = [
        ('to:type', 'to:subClassOf', 'to:Property'),
        ('to:type', 'to:domain', 'to:Entity'),
        ('to:type', 'to:domain', 'to:ComplexProperty'),
        ('to:type', 'to:domain', 'to:ComplexRelationalProperty'),
        ('to:type', 'to:range', 'to:Token'),
        ('to:templatizedId', 'to:subClassOf', 'to:Property'),
        ('to:templatizedId', 'to:domain', 'to:Entity'),
        ('to:templatizedId', 'to:domain', 'to:ComplexProperty'),
        ('to:templatizedId', 'to:domain', 'to:ComplexRelationalProperty'),
        ('to:templatizedId', 'to:range', 'to:ID'),
        ('to:templatizedId', 'to:isUnique', 'True'),
        ('to:label', 'to:subClassOf', 'to:Property'),
        ('to:label', 'to:domain', 'to:Entity'),
        ('to:label', 'to:range', 'to:String'),
    ]
    insert_sql = """
        INSERT INTO toflerdb_ontology(subject, predicate, value)
        VALUES(%s, %s, %s)
    """
    for triple in seed_triples:
        Common.execute_query(insert_sql, triple)
    api.insert_ontology(file=self.ONTOLOGY_PATH, validation=False)
def get_snapshot_nodes(**kwargs):
    """Search snapshot documents by a single field filter.

    Exactly one keyword argument is expected: field name -> value or
    list of values (OR-ed together).  Each value is matched against the
    field itself and any nested "*.field" path.

    :returns: {es_id: source_doc} dict when there are hits, else None
    """
    if not kwargs:
        # The original relied on variables leaked from a for/break loop
        # and raised NameError on empty kwargs; fail cleanly instead.
        return None
    # Only the first keyword argument is considered (mirrors the original
    # for-loop-with-break).  items() replaces py2-only iteritems().
    key, val = next(iter(kwargs.items()))
    if not isinstance(val, list):
        val = [val]
    es = Common.get_elasticsearch_connection()
    q_string_arr = []
    for elem in val:
        q_string_arr.append(
            {"query_string": {
                "fields": [key, "*.%s" % key],
                "query": elem
            }})
    try:
        query = {"query": {"bool": {"should": q_string_arr}}}
        response = es.search(index=SNAPSHOT_INDEX,
                             doc_type=SNAPSHOT_DOC_TYPE, body=query)
        entities = {}
        if response['hits']['total'] > 0:
            for hit in response['hits']['hits']:
                entities[hit['_id']] = hit['_source']
            return entities
        return None
    except Exception as e:
        # best-effort search: log and signal "no results" on any failure
        logger = Common.get_logger()
        logger.error(str(e))
        return None
def delete_from_eternity(fact_ids):
    """Hard-delete rows from toflerdb_eternity by fact id.

    :param fact_ids: list of fact ids; an empty list is a no-op
    """
    if not fact_ids:
        # the original built "IN ()", which is invalid SQL
        return
    query = """
        DELETE FROM toflerdb_eternity WHERE fact_id IN
    """
    placeholder = ', '.join(['%s' for _ in fact_ids])
    query = '%s (%s)' % (query, placeholder)
    Common.execute_query(query, tuple(fact_ids))
def delete_snapshot_nodes_by_id(id_list):
    """Delete snapshot ES documents by id (best-effort).

    Failures are logged per id and do not stop the remaining deletes.

    :param id_list: a single document id or a list of ids
    """
    if not isinstance(id_list, list):
        id_list = [id_list]
    es = Common.get_elasticsearch().get_connection()
    for i in id_list:
        try:
            es.delete(index=SNAPSHOT_INDEX, doc_type=SNAPSHOT_DOC_TYPE, id=i)
        except ElasticsearchException as e:
            # "except X, e" is Python 2-only; "as e" works on 2.6+ and 3.x
            Common.get_logger().error(
                'Error deleting node<%s>\n%s' % (i, e))
def execute_query(query, **kwargs):
    """Run an ES search against the snapshot index.

    :param query: ES query body (dict)
    :param kwargs: extra keyword args forwarded to ``es.search``
    :returns: raw ES response dict, or None (implicitly) if the search raised
    """
    es = Common.get_elasticsearch().get_connection()
    try:
        response = es.search(index=SNAPSHOT_INDEX,
                             doc_type=SNAPSHOT_DOC_TYPE, body=query,
                             **kwargs)
        return response
    except ElasticsearchException as e:
        # "except X, e" is Python 2-only; "as e" works on 2.6+ and 3.x.
        # Errors are logged and None is returned to the caller.
        Common.get_logger().error('Error ES query execution\n%s' % e)
def erase_facts_from_eternity(fact_ids, author=None):
    """Soft-delete facts: mark them DELETED with timestamp and author.

    :param fact_ids: a fact id or list of fact ids; empty list is a no-op
    :param author: recorded in status_updated_by
    """
    if not isinstance(fact_ids, list):
        fact_ids = [fact_ids]
    if not fact_ids:
        # the original built "IN ()", which is invalid SQL
        return
    query = """
        UPDATE toflerdb_eternity
        SET status = %s, status_updated_on = %s, status_updated_by = %s
        WHERE fact_id IN
    """
    placeholder_str = ', '.join(['%s' for _ in fact_ids])
    query = '%s (%s)' % (query, placeholder_str)
    query_data = tuple(
        [FACT_STATUS.DELETED, datetime.datetime.now(), author] + fact_ids)
    Common.execute_query(query, query_data)
def exists_in_eternity(subj, additional_lookup=None, eternity_only=False,
                       ontology_only=False):
    """Check whether *subj* appears as a subject in eternity/ontology.

    :param subj: subject id to look up
    :param additional_lookup: optional dict of not-yet-persisted subjects
        that also count as existing
    :param eternity_only: skip the ontology fallback
    :param ontology_only: skip the eternity check
    :returns: True if found in the selected table(s) or additional_lookup
    """
    # Mutable default argument ({}) replaced with a None sentinel.
    if additional_lookup is None:
        additional_lookup = {}
    response = []
    if not ontology_only:
        query = """
            SELECT subject FROM toflerdb_eternity WHERE subject = %s
        """
        response = Common.execute_query(query, subj)
    if len(response) == 0 and not eternity_only:
        # not found in eternity (or eternity skipped): try the ontology
        query = """
            SELECT subject FROM toflerdb_ontology WHERE subject = %s
        """
        response = Common.execute_query(query, subj)
    return len(response) > 0 or subj in additional_lookup
def get_related_nodes(nodeid):
    """Fetch the nodes referenced by the node with the given id.

    :param nodeid: snapshot node id
    :returns: {'relationships': [...], 'nodes': [...]} or None when the
        node is missing, has no references, or no referenced node is found
    """
    es = Common.get_elasticsearch_connection()
    lookup = {"query": {"match": {"id": nodeid}}}
    result = es.search(index=SNAPSHOT_INDEX, doc_type=SNAPSHOT_DOC_TYPE,
                       body=lookup)
    if result['hits']['total'] < 1:
        return None
    source_node = result['hits']['hits'][0]
    references = _gather_references(source_node['_source'])
    if len(references) == 0:
        return None
    # one match clause per referenced id, OR-ed via a bool/should query
    should_clauses = [{"match": {"id": ref[1]}} for ref in references]
    lookup = {"query": {"bool": {"should": should_clauses}}}
    result = es.search(index=SNAPSHOT_INDEX, doc_type=SNAPSHOT_DOC_TYPE,
                       body=lookup)
    if result['hits']['total'] < 1:
        return None
    return {'relationships': references, 'nodes': result['hits']['hits']}
def get_complete_snapshot_mapping():
    """Return the `properties` section of the snapshot doc-type mapping."""
    es = Common.get_elasticsearch_connection()
    raw_mapping = es.indices.get_mapping(index=SNAPSHOT_INDEX,
                                         doc_type=SNAPSHOT_DOC_TYPE)
    doc_mapping = raw_mapping[SNAPSHOT_INDEX]['mappings'][SNAPSHOT_DOC_TYPE]
    return doc_mapping['properties']
def get_fact_ids(fact_tuples, author=None):
    """Look up eternity fact_ids matching the given fact tuples.

    Templatized subject ids are resolved (scoped by *author*) to the
    stored subject id before matching; tuples whose templatized id
    cannot be resolved are skipped entirely.

    :param fact_tuples: iterable of (subject, predicate, object) tuples
    :param author: user id used when expanding templatized subject ids
    :returns: possibly-empty list of matching fact_id values
    """
    query = """
        SELECT fact_id FROM toflerdb_eternity WHERE """
    placeholder = []
    query_data = []
    for row in fact_tuples:
        row = list(row)
        subj = row[0]
        if templatizedid.is_templatized_id(subj):
            # resolve a templatized id into the real stored subject id
            subj = templatizedid.append_userid(subj, author)
            subj = get_id_by_templatized_id(subj)
            if not subj:
                # unresolved id: this tuple cannot match any stored fact
                continue
        placeholder.append('(subject=%s AND predicate=%s AND \
(object=%s OR value=%s))')
        (subj, row[1], row[2]) = convert_elements_to_string(
            subj, row[1], row[2])
        # the object is bound twice: once for `object`, once for `value`
        query_data += [subj, row[1], row[2], row[2]]
    if not len(placeholder):
        # no usable tuples -> avoid running a query with an empty WHERE
        return []
    placeholder_str = ' OR '.join(placeholder)
    query += placeholder_str
    response = Common.execute_query(query, tuple(query_data))
    retval = []
    for res in response:
        retval.append(res['fact_id'])
    return retval
def insert_into_ontology(inputs):
    """Bulk-insert ontology rows with a single multi-VALUES statement.

    :param inputs: list of dicts carrying subject/predicate/object/value;
        an empty or falsy input is a no-op
    """
    if not inputs:
        return
    sql = """
        INSERT INTO toflerdb_ontology(subject, predicate, object, value)
        VALUES """
    params = []
    value_groups = []
    for item in inputs:
        value_groups.append('(%s, %s, %s, %s)')
        params.extend([item['subject'], item['predicate'],
                       item['object'], item['value']])
    sql += ', '.join(value_groups)
    Common.execute_query(sql, tuple(params))
def lock_nodes(nodes):
    """Best-effort lock of node ids in the cache for five minutes.

    Uses SET with nx + a TTL.  NOTE(review): the return value of ``set``
    is ignored, so an already-held lock is not detected here — confirm
    callers do not rely on lock acquisition succeeding.

    :param nodes: a node id or list of node ids
    :returns: the list of node ids that were attempted
    """
    LOCK_INTERVAL = 5 * 60  # seconds
    if not isinstance(nodes, list):
        nodes = [nodes]
    cache = Common.get_cache_connection()
    for node_id in nodes:
        cache.set(node_id, node_id, ex=LOCK_INTERVAL, nx=True)
    return nodes
def get_type(element):
    """Return type information for *element* from the ontology table.

    NOTE(review): on a hit this returns ``response[0]['predicate']``,
    which is always the constant 'rdfs:type' used in the WHERE clause —
    the intent was probably ``response[0]['object']`` (or value);
    confirm against callers before changing.
    NOTE(review): the rest of this module uses the 'to:' prefix
    (e.g. 'to:type'); verify 'rdfs:type' here is intentional.
    """
    query = """
        SELECT * FROM toflerdb_ontology
        WHERE subject = %s AND predicate = %s
    """
    query_data = (element, 'rdfs:type')
    response = Common.execute_query(query, query_data)
    if len(response):
        return response[0]['predicate']
    return None
def exists_in_ontology(subj, pred, objc):
    """Return True if the (subj, pred, objc) triple exists in the ontology.

    *objc* is matched against both the object and the value column.
    """
    query = """
        SELECT subject FROM toflerdb_ontology
        WHERE subject = %s AND predicate = %s
        AND (object = %s OR value = %s)
    """
    response = Common.execute_query(query, (subj, pred, objc, objc))
    # direct boolean result replaces the if/return True/return False chain
    return len(response) > 0
def find_nodes_with_incoming_references(nodeid):
    """Return snapshot docs that mention *nodeid* anywhere (via `_all`).

    :returns: list of ES hit dicts; empty list when nothing matches
    """
    search_body = {"query": {"match": {"_all": nodeid}}}
    es = Common.get_elasticsearch_connection()
    result = es.search(index=SNAPSHOT_INDEX, doc_type=SNAPSHOT_DOC_TYPE,
                       body=search_body)
    if result['hits']['total'] < 1:
        return []
    return result['hits']['hits']
def get_fact_status(fact_ids):
    """Return {fact_id: status} for the given fact ids.

    :param fact_ids: list of fact ids; empty input yields {}
    :returns: dict mapping fact_id to status (unknown ids are omitted)
    """
    if not fact_ids:
        # the original built "IN ()", which is invalid SQL
        return {}
    query = """
        SELECT fact_id, status FROM toflerdb_eternity WHERE fact_id IN
    """
    placeholder_str = ', '.join(['%s' for _ in fact_ids])
    query = '%s (%s)' % (query, placeholder_str)
    response = Common.execute_query(query, tuple(fact_ids))
    return {res['fact_id']: res['status'] for res in response}
def get_id_by_templatized_id(temp_id):
    """Resolve a templatized id to its stored subject id, or None."""
    sql = """
        SELECT subject FROM toflerdb_eternity
        WHERE predicate = %s AND value = %s
    """
    rows = Common.execute_query(sql, ('to:templatizedId', temp_id))
    if len(rows):
        # first match wins
        return rows[0]['subject']
    return None
def is_any_type_facts(fact_ids):
    """Return True if any given fact has predicate 'to:type'.

    :param fact_ids: a fact id or list of fact ids; empty input -> False
    """
    if not isinstance(fact_ids, list):
        fact_ids = [fact_ids]
    if not fact_ids:
        # the original built "IN ()", which is invalid SQL
        return False
    placeholder_str = ', '.join(['%s' for _ in fact_ids])
    query = """
        SELECT fact_id FROM toflerdb_eternity
        WHERE fact_id IN (%s) AND predicate = 'to:type'
    """ % placeholder_str
    response = Common.execute_query(query, tuple(fact_ids))
    return len(response) > 0
def get_all_namespaces():
    """Return the distinct namespace prefixes of all ontology subjects.

    The prefix is the part before the first ':' in each subject.
    First-seen order is preserved.

    :returns: list of unique namespace strings
    """
    query = """
        SELECT DISTINCT(subject) AS subject FROM toflerdb_ontology
    """
    response = Common.execute_query(query)
    all_namespaces = []
    seen = set()
    for res in response:
        namespace = res['subject'].split(':')[0]
        # set membership keeps this O(n) (was O(n^2) list scan) while
        # still returning namespaces in first-seen order
        if namespace not in seen:
            seen.add(namespace)
            all_namespaces.append(namespace)
    return all_namespaces
def create_es_mappings(self):
    """Recreate the snapshot ES index and install its doc-type mapping.

    Drops any existing index (ignoring 400/404), creates it from the
    index mapping file, then applies the doc-type mapping file.
    """
    self.log("Creating Index Mapping ...")
    es = Common.get_elasticsearch_connection()
    es.indices.delete(
        index=config.SNAPSHOT_INDEX, ignore=[400, 404])
    # use context managers so the mapping files are always closed
    # (the originals were opened and never closed)
    with open(self.INDEX_MAPPING_FILE) as fp:
        index_mapping = fp.read()
    es.indices.create(
        index=config.SNAPSHOT_INDEX, ignore=400, body=index_mapping)
    with open(self.DOCTYPE_MAPPING_FILE) as fp:
        doctype_mapping = fp.read()
    es.indices.put_mapping(
        index=config.SNAPSHOT_INDEX, doc_type=config.SNAPSHOT_DOC_TYPE,
        ignore=400, body=doctype_mapping)
def find_subfact_ids(fact_ids):
    """Find fact ids of facts whose subject is a predicate of the given facts.

    Two-step lookup: collect the predicates of *fact_ids*, then return
    the ids of facts whose subject is one of those predicates.

    :param fact_ids: a fact id or list of fact ids
    :returns: possibly-empty list of fact ids
    """
    if not isinstance(fact_ids, list):
        fact_ids = [fact_ids]
    if not fact_ids:
        # the original built "IN ()", which is invalid SQL
        return []
    query = """
        SELECT predicate FROM toflerdb_eternity WHERE fact_id IN
    """
    placeholder_str = ', '.join(['%s' for _ in fact_ids])
    query = '%s (%s)' % (query, placeholder_str)
    response = Common.execute_query(query, tuple(fact_ids))
    pred = [res['predicate'] for res in response]
    if not pred:
        # no matching facts -> nothing to look up in step two
        # (also avoids a second invalid "IN ()" query)
        return []
    query = """
        SELECT fact_id FROM toflerdb_eternity WHERE subject IN
    """
    placeholder_str = ', '.join(['%s' for _ in pred])
    query = '%s (%s)' % (query, placeholder_str)
    response = Common.execute_query(query, tuple(pred))
    return [res['fact_id'] for res in response]
def tuple_exists_in_eternity(subj, pred, objc):
    """Return True if a non-deleted (subj, pred, objc) fact is stored.

    All three elements are converted to their string form first, and
    *objc* is matched against both the object and the value column.
    """
    (subj, pred, objc) = convert_elements_to_string(subj, pred, objc)
    query = """
        SELECT subject FROM toflerdb_eternity
        WHERE subject = %s AND predicate = %s
        AND (object = %s OR value = %s) AND status != %s
    """
    response = Common.execute_query(
        query, (subj, pred, objc, objc, FACT_STATUS.DELETED))
    # commented-out debug prints removed; direct boolean result
    return len(response) > 0
def get_fact_tuples_by_fact_ids(fact_ids):
    """Return (subject, predicate, object-or-value) tuples for fact ids.

    :param fact_ids: a fact id or list of fact ids; empty input -> []
    """
    if not isinstance(fact_ids, list):
        fact_ids = [fact_ids]
    if not fact_ids:
        # the original built "IN ()", which is invalid SQL
        return []
    placeholder_str = ', '.join(['%s' for _ in fact_ids])
    query = """
        SELECT subject, predicate, object, value FROM toflerdb_eternity
        WHERE fact_id IN (%s)
    """ % placeholder_str
    response = Common.execute_query(query, tuple(fact_ids))
    # prefer the object column, falling back to value when object is falsy
    return [(res['subject'], res['predicate'],
             res['object'] or res['value']) for res in response]
def find_related_facts_by_node_id(node_ids):
    """Return fact ids where any given node id is the subject or object.

    :param node_ids: a node id or list of node ids; empty input -> []
    """
    if not isinstance(node_ids, list):
        node_ids = [node_ids]
    if not node_ids:
        # the original built "IN ()", which is invalid SQL
        return []
    placeholder_str = ', '.join(['%s' for _ in node_ids])
    query = """
        SELECT fact_id FROM toflerdb_eternity
        WHERE subject IN (%s) OR object IN (%s)
    """ % (placeholder_str, placeholder_str)
    # ids are bound twice: once for the subject IN, once for the object IN
    query_data = tuple(node_ids + node_ids)
    response = Common.execute_query(query, query_data)
    return [res['fact_id'] for res in response]
def freetextsearch(query):
    """Free-text search over the snapshot index via the `_all` field.

    :param query: free-text query string
    :returns: list of flattened output dicts, one per hit
    """
    body = {"query": {"match": {"_all": query}}}
    es = Common.get_elasticsearch_connection()
    result = es.search(index=SNAPSHOT_INDEX, doc_type=SNAPSHOT_DOC_TYPE,
                       body=body)
    output = []
    for hit in result['hits']['hits']:
        doc = hit['_source']
        flattened = {}
        # helper walks the source doc and fills the flattened output dict
        _create_freetextsearch_output(doc, doc, flattened)
        output.append(flattened)
    return output
def add_input(self, subj, pred, objc, prev=None, author=None):
    """Queue an ontology triple for insertion (fluent API).

    Triples already present in the ontology are skipped.  New triples
    are normalized (both directions), optionally validated, and added
    to the pending input list with a fresh fact id.  ``prev`` and
    ``author`` are accepted for interface compatibility but unused here.

    :returns: self
    """
    self.normalize_input(subj, pred, objc)
    self.inverse_normalize_input(objc, pred, subj)
    # guard clause: nothing to queue when the triple is already stored
    if dbutils.exists_in_ontology(subj, pred, objc):
        return self
    if self._validation:
        self.validate(subj, pred, objc)
    entry = {
        'subject': subj,
        'predicate': pred,
        'object': objc,
        'fact_id': Common.generate_id(None),
    }
    self._input_list.append(entry)
    return self
def get_direct_inverse_predicate_value(objc, pred, additional_lookup=None):
    """Fetch subjects of facts whose object/value matches *objc*.

    Looks in ``toflerdb_eternity`` first and falls back to
    ``toflerdb_ontology`` when no rows match; results are augmented with
    matching ``additional_lookup`` entries.

    :param objc: a single object/value or a list of them
    :param pred: predicate to match
    :param additional_lookup: optional {objc: {pred: [values]}} mapping
        merged into the result (deduplicated)
    :returns: list of subject ids
    """
    # Mutable default argument ({}) replaced with a None sentinel.
    if additional_lookup is None:
        additional_lookup = {}
    if not objc:
        return []
    if not isinstance(objc, list):
        objc = [objc]
    or_clauses = ' OR '.join(['object = %s OR value = %s' for _ in objc])
    query = """
        SELECT subject, predicate, object, value FROM toflerdb_eternity
        WHERE predicate = %s AND """
    query += '(' + or_clauses + ')'
    objc_data = []
    for x in objc:
        # each element binds twice: once for object, once for value
        objc_data += [x, x]
    query_data = tuple([pred] + objc_data)
    response = Common.execute_query(query, query_data)
    if len(response) == 0:
        # nothing in eternity: fall back to the ontology table
        query = """
            SELECT subject, predicate, object, value FROM toflerdb_ontology
            WHERE predicate = %s AND """
        query += '(' + or_clauses + ')'
        response = Common.execute_query(query, query_data)
    new_values = [res['subject'] for res in response]
    for s in objc:
        if s in additional_lookup and pred in additional_lookup[s]:
            new_values += [
                x for x in additional_lookup[s][pred]
                if x not in new_values
            ]
    return new_values
def add_templatized_id_map(self, temp_id):
    """Map a templatized id to a tofler id, creating the mapping if needed.

    Non-throwaway ("keep") ids are first looked up in eternity; on a hit
    the stored id is cached, otherwise a fresh id is generated and the
    temp id is recorded as newly created.

    :param temp_id: templatized id to resolve
    :returns: the tofler id mapped to *temp_id*
    """
    if temp_id in self._templatized_id_map:
        return self._templatized_id_map[temp_id]
    stored_id = []
    if templatizedid.is_keep_id(temp_id):
        # persistent ids may already have a stored mapping in eternity
        stored_id = dbutils.get_inverse_predicate_value(
            temp_id, 'to:templatizedId', level=1)
    if len(stored_id):
        mapped_id = stored_id[0]
    else:
        mapped_id = Common.generate_id(None)
        # remember that this mapping did not exist before this call
        self._new_templatized_id.append(temp_id)
    self._templatized_id_map[temp_id] = mapped_id
    return self._templatized_id_map[temp_id]
def add_input(self, subj, pred, objc, prev=None):
    """Queue one (subj, pred, objc) fact for insertion (fluent API).

    Order matters here: templatized ids are resolved first, then
    duplicate suppression, then normalization, then validation (which
    must undo normalization on failure) before the eternity/snapshot
    inputs are created.

    :param prev: previous value, forwarded to the eternity input
    :returns: self
    """
    self._fact_tuple = (subj, pred, objc)
    # resolve templatized ids into real stored ids before anything else
    subj = self.handle_templatized_id(subj)
    pred = self.handle_templatized_id(pred)
    objc = self.handle_templatized_id(
        objc, additional_check=self.is_templatized_object,
        additional_check_args=(subj, pred, objc))
    if self._ignore_duplicate and \
            eternity_dbutils.tuple_exists_in_eternity(subj, pred, objc):
        # fact already stored (and not deleted): silently skip it
        return self
    self.normalize_input(subj, pred, objc)
    if self._validation and not self.is_valid(subj, pred, objc):
        # validation failed: roll back the normalization side effects
        self.delete_normalize_input(subj, pred, objc)
        return self
    self._fact_id = Common.generate_id(None)
    self.create_eternity_input(subj, pred, objc, prev)
    self.create_snapshot_input(subj, pred, objc)
    return self