# Largest page size get_entities() will accept.
MAX_QUERY_LIMIT = 10000


def get_entities(offset=0, limit=100):
    """Return one page of rows from the entities table.

    Args:
        offset: Number of rows to skip (bound to the SQL OFFSET).
        limit: Maximum number of rows to return (bound to the SQL LIMIT).

    Returns:
        Whatever rows_to_dicts() produces for the fetched rows
        (presumably one dict per row -- confirm against rows_to_dicts).

    Raises:
        ValueError: If limit is greater than MAX_QUERY_LIMIT.
    """
    # Reject oversized pages before touching the database.
    if limit > MAX_QUERY_LIMIT:
        raise ValueError('query limit {} too high'.format(limit))

    # NOTE(review): the query has no ORDER BY, so which rows land on which
    # page is left to the database -- confirm callers do not rely on a
    # stable ordering between pages.
    sql = ' SELECT * FROM entities OFFSET %s LIMIT %s'
    cursor = execute_sql(sql, (offset, limit), **EDB)
    rows = cursor.fetchall()
    return rows_to_dicts(rows, cursor)
def get_document_relations(document_id):
    """Fetch the relation annotations stored for a document.

    Selects every row of the relations table whose document_id matches
    the argument and passes each one through _db_relation_to_jsonld().

    Args:
        document_id: Value bound to the document_id column in the query.

    Returns:
        A list with one converted relation per matching row.
    """
    query = ' SELECT * FROM relations WHERE document_id = %s'
    params = (document_id, )
    cur = execute_sql(query, params, **MDB)
    # TODO: KeyError instead of empty list return when doc not in DB?
    records = rows_to_dicts(cur.fetchall(), cur)
    return list(map(_db_relation_to_jsonld, records))
def get_entity_mentions(entity_id, offset, limit):
    """Fetch one page of mention annotations grounded to an entity.

    Matches rows of the mentions table whose data column contains
    {"body": {"id": entity_id}} (the @> containment operator) and passes
    each one through _db_mention_to_jsonld().

    Args:
        entity_id: Entity identifier looked for under data.body.id.
        offset: Number of rows to skip (bound to the SQL OFFSET).
        limit: Maximum number of rows to return (bound to the SQL LIMIT).

    Returns:
        A list with one converted mention per matching row.
    """
    query = ' SELECT * FROM mentions WHERE data @> %s OFFSET %s LIMIT %s'
    # JSON document the data column must contain for a row to match.
    containment = json.dumps({"body": {"id": entity_id}})
    cur = execute_sql(query, (containment, offset, limit), **MDB)
    records = rows_to_dicts(cur.fetchall(), cur)
    return list(map(_db_mention_to_jsonld, records))
def _get_relation_instances_denormalized(entity1_id, entity2_id, offset,
                                         limit):
    """Fetch relations between two entities via denormalized columns.

    Implementation using the copies of mention data kept on the relation
    rows (__from_data_body_id / __to_data_body_id), so no join against
    the mentions table is needed.

    Args:
        entity1_id: Identifier of one entity of the pair.
        entity2_id: Identifier of the other entity of the pair.
        offset: Number of rows to skip (bound to the SQL OFFSET).
        limit: Maximum number of rows to return (bound to the SQL LIMIT).

    Returns:
        A list with one _db_relation_to_jsonld() result per matching row.
    """
    # Assuming symmetry, relations are stored so that the e1id < e2id;
    # put the pair in that order before querying.
    from_id, to_id = (
        (entity2_id, entity1_id) if entity1_id > entity2_id
        else (entity1_id, entity2_id)
    )
    query = (
        ' SELECT * FROM relations'
        ' WHERE __from_data_body_id = %s AND __to_data_body_id = %s'
        ' OFFSET %s LIMIT %s'
    )
    cur = execute_sql(query, (from_id, to_id, offset, limit), **MDB)
    records = rows_to_dicts(cur.fetchall(), cur)
    return list(map(_db_relation_to_jsonld, records))
def _get_relation_instances_join(entity1_id, entity2_id, offset, limit):
    """Fetch relations between two entities by joining on mentions.

    Implementation with a double join against the mentions table (slow
    for large DBs): a relation matches when its from_id mention contains
    {"body": {"id": entity1_id}} and its to_id mention contains
    {"body": {"id": entity2_id}}.

    Args:
        entity1_id: Entity id required on the relation's "from" mention.
        entity2_id: Entity id required on the relation's "to" mention.
        offset: Number of rows to skip (bound to the SQL OFFSET).
        limit: Maximum number of rows to return (bound to the SQL LIMIT).

    Returns:
        A list with one _db_relation_to_jsonld() result per matching row.
    """
    query = (
        ' SELECT r.*'
        ' FROM relations AS r, mentions AS m1, mentions AS m2'
        ' WHERE m1.data @> %s AND m2.data @> %s'
        ' AND r.from_id = m1.id AND r.to_id = m2.id'
        ' OFFSET %s LIMIT %s'
    )
    # One JSON containment document per side of the relation.
    from_json, to_json = (
        json.dumps({'body': {'id': entity_id}})
        for entity_id in (entity1_id, entity2_id)
    )
    cur = execute_sql(query, (from_json, to_json, offset, limit), **MDB)
    records = rows_to_dicts(cur.fetchall(), cur)
    return list(map(_db_relation_to_jsonld, records))
def get_entity(id_):
    """Get entity with given id.

    Joins node_text and node_category on oid, keeps one row per
    case-insensitive name, and returns the row with the highest score.

    Args:
        id_: Value matched against node_text.oid.

    Returns:
        The first element of rows_to_dicts() for the selected row; the
        query selects the columns id, label, name and score.

    Raises:
        KeyError: If no row exists for id_. The missing id is attached
            to the exception.
    """
    sql = (
        ' SELECT * FROM ('
        ' SELECT DISTINCT ON (LOWER(t2.name))'
        ' t2.id AS id, t2.label AS label, t2.name AS name, t2.score AS score'
        ' FROM ('
        ' SELECT t.oid AS id, c.label AS label, t.text AS name, t.score AS score'
        ' FROM node_text AS t, node_category AS c'
        ' WHERE t.oid = c.oid AND t.oid = %s'
        ' ORDER BY t.text'
        ' ) AS t2'
        ' ) AS t3'
        ' ORDER BY score DESC LIMIT 1'
    )
    cursor = execute_sql(sql, (id_, ), **MDB)
    rows = cursor.fetchall()
    if not rows:
        # Carry the id so the failure can be diagnosed from the traceback.
        raise KeyError(id_)
    # LIMIT 1 caps the result at a single row, so a non-empty result holds
    # exactly one row and no further length check is needed.
    return rows_to_dicts(rows, cursor)[0]