Ejemplo n.º 1
0
def get_entities(offset=0, limit=100):
    """Fetch one page of rows from the ``entities`` table.

    Args:
        offset: Value bound to the query's ``OFFSET`` placeholder.
        limit: Value bound to the query's ``LIMIT`` placeholder; must not
            exceed 10000.

    Returns:
        Whatever ``rows_to_dicts`` produces for the fetched rows.

    Raises:
        ValueError: If ``limit`` is greater than 10000.
    """
    # Refuse oversized pages up front, before any query is issued.
    if limit > 10000:  # TODO avoid magic number
        raise ValueError('query limit {} too high'.format(limit))
    query = '''
SELECT * FROM entities
OFFSET %s LIMIT %s'''
    params = (offset, limit)
    cur = execute_sql(query, params, **EDB)
    return rows_to_dicts(cur.fetchall(), cur)
Ejemplo n.º 2
0
def get_entity(id_):
    """Get the ``entities`` row with the given id.

    The lookup is a parameterized ``SELECT`` run through ``execute_sql``
    with ``EDB`` expanded as keyword arguments.

    Args:
        id_: Value matched against the ``id`` column.

    Returns:
        The result of ``row_to_dict`` for the single fetched row.

    Raises:
        KeyError: If no row matches; the exception carries ``id_``.
    """
    sql = '''
SELECT * FROM entities
WHERE id = %s'''
    cursor = execute_sql(sql, (id_, ), **EDB)
    if cursor.rowcount == 0:
        # Attach the missing id so the failure can be diagnosed from the
        # traceback or from ``exc.args``.
        raise KeyError(id_)
    assert cursor.rowcount == 1  # id is primary key
    return row_to_dict(cursor.fetchone(), cursor)
Ejemplo n.º 3
0
def get_document_relations(document_id):
    """Fetch the relation annotations stored for one document.

    Selects every ``relations`` row whose ``document_id`` column matches
    the argument and passes each converted row through
    ``_db_relation_to_jsonld``. An empty result yields an empty list.
    """
    query = '''
SELECT * FROM relations
WHERE document_id = %s'''
    cur = execute_sql(query, (document_id, ), **MDB)
    # TODO: KeyError instead of empty list return when doc not in DB?
    as_dicts = rows_to_dicts(cur.fetchall(), cur)
    return list(map(_db_relation_to_jsonld, as_dicts))
Ejemplo n.º 4
0
def get_document(id_):
    """Get the ``documents`` row with the given id.

    The lookup is a parameterized ``SELECT`` run through ``execute_sql``
    with ``MDB`` expanded as keyword arguments.

    Args:
        id_: Value matched against the ``id`` column.

    Returns:
        The result of ``row_to_dict`` for the single fetched row.

    Raises:
        KeyError: If no row matches; the exception carries ``id_``.
    """
    sql = '''
SELECT * FROM documents
WHERE id = %s'''
    cursor = execute_sql(sql, (id_, ), **MDB)
    if cursor.rowcount == 0:
        # Attach the missing id so the failure can be diagnosed from the
        # traceback or from ``exc.args``.
        raise KeyError(id_)
    assert cursor.rowcount == 1  # id is primary key
    return row_to_dict(cursor.fetchone(), cursor)
Ejemplo n.º 5
0
def get_mention(id_):
    """Get the mention with the given id, converted for output.

    The lookup is a parameterized ``SELECT`` on ``mentions`` run through
    ``execute_sql`` with ``MDB`` expanded as keyword arguments.

    Args:
        id_: Value matched against the ``id`` column.

    Returns:
        The result of ``_db_mention_to_jsonld`` applied to the fetched row
        after it has been converted by ``row_to_dict``.

    Raises:
        KeyError: If no row matches; the exception carries ``id_``.
    """
    sql = '''
SELECT * FROM mentions
WHERE  id = %s'''
    cursor = execute_sql(sql, (id_, ), **MDB)
    if cursor.rowcount == 0:
        # Attach the missing id so the failure can be diagnosed from the
        # traceback or from ``exc.args``.
        raise KeyError(id_)
    assert cursor.rowcount == 1  # id is primary key
    mention = row_to_dict(cursor.fetchone(), cursor)
    return _db_mention_to_jsonld(mention)
Ejemplo n.º 6
0
def get_entity_mentions(entity_id, offset, limit):
    """Fetch a page of mention annotations grounded to an entity.

    A mention is selected when its ``data`` column contains (``@>``) the
    JSON document ``{"body": {"id": entity_id}}``. ``offset`` and ``limit``
    are bound to the query's ``OFFSET`` and ``LIMIT`` placeholders. Each
    converted row is passed through ``_db_mention_to_jsonld``.
    """
    query = '''
SELECT * FROM mentions
WHERE  data @> %s
OFFSET %s
LIMIT  %s'''
    # JSON fragment that the stored mention data must contain.
    containment = json.dumps({"body": {"id": entity_id}})
    cur = execute_sql(query, (containment, offset, limit), **MDB)
    as_dicts = rows_to_dicts(cur.fetchall(), cur)
    return list(map(_db_mention_to_jsonld, as_dicts))
Ejemplo n.º 7
0
def _get_relation_instances_denormalized(entity1_id, entity2_id, offset,
                                         limit):
    """Fetch a page of relations between two entities.

    Implementation using copies of mention data stored on the relation
    row itself (the ``__from_data_body_id`` / ``__to_data_body_id``
    columns). ``offset`` and ``limit`` are bound to the query's ``OFFSET``
    and ``LIMIT`` placeholders. Each converted row is passed through
    ``_db_relation_to_jsonld``.
    """
    # Assuming symmetry, relations are stored so that the e1id < e2id;
    # put the pair in that order before querying.
    if entity1_id > entity2_id:
        from_id, to_id = entity2_id, entity1_id
    else:
        from_id, to_id = entity1_id, entity2_id
    query = '''
SELECT *
FROM   relations
WHERE  __from_data_body_id = %s
AND    __to_data_body_id = %s
OFFSET %s
LIMIT  %s'''
    cur = execute_sql(query, (from_id, to_id, offset, limit), **MDB)
    as_dicts = rows_to_dicts(cur.fetchall(), cur)
    return list(map(_db_relation_to_jsonld, as_dicts))
Ejemplo n.º 8
0
def _get_relation_instances_join(entity1_id, entity2_id, offset, limit):
    """Fetch a page of relations between two entities via a double join.

    Joins ``relations`` against ``mentions`` twice: the relation's
    ``from_id`` mention must contain ``{"body": {"id": entity1_id}}`` and
    its ``to_id`` mention must contain ``{"body": {"id": entity2_id}}``.
    Slow for large DBs. Each converted row is passed through
    ``_db_relation_to_jsonld``.
    """
    query = '''
SELECT r.*
FROM   relations AS r,
       mentions AS m1,
       mentions AS m2
WHERE  m1.data @> %s
  AND  m2.data @> %s
  AND  r.from_id = m1.id
  AND  r.to_id = m2.id
OFFSET %s
LIMIT  %s'''
    # JSON fragments the two joined mentions must contain, in
    # placeholder order.
    from_filter = json.dumps({'body': {'id': entity1_id}})
    to_filter = json.dumps({'body': {'id': entity2_id}})
    params = (from_filter, to_filter, offset, limit)
    cur = execute_sql(query, params, **MDB)
    as_dicts = rows_to_dicts(cur.fetchall(), cur)
    return list(map(_db_relation_to_jsonld, as_dicts))
Ejemplo n.º 9
0
def get_entity(id_):
    """Get entity with given id.

    Joins ``node_text`` with ``node_category`` on ``oid`` for the given
    id, keeps one row per distinct lower-cased name, and returns the row
    with the highest ``score``.

    Args:
        id_: Value matched against ``node_text.oid``.

    Returns:
        The first element of ``rows_to_dicts`` for the single fetched row
        (columns ``id``, ``label``, ``name``, ``score``).

    Raises:
        KeyError: If the query returns no rows; the exception carries
            ``id_``.
    """
    sql = '''
SELECT * FROM
(
	SELECT DISTINCT ON (LOWER(t2.name)) t2.id AS id, t2.label AS label, t2.name AS name, t2.score AS score FROM 
	(
		SELECT t.oid AS id, c.label AS label, t.text AS name, t.score AS score FROM node_text AS t, node_category AS c WHERE t.oid = c.oid AND t.oid = %s ORDER BY t.text
	)
	AS t2 
) 
AS t3 
ORDER BY score DESC
LIMIT 1'''
    cursor = execute_sql(sql, (id_, ), **MDB)
    rows = cursor.fetchall()
    if not rows:
        # Attach the missing id so the failure can be diagnosed from the
        # traceback or from ``exc.args``.
        raise KeyError(id_)
    assert len(rows) == 1  # guaranteed by LIMIT 1 in the query
    return rows_to_dicts(rows, cursor)[0]