예제 #1
0
def call_start_training():
    global should_continue
    while True:
        if not should_continue:
            break
        try:
            global model_training_queue
            task_id = model_training_queue.pop()
        except KeyError:
            time.sleep(10)
            continue

        cursor = get_connection().cursor()
        sql_to_prepare = 'CALL LTN_DEVELOP.LTN_TRAIN (?)'
        params = {
            'TASK_ID': task_id
        }
        psid = cursor.prepare(sql_to_prepare)
        ps = cursor.get_prepared_statement(psid)

        try:
            cursor.execute_prepared(ps, [params])
            get_connection().commit()
        except Exception, e:
            print 'Error: ', e
        finally:
예제 #2
0
def create_document_in_database(document_id, document_text,
                                document_visibility, task):
    cursor = get_connection().cursor()
    cursor.execute("SELECT COUNT(*) FROM LTN_DEVELOP.DOCUMENTS WHERE ID = ?",
                   (document_id, ))
    result = cursor.fetchone()
    if result[0] != 0:
        return "A document with the ID '%s' already exists" % (
            document_id, ), 409

    sql_to_prepare = 'CALL LTN_DEVELOP.add_document (?, ?, ?)'
    params = {
        'DOCUMENT_ID': document_id,
        'DOCUMENT_TEXT': document_text.replace("'", "''"),
        'TASK': task
    }
    psid = cursor.prepare(sql_to_prepare)
    ps = cursor.get_prepared_statement(psid)
    cursor.execute_prepared(ps, [params])
    get_connection().commit()

    cursor.execute(
        "INSERT INTO LTN_DEVELOP.USER_DOCUMENTS VALUES (?, ?, ?, ?, ?, ?)",
        (create_new_user_doc_id(current_user.get_id(),
                                document_id), current_user.get_id(),
         document_id, document_visibility, datetime.now(), datetime.now()))
    get_connection().commit()
    return "Successfully imported", 201
예제 #3
0
def manage_task(task_id):
    cursor = get_connection().cursor()
    if request.method == 'GET':
        cursor.execute('SELECT t.id, t.name, t.domain, t.author, u.name '
                       'FROM LTN_DEVELOP.TASKS t LEFT OUTER JOIN LTN_DEVELOP.USERS u ON u.id = t.author '
                       'WHERE t.id = ?', (task_id,))
        result = cursor.fetchone()
        cursor.execute('SELECT d.id, count(ud.id) '
                       'FROM LTN_DEVELOP.TASKS t '
                       'JOIN LTN_DEVELOP.DOCUMENTS d ON d.task = t.id '
                       'LEFT OUTER JOIN LTN_DEVELOP.USER_DOCUMENTS ud ON ud.document_id = d.id '
                       'AND (ud.visibility = 1 OR ud.user_id = ?) '
                       'WHERE t.id = ? '
                       'GROUP BY d.id ORDER BY d.id ASC', (current_user.get_id(), task_id))
        documents = list()
        for row in cursor.fetchall():
            documents.append({'document_id': row[0], 'user_document_count': row[1]})
        return respond_with({'task_id': result[0], 'task_name': result[1], 'task_domain': result[2],
                             'user_id': result[3], 'user_name': result[4], 'documents': documents})
    elif request.method == 'POST':
        req = request.get_json()
        if req.get('task_id') is not None:
            sql_to_prepare = 'CALL LTN_DEVELOP.update_task (?, ?, ?, ?, ?)'
        else:
            sql_to_prepare = 'CALL LTN_DEVELOP.add_task (?, ?, ?, ?, ?)'

        params = {
            'TASK_ID': req.get('task_id'),
            'TASK_NAME': req.get('task_name'),
            'TABLE_NAME': req.get('task_domain'),
            'ER_ANALYSIS_CONFIG': req.get('task_config'),
            'NEW_AUTHOR': req.get('user_id')
        }

        if params.get('TABLE_NAME', None) is None:
            generate_table_name(params)
        if params.get('NEW_AUTHOR', None) is None:
            params['NEW_AUTHOR'] = current_user.get_id()

        psid = cursor.prepare(sql_to_prepare)
        ps = cursor.get_prepared_statement(psid)
        try:
            cursor.execute_prepared(ps, [params])
            get_connection().commit()
        except:
            pass  # Rows affected warning
        return 'OK', 200
    elif request.method == 'DELETE':
        sql_to_prepare = 'CALL LTN_DEVELOP.delete_task (?)'
        params = {'TASK_ID': task_id}
        psid = cursor.prepare(sql_to_prepare)
        ps = cursor.get_prepared_statement(psid)
        try:
            cursor.execute_prepared(ps, [params])
            get_connection().commit()
        except:
            pass  # Rows affected warning
        return 'OK', 200
예제 #4
0
def save_userdoc_visibility(doc_id):
    user_doc_id = load_user_doc_id(doc_id, current_user.get_id())
    visibility = request.get_json()['visible']
    cursor = get_connection().cursor()
    cursor.execute(
        'UPDATE LTN_DEVELOP.USER_DOCUMENTS '
        'SET VISIBILITY = ? WHERE ID = ?', (visibility, user_doc_id))
    cursor.close()
    get_connection().commit()
    return "", 200
예제 #5
0
def create_user_doc_if_not_existent(user_doc_id,
                                    document_id,
                                    user_id,
                                    is_visible=True):
    cursor = get_connection().cursor()
    cursor.execute("SELECT 1 FROM LTN_DEVELOP.USER_DOCUMENTS WHERE ID = ?",
                   (user_doc_id, ))
    result = cursor.fetchone()
    if not result:
        date = datetime.now()
        cursor.execute(
            "INSERT INTO LTN_DEVELOP.USER_DOCUMENTS VALUES (?, ?, ?, ?, ?, ?)",
            (user_doc_id, user_id, document_id, int(is_visible), date, date))
        get_connection().commit()
예제 #6
0
def store_predicted_relations(pairs, user_document_id):
    cursor = get_connection().cursor()
    cursor.execute("DELETE FROM LTN_DEVELOP.PAIRS WHERE USER_DOC_ID = ?", (user_document_id,))

    tuples = []
    pairs = filter(lambda x: x[0] != -1, pairs)
    for ddi, e1_id, e2_id in pairs:
        tuples.append((e1_id, e2_id, user_document_id, 1, ddi))

    cursor.executemany(
        "INSERT INTO LTN_DEVELOP.PAIRS (E1_ID, E2_ID, USER_DOC_ID, DDI, TYPE_ID) VALUES (?, ?, ?, ?, ?)", tuples
    )
    get_connection().commit()
    cursor.close()
    return tuples
예제 #7
0
def remove_entities_without_relations(pairs, document_data, user_doc_id):
    used_entities = set()

    def add_entities_to_set(pair_tuple):
        used_entities.add(pair_tuple[0])
        used_entities.add(pair_tuple[1])

    map(add_entities_to_set, pairs)
    to_be_removed = map(lambda e: e['id'], filter(lambda d: d['id'] not in used_entities, document_data['denotations']))

    cursor = get_connection().cursor()
    id_string = "('" + "', '".join(to_be_removed) + "')"
    cursor.execute('DELETE FROM LTN_DEVELOP.ENTITIES WHERE ID IN ' + id_string + ' AND USER_DOC_ID = ?', (user_doc_id,))
    get_connection().commit()
    cursor.close()
예제 #8
0
def predict_entities(document_id, task_id, target_user_document_id):
    cursor = get_connection().cursor()

    cursor.execute('select "DOMAIN" from LTN_DEVELOP.tasks WHERE id = ?',
                   (task_id, ))
    table_name = cursor.fetchone()[0]
    index_name = "$TA_INDEX_" + table_name
    er_index_name = "$TA_ER_INDEX_" + table_name

    cursor.execute(
        """
        select distinct
          fti.ta_offset as "start",
          fti.ta_offset + length(fti.ta_token) as "end",
          fti.ta_token,
          tt.id
        from "LTN_DEVELOP"."%s" fti
        join "LTN_DEVELOP"."TYPES" t on (t.code = fti.ta_type or
          (t.code = 'T092' and fti.ta_type like 'ORGANIZATION%%'))
        join "LTN_DEVELOP"."TASK_TYPES" tt on t.id = tt.type_id and tt.task_id = ?
        join "LTN_DEVELOP"."%s" pos on fti.document_id = pos.document_id and fti.ta_offset = pos.ta_offset
        where fti.document_id = ?
          and length(fti.ta_token) >= 3
          and pos.ta_type in ('noun', 'abbreviation', 'proper name')
        order by fti.ta_offset
    """ % (er_index_name, index_name), (
            task_id,
            document_id,
        ))

    entities = list()
    offsets = list()

    for row in cursor.fetchall():
        entity_id = "%s_%s_%s_%s" % (target_user_document_id, row[0], row[2],
                                     row[3])
        entity_id = entity_id.replace(' ', '_').replace('/', '_')
        entities.append(
            (entity_id, target_user_document_id, int(row[3]), None, row[2]))
        offsets.append((row[0], row[1], entity_id, target_user_document_id))

    cursor.executemany(
        'insert into "LTN_DEVELOP"."ENTITIES" VALUES (?, ?, ?, ?, ?)',
        entities)
    cursor.executemany(
        'insert into "LTN_DEVELOP"."OFFSETS" VALUES (?, ?, ?, ?)', offsets)
    get_connection().commit()
    cursor.close()
예제 #9
0
def store_predicted_relations(pairs, user_document_id):
    cursor = get_connection().cursor()
    cursor.execute("DELETE FROM LTN_DEVELOP.PAIRS WHERE USER_DOC_ID = ?",
                   (user_document_id, ))

    tuples = []
    pairs = filter(lambda x: x[0] != -1, pairs)
    for ddi, e1_id, e2_id in pairs:
        tuples.append((e1_id, e2_id, user_document_id, 1, ddi))

    cursor.executemany(
        "INSERT INTO LTN_DEVELOP.PAIRS (E1_ID, E2_ID, USER_DOC_ID, DDI, TYPE_ID) VALUES (?, ?, ?, ?, ?)",
        tuples)
    get_connection().commit()
    cursor.close()
    return tuples
예제 #10
0
def get_user_documents(user_id):
    if user_id != current_user.get_id():
        return "Not authorized to view the documents of this user.", 401
    cursor = get_connection().cursor()
    cursor.execute(
        "SELECT ID, USER_ID, DOCUMENT_ID, VISIBILITY, CREATED_AT, UPDATED_AT "
        "FROM LTN_DEVELOP.USER_DOCUMENTS "
        "WHERE USER_ID = ? OR VISIBILITY > 0 ORDER BY DOCUMENT_ID",
        (user_id, ))
    user_documents = list()
    for result in cursor.fetchall():
        user_documents.append({
            "id":
            result[0],
            "user_id":
            result[1],
            "document_id":
            result[2],
            "visibility":
            result[3],
            "created_at":
            result[4].strftime('%Y-%m-%d %H:%M:%S'),
            "updated_at":
            result[5].strftime('%Y-%m-%d %H:%M:%S')
        })
    cursor.close()
    return respond_with(user_documents)
예제 #11
0
def save_relations(user_doc_id, relations, id_map):
    relation_tuples = list()
    for relation in relations:
        if id_map.get(relation['subj']) is not None and id_map.get(
                relation['obj']) is not None:
            relation_tuples.append(
                (id_map[relation['subj']], id_map[relation['obj']],
                 user_doc_id, 1, relation['pred'].get('id'),
                 relation['pred'].get('label', None)))

    cursor = get_connection().cursor()
    cursor.executemany(
        "INSERT INTO LTN_DEVELOP.PAIRS (E1_ID, E2_ID, USER_DOC_ID, DDI, TYPE_ID, LABEL) "
        "VALUES (?, ?, ?, ?, ?, ?)", relation_tuples)
    get_connection().commit()
    return True
예제 #12
0
def get_associated_users(document_id):
    cursor = get_connection().cursor()
    cursor.execute("SELECT DISTINCT ud.user_id "
                   "FROM LTN_DEVELOP.USER_DOCUMENTS ud "
                   "JOIN LTN_DEVELOP.DOCUMENTS d ON ud.document_id = d.id")
    users = map(lambda row: row[0], cursor.fetchall())
    cursor.close()
    return users
예제 #13
0
def predict():
    data = request.get_json()
    task_id = data['task_id']
    jobs = data.get('jobs', [PREDICT_ENTITIES])
    document_id = data['document_id']
    user_id = data.get('user_id', current_user.get_id())
    current_prediction_user = prediction_user_for_user(user_id)
    prediction_user_doc_id = load_user_doc_id(document_id,
                                              current_prediction_user)
    delete_user_document(prediction_user_doc_id)

    document_data = json.loads(data.get('current_state', None))
    if document_data is None:
        document_data = load_document(document_id, user_id)
    else:
        # the current status has to be saved first in order to disambiguate the ids of the annotations
        user_doc_id = load_user_doc_id(document_id, current_user.get_id())
        successful = save_document(document_data, user_doc_id, document_id,
                                   current_user.get_id(), task_id)
        if not successful:
            return "Could not save the document", 500

    if PREDICT_ENTITIES in jobs:
        cursor = get_connection().cursor()
        cursor.execute(
            'INSERT INTO "LTN_DEVELOP"."USER_DOCUMENTS" '
            'VALUES (?, ?, ?, 0, current_timestamp, current_timestamp)', (
                prediction_user_doc_id,
                current_prediction_user,
                document_id,
            ))
        cursor.close()
        get_connection().commit()
        predict_entities(document_id, task_id, prediction_user_doc_id)
    if PREDICT_RELATIONS in jobs:
        if PREDICT_ENTITIES not in jobs:
            save_document(document_data, prediction_user_doc_id, document_id,
                          current_prediction_user, task_id, False)
        predicted_pairs = predict_relations(prediction_user_doc_id, task_id)
        if PREDICT_ENTITIES not in jobs:
            remove_entities_without_relations(predicted_pairs, document_data,
                                              prediction_user_doc_id)

    document_data = load_document(document_id, current_user.get_id(), True)
    return respond_with(document_data)
예제 #14
0
def manage_task_type(type_id):
    cursor = get_connection().cursor()
    if request.method == 'GET':
        cursor.execute('SELECT CODE, NAME, GROUP_ID, "GROUP", "LABEL", t.ID, tt.ID '
                       'FROM LTN_DEVELOP.TYPES t '
                       'JOIN LTN_DEVELOP.TASK_TYPES tt ON t.ID = tt.TYPE_ID '
                       'WHERE tt.ID = ?', (type_id,))
        row = cursor.fetchone()
        if row:
            return respond_with({"code": row[0], "name": row[1], "groupId": row[2], "group": row[3],
                                 "label": row[4], "type_id": row[5], "id": row[6]})
        return 'NOT FOUND', 404
    elif request.method == 'PUT':
        req = request.get_json()
        updated_type = req.get('type')
        cursor.execute('SELECT ID FROM LTN_DEVELOP.TASK_TYPES WHERE ID = ?', (type_id,))
        already_existing = cursor.fetchone()
        if already_existing:
            cursor.execute('UPDATE LTN_DEVELOP.TASK_TYPES SET ID = ?, LABEL = ?, TYPE_ID = ? '
                           'WHERE ID = ?',
                           (updated_type.get('id'), updated_type.get('label'), updated_type.get('type_id'), type_id))
            get_connection().commit()
            return 'UPDATED', 200
        else:
            task_id = req.get('task')
            is_relation = req.get('relation')
            cursor.execute('INSERT INTO LTN_DEVELOP.TASK_TYPES (LABEL, TASK_ID, TYPE_ID, RELATION) '
                           'VALUES (?, ?, ?, ?)',
                           (updated_type.get('label'), task_id, updated_type.get('type_id'), is_relation))
            get_connection().commit()
            return 'CREATED', 200
    elif request.method == 'DELETE':
        cursor.execute('DELETE FROM LTN_DEVELOP.TASK_TYPES WHERE ID = ?', (type_id,))
        get_connection().commit()
        return 'DELETED', 200
예제 #15
0
def load_user_doc_id(document_id, user_id):
    cursor = get_connection().cursor()
    cursor.execute(
        "SELECT ID FROM LTN_DEVELOP.USER_DOCUMENTS WHERE DOCUMENT_ID = ? AND USER_ID = ?",
        (document_id, user_id))
    result = cursor.fetchone()
    if result:
        return str(result[0])
    return create_new_user_doc_id(user_id, document_id)
예제 #16
0
def delete_user_documents(user_document_ids):
    user_document_ids = "('" + "', '".join(user_document_ids) + "')"
    try:
        cursor = get_connection().cursor()
        cursor.execute("DELETE FROM LTN_DEVELOP.PAIRS WHERE USER_DOC_ID IN " +
                       user_document_ids)
        cursor.execute(
            "DELETE FROM LTN_DEVELOP.OFFSETS WHERE USER_DOC_ID IN  " +
            user_document_ids)
        cursor.execute(
            "DELETE FROM LTN_DEVELOP.ENTITIES WHERE USER_DOC_ID IN " +
            user_document_ids)
        cursor.execute("DELETE FROM LTN_DEVELOP.USER_DOCUMENTS WHERE ID IN " +
                       user_document_ids)
        get_connection().commit()
        return True
    except Exception, e:
        raise e
예제 #17
0
def get_tasks():
    cursor = get_connection().cursor()
    cursor.execute('SELECT t.id, t.name, t.domain, t.config, t.author, u.name '
                   'FROM LTN_DEVELOP.TASKS t LEFT OUTER JOIN LTN_DEVELOP.USERS u ON u.id = t.author ORDER BY t.id')
    tasks = list()
    for result in cursor.fetchall():
        tasks.append({'task_id': result[0], 'task_name': result[1], 'task_domain': result[2], 'task_config': result[3],
                      'user_id': result[4], 'user_name': result[5]})
    return respond_with(tasks)
예제 #18
0
def delete_document(document_id):
    try:
        cursor = get_connection().cursor()
        cursor.execute(
            "SELECT ID FROM LTN_DEVELOP.USER_DOCUMENTS WHERE DOCUMENT_ID = ?",
            (document_id, ))
        user_document_ids = map(lambda t: t[0], cursor.fetchall())
        delete_user_documents(user_document_ids)

        sql_to_prepare = 'CALL LTN_DEVELOP.delete_document (?)'
        params = {'DOCUMENT_ID': document_id}
        psid = cursor.prepare(sql_to_prepare)
        ps = cursor.get_prepared_statement(psid)
        cursor.execute_prepared(ps, [params])
        get_connection().commit()

        return True
    except Exception, e:
        raise e
예제 #19
0
def get_document(document_id):
    if get_connection() is None:
        try_reconnecting()
    if request.method == 'GET':
        try:
            result = load_document(document_id, current_user.get_id())
            return respond_with(result)
        except Exception, e:
            print e
            reset_connection()
            return 'Error while loading the document.', 500
예제 #20
0
def remove_entities_without_relations(pairs, document_data, user_doc_id):
    used_entities = set()

    def add_entities_to_set(pair_tuple):
        used_entities.add(pair_tuple[0])
        used_entities.add(pair_tuple[1])

    map(add_entities_to_set, pairs)
    to_be_removed = map(
        lambda e: e['id'],
        filter(lambda d: d['id'] not in used_entities,
               document_data['denotations']))

    cursor = get_connection().cursor()
    id_string = "('" + "', '".join(to_be_removed) + "')"
    cursor.execute(
        'DELETE FROM LTN_DEVELOP.ENTITIES WHERE ID IN ' + id_string +
        ' AND USER_DOC_ID = ?', (user_doc_id, ))
    get_connection().commit()
    cursor.close()
예제 #21
0
def predict_relations(user_document_id, task_id):
    cursor = get_connection().cursor()

    sql_to_prepare = 'CALL LTN_DEVELOP.PREDICT_UD (?, ?, ?)'
    params = {'UD_ID': user_document_id, 'TASK_ID': str(task_id)}
    psid = cursor.prepare(sql_to_prepare)
    ps = cursor.get_prepared_statement(psid)
    cursor.execute_prepared(ps, [params])
    pairs = cursor.fetchall()

    return store_predicted_relations(pairs, user_document_id)
예제 #22
0
def return_entities():
    req = request.get_json()
    document_id = req['document_id']
    user1 = req['user1']
    user2 = req['user2']

    cursor = get_connection().cursor()
    predictions = sorted(get_entities_for_user_document(cursor, document_id, user1), key=lambda x: x.start)
    gold_standard = sorted(get_entities_for_user_document(cursor, document_id, user2), key=lambda x: x.start)

    p = 0
    matches, left_aligns, right_aligns, overlaps, misses, wrong_type = 0, 0, 0, 0, 0, {}

    for entity in gold_standard:
        if len(predictions) == 0:
            misses += 1
            continue
        while predictions[p].end < entity.start:
            if p == len(predictions) - 1:
                break
            p += 1
        can_miss = True
        for candidate in predictions[p:]:
            if candidate.start > entity.end:
                if can_miss:
                    misses += 1
                    can_miss = False
                break
            if candidate.end < entity.start:
                break
            can_miss = False
            if candidate.start != entity.start:
                if candidate.end == entity.end:
                    if candidate.type != entity.type:
                        wrong_type["right-aligns"] = wrong_type.get("right-aligns", 0) + 1
                    right_aligns += 1
                else:
                    if candidate.type != entity.type:
                        wrong_type["overlaps"] = wrong_type.get("overlaps", 0) + 1
                    overlaps += 1
            else:
                if candidate.end == entity.end:
                    if candidate.type != entity.type:
                        wrong_type["matches"] = wrong_type.get("matches", 0) + 1
                    matches += 1
                else:
                    if candidate.type != entity.type:
                        wrong_type["left-aligns"] = wrong_type.get("left-aligns", 0) + 1
                    left_aligns += 1
        if can_miss:
            misses += 1

    return respond_with({"matches": matches, "left-aligns": left_aligns, "right-aligns": right_aligns,
                         "overlaps": overlaps, "misses": misses, "wrong-type": wrong_type})
예제 #23
0
def predict_relations(user_document_id, task_id):
    cursor = get_connection().cursor()

    sql_to_prepare = 'CALL LTN_DEVELOP.PREDICT_UD (?, ?, ?)'
    params = {'UD_ID': user_document_id,
              'TASK_ID': str(task_id)}
    psid = cursor.prepare(sql_to_prepare)
    ps = cursor.get_prepared_statement(psid)
    cursor.execute_prepared(ps, [params])
    pairs = cursor.fetchall()

    return store_predicted_relations(pairs, user_document_id)
예제 #24
0
def get_base_types():
    cursor = get_connection().cursor()
    cursor.execute('SELECT CODE, NAME, GROUP_ID, "GROUP", ID FROM LTN_DEVELOP.TYPES ORDER BY "GROUP" DESC')
    types = list()

    for aType in cursor.fetchall():
        types.append({"code": aType[0],
                      "name": aType[1],
                      "groupId": aType[2],
                      "group": aType[3],
                      "id": aType[4]})
    return types
예제 #25
0
def save_annotations(user_doc_id, annotations):
    # only save annotations from the current user, defined as userId 0 at loading time
    filtered_annotations = filter(
        lambda annotation: annotation.get('userId', 0) == 0, annotations)
    if not user_doc_id:
        return False
    print "inserting new annotations..."
    annotation_tuples = map(
        lambda annotation: convert_annotation(annotation, user_doc_id),
        filtered_annotations)
    cursor = get_connection().cursor()
    cursor.executemany(
        "INSERT INTO LTN_DEVELOP.ENTITIES (ID, USER_DOC_ID, TYPE_ID, LABEL) "
        "VALUES (?, ?, ?, ?)", annotation_tuples)
    print "inserting new offsets..."
    offset_tuples = map(
        lambda annotation: convert_offset(annotation, user_doc_id),
        filtered_annotations)
    cursor.executemany("INSERT INTO LTN_DEVELOP.OFFSETS VALUES (?, ?, ?, ?)",
                       offset_tuples)
    get_connection().commit()
    return True
예제 #26
0
def get_task_types(task_id, relation):
    cursor = get_connection().cursor()
    relation_flag = int(relation)
    cursor.execute('SELECT CODE, NAME, GROUP_ID, "GROUP", "LABEL", t.ID, tt.ID '
                   'FROM LTN_DEVELOP.TYPES t '
                   'JOIN LTN_DEVELOP.TASK_TYPES tt ON t.ID = tt.TYPE_ID '
                   'WHERE tt.TASK_ID = ? AND tt.RELATION = ? '
                   'ORDER BY "GROUP" DESC', (task_id, relation_flag))
    types = list()
    for row in cursor.fetchall():
        types.append({"code": row[0], "name": row[1], "groupId": row[2], "group": row[3],
                      "label": row[4], "type_id": row[5], "id": row[6]})
    return types
예제 #27
0
def predict_entities(document_id, task_id, target_user_document_id):
    cursor = get_connection().cursor()

    cursor.execute('select "DOMAIN" from LTN_DEVELOP.tasks WHERE id = ?', (task_id,))
    table_name = cursor.fetchone()[0]
    index_name = "$TA_INDEX_" + table_name
    er_index_name = "$TA_ER_INDEX_" + table_name

    cursor.execute("""
        select distinct
          fti.ta_offset as "start",
          fti.ta_offset + length(fti.ta_token) as "end",
          fti.ta_token,
          tt.id
        from "LTN_DEVELOP"."%s" fti
        join "LTN_DEVELOP"."TYPES" t on (t.code = fti.ta_type or
          (t.code = 'T092' and fti.ta_type like 'ORGANIZATION%%'))
        join "LTN_DEVELOP"."TASK_TYPES" tt on t.id = tt.type_id and tt.task_id = ?
        join "LTN_DEVELOP"."%s" pos on fti.document_id = pos.document_id and fti.ta_offset = pos.ta_offset
        where fti.document_id = ?
          and length(fti.ta_token) >= 3
          and pos.ta_type in ('noun', 'abbreviation', 'proper name')
        order by fti.ta_offset
    """ % (er_index_name, index_name), (task_id, document_id,))

    entities = list()
    offsets = list()

    for row in cursor.fetchall():
        entity_id = "%s_%s_%s_%s" % (target_user_document_id, row[0], row[2], row[3])
        entity_id = entity_id.replace(' ', '_').replace('/', '_')
        entities.append((entity_id, target_user_document_id, int(row[3]), None, row[2]))
        offsets.append((row[0], row[1], entity_id, target_user_document_id))

    cursor.executemany('insert into "LTN_DEVELOP"."ENTITIES" VALUES (?, ?, ?, ?, ?)', entities)
    cursor.executemany('insert into "LTN_DEVELOP"."OFFSETS" VALUES (?, ?, ?, ?)', offsets)
    get_connection().commit()
    cursor.close()
예제 #28
0
def login():
    if get_connection() is None:
        try_reconnecting()
    req = request.get_json()
    if req and 'username' in req and 'password' in req:
        try:
            user = load_user(req['username'])
            if user and req['password'] == user.token:
                login_user(user, remember=True)
                user.token = None
                return respond_with(user.__dict__)
        except Exception, e:
            reset_connection()
            return str(e) + " Please try again later.", 500
예제 #29
0
def get_types(document_id, relation):
    cursor = get_connection().cursor()
    relation_flag = int(relation)
    cursor.execute('''SELECT CODE, NAME, GROUP_ID, "GROUP", "LABEL", t.ID, tt.ID
                      FROM LTN_DEVELOP.TYPES t
                      JOIN LTN_DEVELOP.TASK_TYPES tt ON t.ID = tt.TYPE_ID
                      JOIN LTN_DEVELOP.DOCUMENTS d ON tt.TASK_ID = d.TASK
                      WHERE d.id = ? AND tt.RELATION = ?
                      ORDER BY "GROUP" DESC''', (document_id, relation_flag))
    types = list()
    for row in cursor.fetchall():
        types.append({"code": row[0], "name": "%s (%s)" % (row[4], row[1]), "groupId": row[2], "group": row[3],
                      "label": row[4], "type_id": row[5], "id": row[6]})
    return types
예제 #30
0
def create_document_in_database(document_id, document_text, document_visibility, task):
    cursor = get_connection().cursor()
    cursor.execute("SELECT COUNT(*) FROM LTN_DEVELOP.DOCUMENTS WHERE ID = ?", (document_id,))
    result = cursor.fetchone()
    if result[0] != 0:
        return "A document with the ID '%s' already exists" % (document_id,), 409

    sql_to_prepare = 'CALL LTN_DEVELOP.add_document (?, ?, ?)'
    params = {
        'DOCUMENT_ID': document_id,
        'DOCUMENT_TEXT': document_text.replace("'", "''"),
        'TASK': task
    }
    psid = cursor.prepare(sql_to_prepare)
    ps = cursor.get_prepared_statement(psid)
    cursor.execute_prepared(ps, [params])
    get_connection().commit()

    cursor.execute("INSERT INTO LTN_DEVELOP.USER_DOCUMENTS VALUES (?, ?, ?, ?, ?, ?)",
                   (create_new_user_doc_id(current_user.get_id(), document_id), current_user.get_id(), document_id,
                    document_visibility, datetime.now(), datetime.now()))
    get_connection().commit()
    return "Successfully imported", 201
예제 #31
0
def predict():
    data = request.get_json()
    task_id = data['task_id']
    jobs = data.get('jobs', [PREDICT_ENTITIES])
    document_id = data['document_id']
    user_id = data.get('user_id', current_user.get_id())
    current_prediction_user = prediction_user_for_user(user_id)
    prediction_user_doc_id = load_user_doc_id(document_id, current_prediction_user)
    delete_user_document(prediction_user_doc_id)

    document_data = json.loads(data.get('current_state', None))
    if document_data is None:
        document_data = load_document(document_id, user_id)
    else:
        # the current status has to be saved first in order to disambiguate the ids of the annotations
        user_doc_id = load_user_doc_id(document_id, current_user.get_id())
        successful = save_document(document_data, user_doc_id, document_id, current_user.get_id(), task_id)
        if not successful:
            return "Could not save the document", 500

    if PREDICT_ENTITIES in jobs:
        cursor = get_connection().cursor()
        cursor.execute('INSERT INTO "LTN_DEVELOP"."USER_DOCUMENTS" '
                       'VALUES (?, ?, ?, 0, current_timestamp, current_timestamp)',
                       (prediction_user_doc_id, current_prediction_user, document_id,))
        cursor.close()
        get_connection().commit()
        predict_entities(document_id, task_id, prediction_user_doc_id)
    if PREDICT_RELATIONS in jobs:
        if PREDICT_ENTITIES not in jobs:
            save_document(document_data, prediction_user_doc_id, document_id, current_prediction_user, task_id, False)
        predicted_pairs = predict_relations(prediction_user_doc_id, task_id)
        if PREDICT_ENTITIES not in jobs:
            remove_entities_without_relations(predicted_pairs, document_data, prediction_user_doc_id)

    document_data = load_document(document_id, current_user.get_id(), True)
    return respond_with(document_data)
예제 #32
0
def call_start_training():
    global should_continue
    while True:
        if not should_continue:
            break
        try:
            global model_training_queue
            task_id = model_training_queue.pop()
        except KeyError:
            time.sleep(10)
            continue

        cursor = get_connection().cursor()
        sql_to_prepare = 'CALL LTN_DEVELOP.LTN_TRAIN (?)'
        params = {'TASK_ID': task_id}
        psid = cursor.prepare(sql_to_prepare)
        ps = cursor.get_prepared_statement(psid)

        try:
            cursor.execute_prepared(ps, [params])
            get_connection().commit()
        except Exception, e:
            print 'Error: ', e
        finally:
예제 #33
0
def get_base_types():
    cursor = get_connection().cursor()
    cursor.execute(
        'SELECT CODE, NAME, GROUP_ID, "GROUP", ID FROM LTN_DEVELOP.TYPES ORDER BY "GROUP" DESC'
    )
    types = list()

    for aType in cursor.fetchall():
        types.append({
            "code": aType[0],
            "name": aType[1],
            "groupId": aType[2],
            "group": aType[3],
            "id": aType[4]
        })
    return types
예제 #34
0
def manage_task_type(type_id):
    cursor = get_connection().cursor()
    if request.method == 'GET':
        cursor.execute(
            'SELECT CODE, NAME, GROUP_ID, "GROUP", "LABEL", t.ID, tt.ID '
            'FROM LTN_DEVELOP.TYPES t '
            'JOIN LTN_DEVELOP.TASK_TYPES tt ON t.ID = tt.TYPE_ID '
            'WHERE tt.ID = ?', (type_id, ))
        row = cursor.fetchone()
        if row:
            return respond_with({
                "code": row[0],
                "name": row[1],
                "groupId": row[2],
                "group": row[3],
                "label": row[4],
                "type_id": row[5],
                "id": row[6]
            })
        return 'NOT FOUND', 404
    elif request.method == 'PUT':
        req = request.get_json()
        updated_type = req.get('type')
        cursor.execute('SELECT ID FROM LTN_DEVELOP.TASK_TYPES WHERE ID = ?',
                       (type_id, ))
        already_existing = cursor.fetchone()
        if already_existing:
            cursor.execute(
                'UPDATE LTN_DEVELOP.TASK_TYPES SET ID = ?, LABEL = ?, TYPE_ID = ? '
                'WHERE ID = ?',
                (updated_type.get('id'), updated_type.get('label'),
                 updated_type.get('type_id'), type_id))
            get_connection().commit()
            return 'UPDATED', 200
        else:
            task_id = req.get('task')
            is_relation = req.get('relation')
            cursor.execute(
                'INSERT INTO LTN_DEVELOP.TASK_TYPES (LABEL, TASK_ID, TYPE_ID, RELATION) '
                'VALUES (?, ?, ?, ?)',
                (updated_type.get('label'), task_id,
                 updated_type.get('type_id'), is_relation))
            get_connection().commit()
            return 'CREATED', 200
    elif request.method == 'DELETE':
        cursor.execute('DELETE FROM LTN_DEVELOP.TASK_TYPES WHERE ID = ?',
                       (type_id, ))
        get_connection().commit()
        return 'DELETED', 200
예제 #35
0
def get_tasks():
    cursor = get_connection().cursor()
    cursor.execute(
        'SELECT t.id, t.name, t.domain, t.config, t.author, u.name '
        'FROM LTN_DEVELOP.TASKS t LEFT OUTER JOIN LTN_DEVELOP.USERS u ON u.id = t.author ORDER BY t.id'
    )
    tasks = list()
    for result in cursor.fetchall():
        tasks.append({
            'task_id': result[0],
            'task_name': result[1],
            'task_domain': result[2],
            'task_config': result[3],
            'user_id': result[4],
            'user_name': result[5]
        })
    return respond_with(tasks)
예제 #36
0
def delete_annotation_data(user_doc_id):
    cursor = get_connection().cursor()
    print "Deleting old information for " + str(user_doc_id) + "..."
    print "Deleting existing pairs..."
    cursor.execute("DELETE FROM LTN_DEVELOP.PAIRS WHERE USER_DOC_ID = ?",
                   (user_doc_id, ))
    get_connection().commit()
    print "Deleting existing offsets..."
    cursor.execute("DELETE FROM LTN_DEVELOP.OFFSETS WHERE USER_DOC_ID = ?",
                   (user_doc_id, ))
    get_connection().commit()
    print "Deleting existing annotations..."
    cursor.execute("DELETE FROM LTN_DEVELOP.ENTITIES WHERE USER_DOC_ID = ?",
                   (user_doc_id, ))
    get_connection().commit()
예제 #37
0
def load_document(document_id, user_id, show_predictions=False):
    cursor = get_connection().cursor()
    result = {}
    print "Loading information for document_id: '%s' and user: '******'" % (
        document_id, user_id)
    result['text'] = get_text(cursor, document_id)
    denotations, users, annotation_id_map = get_denotations_and_users(
        cursor, document_id, user_id, show_predictions)
    result['denotations'] = denotations
    result['relations'] = get_relations(cursor, document_id, user_id,
                                        annotation_id_map, show_predictions)
    result['sourceid'] = document_id
    result['config'] = {
        'entity types': get_entity_types(document_id),
        'relation types': get_relation_types(document_id),
        'users': users
    }
    cursor.close()
    return result
예제 #38
0
def get_task_types(task_id, relation):
    cursor = get_connection().cursor()
    relation_flag = int(relation)
    cursor.execute(
        'SELECT CODE, NAME, GROUP_ID, "GROUP", "LABEL", t.ID, tt.ID '
        'FROM LTN_DEVELOP.TYPES t '
        'JOIN LTN_DEVELOP.TASK_TYPES tt ON t.ID = tt.TYPE_ID '
        'WHERE tt.TASK_ID = ? AND tt.RELATION = ? '
        'ORDER BY "GROUP" DESC', (task_id, relation_flag))
    types = list()
    for row in cursor.fetchall():
        types.append({
            "code": row[0],
            "name": row[1],
            "groupId": row[2],
            "group": row[3],
            "label": row[4],
            "type_id": row[5],
            "id": row[6]
        })
    return types
예제 #39
0
def get_types(document_id, relation):
    cursor = get_connection().cursor()
    relation_flag = int(relation)
    cursor.execute(
        '''SELECT CODE, NAME, GROUP_ID, "GROUP", "LABEL", t.ID, tt.ID
                      FROM LTN_DEVELOP.TYPES t
                      JOIN LTN_DEVELOP.TASK_TYPES tt ON t.ID = tt.TYPE_ID
                      JOIN LTN_DEVELOP.DOCUMENTS d ON tt.TASK_ID = d.TASK
                      WHERE d.id = ? AND tt.RELATION = ?
                      ORDER BY "GROUP" DESC''', (document_id, relation_flag))
    types = list()
    for row in cursor.fetchall():
        types.append({
            "code": row[0],
            "name": "%s (%s)" % (row[4], row[1]),
            "groupId": row[2],
            "group": row[3],
            "label": row[4],
            "type_id": row[5],
            "id": row[6]
        })
    return types
예제 #40
0
def get_document_details(document_id):
    user_documents = list()
    cursor = get_connection().cursor()
    user_id = current_user.get_id()
    cursor.execute(
        'SELECT d.id, MIN(d.user_id), MIN(u.name), COUNT(DISTINCT e.id), COUNT(distinct p.id), MIN(d.visibility) '
        'FROM LTN_DEVELOP.USER_DOCUMENTS d '
        'JOIN LTN_DEVELOP.USERS u ON u.id = d.user_id '
        'LEFT OUTER JOIN LTN_DEVELOP.ENTITIES e ON e.user_doc_id = d.id '
        'LEFT OUTER JOIN LTN_DEVELOP.PAIRS p ON p.user_doc_id = d.id AND p.ddi = 1 '
        'WHERE d.document_id = ? AND (d.visibility = 1 OR d.user_id = ?) '
        'GROUP BY d.id', (document_id, user_id))
    for row in cursor.fetchall():
        user_documents.append({
            'id': row[0],
            'user_id': row[1],
            'user_name': row[2],
            'entities': row[3],
            'pairs': row[4],
            'visible': bool(row[5]),
            'from_current_user': row[1] == user_id
        })
    return respond_with(user_documents)
예제 #41
0
def return_entities():
    req = request.get_json()
    document_id = req['document_id']
    user1 = req['user1']
    user2 = req['user2']

    cursor = get_connection().cursor()
    predictions = sorted(get_entities_for_user_document(
        cursor, document_id, user1),
                         key=lambda x: x.start)
    gold_standard = sorted(get_entities_for_user_document(
        cursor, document_id, user2),
                           key=lambda x: x.start)

    p = 0
    matches, left_aligns, right_aligns, overlaps, misses, wrong_type = 0, 0, 0, 0, 0, {}

    for entity in gold_standard:
        if len(predictions) == 0:
            misses += 1
            continue
        while predictions[p].end < entity.start:
            if p == len(predictions) - 1:
                break
            p += 1
        can_miss = True
        for candidate in predictions[p:]:
            if candidate.start > entity.end:
                if can_miss:
                    misses += 1
                    can_miss = False
                break
            if candidate.end < entity.start:
                break
            can_miss = False
            if candidate.start != entity.start:
                if candidate.end == entity.end:
                    if candidate.type != entity.type:
                        wrong_type["right-aligns"] = wrong_type.get(
                            "right-aligns", 0) + 1
                    right_aligns += 1
                else:
                    if candidate.type != entity.type:
                        wrong_type["overlaps"] = wrong_type.get("overlaps",
                                                                0) + 1
                    overlaps += 1
            else:
                if candidate.end == entity.end:
                    if candidate.type != entity.type:
                        wrong_type["matches"] = wrong_type.get("matches",
                                                               0) + 1
                    matches += 1
                else:
                    if candidate.type != entity.type:
                        wrong_type["left-aligns"] = wrong_type.get(
                            "left-aligns", 0) + 1
                    left_aligns += 1
        if can_miss:
            misses += 1

    return respond_with({
        "matches": matches,
        "left-aligns": left_aligns,
        "right-aligns": right_aligns,
        "overlaps": overlaps,
        "misses": misses,
        "wrong-type": wrong_type
    })
예제 #42
0
def manage_task(task_id):
    cursor = get_connection().cursor()
    if request.method == 'GET':
        cursor.execute(
            'SELECT t.id, t.name, t.domain, t.author, u.name '
            'FROM LTN_DEVELOP.TASKS t LEFT OUTER JOIN LTN_DEVELOP.USERS u ON u.id = t.author '
            'WHERE t.id = ?', (task_id, ))
        result = cursor.fetchone()
        cursor.execute(
            'SELECT d.id, count(ud.id) '
            'FROM LTN_DEVELOP.TASKS t '
            'JOIN LTN_DEVELOP.DOCUMENTS d ON d.task = t.id '
            'LEFT OUTER JOIN LTN_DEVELOP.USER_DOCUMENTS ud ON ud.document_id = d.id '
            'AND (ud.visibility = 1 OR ud.user_id = ?) '
            'WHERE t.id = ? '
            'GROUP BY d.id ORDER BY d.id ASC',
            (current_user.get_id(), task_id))
        documents = list()
        for row in cursor.fetchall():
            documents.append({
                'document_id': row[0],
                'user_document_count': row[1]
            })
        return respond_with({
            'task_id': result[0],
            'task_name': result[1],
            'task_domain': result[2],
            'user_id': result[3],
            'user_name': result[4],
            'documents': documents
        })
    elif request.method == 'POST':
        req = request.get_json()
        if req.get('task_id') is not None:
            sql_to_prepare = 'CALL LTN_DEVELOP.update_task (?, ?, ?, ?, ?)'
        else:
            sql_to_prepare = 'CALL LTN_DEVELOP.add_task (?, ?, ?, ?, ?)'

        params = {
            'TASK_ID': req.get('task_id'),
            'TASK_NAME': req.get('task_name'),
            'TABLE_NAME': req.get('task_domain'),
            'ER_ANALYSIS_CONFIG': req.get('task_config'),
            'NEW_AUTHOR': req.get('user_id')
        }

        if params.get('TABLE_NAME', None) is None:
            generate_table_name(params)
        if params.get('NEW_AUTHOR', None) is None:
            params['NEW_AUTHOR'] = current_user.get_id()

        psid = cursor.prepare(sql_to_prepare)
        ps = cursor.get_prepared_statement(psid)
        try:
            cursor.execute_prepared(ps, [params])
            get_connection().commit()
        except:
            pass  # Rows affected warning
        return 'OK', 200
    elif request.method == 'DELETE':
        sql_to_prepare = 'CALL LTN_DEVELOP.delete_task (?)'
        params = {'TASK_ID': task_id}
        psid = cursor.prepare(sql_to_prepare)
        ps = cursor.get_prepared_statement(psid)
        try:
            cursor.execute_prepared(ps, [params])
            get_connection().commit()
        except:
            pass  # Rows affected warning
        return 'OK', 200
예제 #43
0
def get_users():
    cursor = get_connection().cursor()
    users = User.all(cursor)
    cursor.close()
    return respond_with(map(lambda user: user.__dict__, users))
예제 #44
0
def load_user(user_id):
    return User.get(user_id, get_connection().cursor())