# Imports this module needs (an assumption based on the names used below;
# app helpers such as get_connection, respond_with, load_document, User and
# the PREDICT_* constants are defined elsewhere in the application).
import json
import time
from datetime import datetime

from flask import request
from flask_login import current_user, login_user


def call_start_training():
    global should_continue
    while True:
        if not should_continue:
            break
        try:
            global model_training_queue
            task_id = model_training_queue.pop()
        except KeyError:
            # Nothing queued; poll again in a few seconds.
            time.sleep(10)
            continue
        cursor = get_connection().cursor()
        sql_to_prepare = 'CALL LTN_DEVELOP.LTN_TRAIN (?)'
        params = {'TASK_ID': task_id}
        psid = cursor.prepare(sql_to_prepare)
        ps = cursor.get_prepared_statement(psid)
        try:
            cursor.execute_prepared(ps, [params])
            get_connection().commit()
        except Exception as e:
            print 'Error: ', e
        finally:
            # The original finally block was cut off; closing the cursor here
            # is an assumption consistent with the rest of the module.
            cursor.close()


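# How the trainer is presumably wired up (a minimal sketch based on the
# globals used above; the set-based queue, the shutdown flag and the
# start_training helper are assumptions, not part of the original module):
import threading

model_training_queue = set()   # task ids waiting to be trained
should_continue = True         # cleared on shutdown to stop the worker


def start_training(task_id):
    # A set de-duplicates repeated training requests for the same task.
    model_training_queue.add(task_id)


training_thread = threading.Thread(target=call_start_training)
training_thread.daemon = True
training_thread.start()

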
def create_document_in_database(document_id, document_text, document_visibility, task):
    cursor = get_connection().cursor()
    cursor.execute("SELECT COUNT(*) FROM LTN_DEVELOP.DOCUMENTS WHERE ID = ?",
                   (document_id,))
    result = cursor.fetchone()
    if result[0] != 0:
        return "A document with the ID '%s' already exists" % (document_id,), 409
    sql_to_prepare = 'CALL LTN_DEVELOP.add_document (?, ?, ?)'
    params = {
        'DOCUMENT_ID': document_id,
        'DOCUMENT_TEXT': document_text.replace("'", "''"),
        'TASK': task
    }
    psid = cursor.prepare(sql_to_prepare)
    ps = cursor.get_prepared_statement(psid)
    cursor.execute_prepared(ps, [params])
    get_connection().commit()
    cursor.execute(
        "INSERT INTO LTN_DEVELOP.USER_DOCUMENTS VALUES (?, ?, ?, ?, ?, ?)",
        (create_new_user_doc_id(current_user.get_id(), document_id),
         current_user.get_id(), document_id, document_visibility,
         datetime.now(), datetime.now()))
    get_connection().commit()
    return "Successfully imported", 201


def manage_task(task_id):
    cursor = get_connection().cursor()
    if request.method == 'GET':
        cursor.execute('SELECT t.id, t.name, t.domain, t.author, u.name '
                       'FROM LTN_DEVELOP.TASKS t '
                       'LEFT OUTER JOIN LTN_DEVELOP.USERS u ON u.id = t.author '
                       'WHERE t.id = ?', (task_id,))
        result = cursor.fetchone()
        cursor.execute('SELECT d.id, count(ud.id) '
                       'FROM LTN_DEVELOP.TASKS t '
                       'JOIN LTN_DEVELOP.DOCUMENTS d ON d.task = t.id '
                       'LEFT OUTER JOIN LTN_DEVELOP.USER_DOCUMENTS ud ON ud.document_id = d.id '
                       'AND (ud.visibility = 1 OR ud.user_id = ?) '
                       'WHERE t.id = ? '
                       'GROUP BY d.id ORDER BY d.id ASC',
                       (current_user.get_id(), task_id))
        documents = list()
        for row in cursor.fetchall():
            documents.append({'document_id': row[0], 'user_document_count': row[1]})
        return respond_with({'task_id': result[0],
                             'task_name': result[1],
                             'task_domain': result[2],
                             'user_id': result[3],
                             'user_name': result[4],
                             'documents': documents})
    elif request.method == 'POST':
        req = request.get_json()
        if req.get('task_id') is not None:
            sql_to_prepare = 'CALL LTN_DEVELOP.update_task (?, ?, ?, ?, ?)'
        else:
            sql_to_prepare = 'CALL LTN_DEVELOP.add_task (?, ?, ?, ?, ?)'
        params = {
            'TASK_ID': req.get('task_id'),
            'TASK_NAME': req.get('task_name'),
            'TABLE_NAME': req.get('task_domain'),
            'ER_ANALYSIS_CONFIG': req.get('task_config'),
            'NEW_AUTHOR': req.get('user_id')
        }
        if params.get('TABLE_NAME') is None:
            generate_table_name(params)
        if params.get('NEW_AUTHOR') is None:
            params['NEW_AUTHOR'] = current_user.get_id()
        psid = cursor.prepare(sql_to_prepare)
        ps = cursor.get_prepared_statement(psid)
        try:
            cursor.execute_prepared(ps, [params])
            get_connection().commit()
        except Exception:
            pass  # "rows affected" warning raised by the driver
        return 'OK', 200
    elif request.method == 'DELETE':
        sql_to_prepare = 'CALL LTN_DEVELOP.delete_task (?)'
        params = {'TASK_ID': task_id}
        psid = cursor.prepare(sql_to_prepare)
        ps = cursor.get_prepared_statement(psid)
        try:
            cursor.execute_prepared(ps, [params])
            get_connection().commit()
        except Exception:
            pass  # "rows affected" warning raised by the driver
        return 'OK', 200


def save_userdoc_visibility(doc_id):
    user_doc_id = load_user_doc_id(doc_id, current_user.get_id())
    visibility = request.get_json()['visible']
    cursor = get_connection().cursor()
    cursor.execute('UPDATE LTN_DEVELOP.USER_DOCUMENTS '
                   'SET VISIBILITY = ? WHERE ID = ?',
                   (visibility, user_doc_id))
    cursor.close()
    get_connection().commit()
    return "", 200


def create_user_doc_if_not_existent(user_doc_id, document_id, user_id, is_visible=True):
    cursor = get_connection().cursor()
    cursor.execute("SELECT 1 FROM LTN_DEVELOP.USER_DOCUMENTS WHERE ID = ?",
                   (user_doc_id,))
    result = cursor.fetchone()
    if not result:
        date = datetime.now()
        cursor.execute(
            "INSERT INTO LTN_DEVELOP.USER_DOCUMENTS VALUES (?, ?, ?, ?, ?, ?)",
            (user_doc_id, user_id, document_id, int(is_visible), date, date))
        get_connection().commit()


def store_predicted_relations(pairs, user_document_id):
    cursor = get_connection().cursor()
    cursor.execute("DELETE FROM LTN_DEVELOP.PAIRS WHERE USER_DOC_ID = ?",
                   (user_document_id,))
    tuples = []
    # Skip pairs that were predicted as "no relation" (type id -1).
    pairs = filter(lambda x: x[0] != -1, pairs)
    for ddi, e1_id, e2_id in pairs:
        tuples.append((e1_id, e2_id, user_document_id, 1, ddi))
    cursor.executemany(
        "INSERT INTO LTN_DEVELOP.PAIRS (E1_ID, E2_ID, USER_DOC_ID, DDI, TYPE_ID) "
        "VALUES (?, ?, ?, ?, ?)", tuples)
    get_connection().commit()
    cursor.close()
    return tuples


def remove_entities_without_relations(pairs, document_data, user_doc_id):
    used_entities = set()

    def add_entities_to_set(pair_tuple):
        used_entities.add(pair_tuple[0])
        used_entities.add(pair_tuple[1])

    map(add_entities_to_set, pairs)
    to_be_removed = map(lambda e: e['id'],
                        filter(lambda d: d['id'] not in used_entities,
                               document_data['denotations']))
    cursor = get_connection().cursor()
    # The id list is spliced into the statement as string literals; see the
    # parameterized alternative sketched after delete_user_documents() below.
    id_string = "('" + "', '".join(to_be_removed) + "')"
    cursor.execute('DELETE FROM LTN_DEVELOP.ENTITIES WHERE ID IN ' + id_string +
                   ' AND USER_DOC_ID = ?', (user_doc_id,))
    get_connection().commit()
    cursor.close()


def predict_entities(document_id, task_id, target_user_document_id):
    cursor = get_connection().cursor()
    cursor.execute('select "DOMAIN" from LTN_DEVELOP.tasks WHERE id = ?', (task_id,))
    table_name = cursor.fetchone()[0]
    # HANA text-analysis index tables generated for the task's document table.
    index_name = "$TA_INDEX_" + table_name
    er_index_name = "$TA_ER_INDEX_" + table_name
    cursor.execute("""
        select distinct fti.ta_offset as "start",
               fti.ta_offset + length(fti.ta_token) as "end",
               fti.ta_token, tt.id
        from "LTN_DEVELOP"."%s" fti
        join "LTN_DEVELOP"."TYPES" t
          on (t.code = fti.ta_type
              or (t.code = 'T092' and fti.ta_type like 'ORGANIZATION%%'))
        join "LTN_DEVELOP"."TASK_TYPES" tt
          on t.id = tt.type_id and tt.task_id = ?
        join "LTN_DEVELOP"."%s" pos
          on fti.document_id = pos.document_id and fti.ta_offset = pos.ta_offset
        where fti.document_id = ?
          and length(fti.ta_token) >= 3
          and pos.ta_type in ('noun', 'abbreviation', 'proper name')
        order by fti.ta_offset
    """ % (er_index_name, index_name), (task_id, document_id))
    entities = list()
    offsets = list()
    for row in cursor.fetchall():
        entity_id = "%s_%s_%s_%s" % (target_user_document_id, row[0], row[2], row[3])
        entity_id = entity_id.replace(' ', '_').replace('/', '_')
        entities.append((entity_id, target_user_document_id, int(row[3]), None, row[2]))
        offsets.append((row[0], row[1], entity_id, target_user_document_id))
    cursor.executemany('insert into "LTN_DEVELOP"."ENTITIES" VALUES (?, ?, ?, ?, ?)',
                       entities)
    cursor.executemany('insert into "LTN_DEVELOP"."OFFSETS" VALUES (?, ?, ?, ?)',
                       offsets)
    get_connection().commit()
    cursor.close()


def get_user_documents(user_id):
    if user_id != current_user.get_id():
        return "Not authorized to view the documents of this user.", 401
    cursor = get_connection().cursor()
    cursor.execute("SELECT ID, USER_ID, DOCUMENT_ID, VISIBILITY, CREATED_AT, UPDATED_AT "
                   "FROM LTN_DEVELOP.USER_DOCUMENTS "
                   "WHERE USER_ID = ? OR VISIBILITY > 0 ORDER BY DOCUMENT_ID",
                   (user_id,))
    user_documents = list()
    for result in cursor.fetchall():
        user_documents.append({"id": result[0],
                               "user_id": result[1],
                               "document_id": result[2],
                               "visibility": result[3],
                               "created_at": result[4].strftime('%Y-%m-%d %H:%M:%S'),
                               "updated_at": result[5].strftime('%Y-%m-%d %H:%M:%S')})
    cursor.close()
    return respond_with(user_documents)


def save_relations(user_doc_id, relations, id_map):
    relation_tuples = list()
    for relation in relations:
        if id_map.get(relation['subj']) is not None and id_map.get(relation['obj']) is not None:
            relation_tuples.append((id_map[relation['subj']],
                                    id_map[relation['obj']],
                                    user_doc_id, 1,
                                    relation['pred'].get('id'),
                                    relation['pred'].get('label')))
    cursor = get_connection().cursor()
    cursor.executemany("INSERT INTO LTN_DEVELOP.PAIRS (E1_ID, E2_ID, USER_DOC_ID, DDI, TYPE_ID, LABEL) "
                       "VALUES (?, ?, ?, ?, ?, ?)", relation_tuples)
    get_connection().commit()
    return True


def get_associated_users(document_id):
    cursor = get_connection().cursor()
    # All users that have a user-document for the given document. The
    # original query omitted the WHERE clause and ignored document_id.
    cursor.execute("SELECT DISTINCT ud.user_id "
                   "FROM LTN_DEVELOP.USER_DOCUMENTS ud "
                   "JOIN LTN_DEVELOP.DOCUMENTS d ON ud.document_id = d.id "
                   "WHERE d.id = ?", (document_id,))
    users = map(lambda row: row[0], cursor.fetchall())
    cursor.close()
    return users


def predict():
    data = request.get_json()
    task_id = data['task_id']
    jobs = data.get('jobs', [PREDICT_ENTITIES])
    document_id = data['document_id']
    user_id = data.get('user_id', current_user.get_id())
    current_prediction_user = prediction_user_for_user(user_id)
    prediction_user_doc_id = load_user_doc_id(document_id, current_prediction_user)
    delete_user_document(prediction_user_doc_id)
    # json.loads(None) raises a TypeError, so only parse if a state was sent.
    current_state = data.get('current_state')
    document_data = json.loads(current_state) if current_state is not None else None
    if document_data is None:
        document_data = load_document(document_id, user_id)
    else:
        # The current state has to be saved first in order to disambiguate
        # the ids of the annotations.
        user_doc_id = load_user_doc_id(document_id, current_user.get_id())
        successful = save_document(document_data, user_doc_id, document_id,
                                   current_user.get_id(), task_id)
        if not successful:
            return "Could not save the document", 500
    if PREDICT_ENTITIES in jobs:
        cursor = get_connection().cursor()
        cursor.execute('INSERT INTO "LTN_DEVELOP"."USER_DOCUMENTS" '
                       'VALUES (?, ?, ?, 0, current_timestamp, current_timestamp)',
                       (prediction_user_doc_id, current_prediction_user, document_id))
        cursor.close()
        get_connection().commit()
        predict_entities(document_id, task_id, prediction_user_doc_id)
    if PREDICT_RELATIONS in jobs:
        if PREDICT_ENTITIES not in jobs:
            save_document(document_data, prediction_user_doc_id, document_id,
                          current_prediction_user, task_id, False)
        predicted_pairs = predict_relations(prediction_user_doc_id, task_id)
        if PREDICT_ENTITIES not in jobs:
            remove_entities_without_relations(predicted_pairs, document_data,
                                              prediction_user_doc_id)
    document_data = load_document(document_id, current_user.get_id(), True)
    return respond_with(document_data)


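# A request body predict() accepts might look like the following (values are
# hypothetical; the job names must match whatever the PREDICT_ENTITIES and
# PREDICT_RELATIONS constants are defined as elsewhere in the module):
#
#   POST /predict
#   {
#       "task_id": 1,
#       "document_id": "DDI-DrugBank.d297",   # hypothetical document id
#       "jobs": ["entities", "relations"],
#       "current_state": "{...}"              # optional serialized document JSON
#   }

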
def manage_task_type(type_id):
    cursor = get_connection().cursor()
    if request.method == 'GET':
        cursor.execute('SELECT CODE, NAME, GROUP_ID, "GROUP", "LABEL", t.ID, tt.ID '
                       'FROM LTN_DEVELOP.TYPES t '
                       'JOIN LTN_DEVELOP.TASK_TYPES tt ON t.ID = tt.TYPE_ID '
                       'WHERE tt.ID = ?', (type_id,))
        row = cursor.fetchone()
        if row:
            return respond_with({"code": row[0], "name": row[1], "groupId": row[2],
                                 "group": row[3], "label": row[4],
                                 "type_id": row[5], "id": row[6]})
        return 'NOT FOUND', 404
    elif request.method == 'PUT':
        req = request.get_json()
        updated_type = req.get('type')
        cursor.execute('SELECT ID FROM LTN_DEVELOP.TASK_TYPES WHERE ID = ?', (type_id,))
        already_existing = cursor.fetchone()
        if already_existing:
            cursor.execute('UPDATE LTN_DEVELOP.TASK_TYPES SET ID = ?, LABEL = ?, TYPE_ID = ? '
                           'WHERE ID = ?',
                           (updated_type.get('id'), updated_type.get('label'),
                            updated_type.get('type_id'), type_id))
            get_connection().commit()
            return 'UPDATED', 200
        else:
            task_id = req.get('task')
            is_relation = req.get('relation')
            cursor.execute('INSERT INTO LTN_DEVELOP.TASK_TYPES (LABEL, TASK_ID, TYPE_ID, RELATION) '
                           'VALUES (?, ?, ?, ?)',
                           (updated_type.get('label'), task_id,
                            updated_type.get('type_id'), is_relation))
            get_connection().commit()
            return 'CREATED', 200
    elif request.method == 'DELETE':
        cursor.execute('DELETE FROM LTN_DEVELOP.TASK_TYPES WHERE ID = ?', (type_id,))
        get_connection().commit()
        return 'DELETED', 200


def load_user_doc_id(document_id, user_id):
    cursor = get_connection().cursor()
    cursor.execute("SELECT ID FROM LTN_DEVELOP.USER_DOCUMENTS WHERE DOCUMENT_ID = ? AND USER_ID = ?",
                   (document_id, user_id))
    result = cursor.fetchone()
    if result:
        return str(result[0])
    return create_new_user_doc_id(user_id, document_id)


def delete_user_documents(user_document_ids):
    user_document_ids = "('" + "', '".join(user_document_ids) + "')"
    try:
        cursor = get_connection().cursor()
        cursor.execute("DELETE FROM LTN_DEVELOP.PAIRS WHERE USER_DOC_ID IN " +
                       user_document_ids)
        cursor.execute("DELETE FROM LTN_DEVELOP.OFFSETS WHERE USER_DOC_ID IN " +
                       user_document_ids)
        cursor.execute("DELETE FROM LTN_DEVELOP.ENTITIES WHERE USER_DOC_ID IN " +
                       user_document_ids)
        cursor.execute("DELETE FROM LTN_DEVELOP.USER_DOCUMENTS WHERE ID IN " +
                       user_document_ids)
        get_connection().commit()
        return True
    except Exception as e:
        raise e


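# The IN lists above (and in remove_entities_without_relations) inline the ids
# into the SQL as string literals. A minimal parameterized alternative, as a
# sketch; _delete_by_user_doc_ids is a hypothetical helper, not part of the
# original module, and table/column names come from trusted constants only:
def _delete_by_user_doc_ids(cursor, table, id_column, user_document_ids):
    if not user_document_ids:
        return  # nothing to delete, and "IN ()" would be invalid SQL
    placeholders = ", ".join("?" * len(user_document_ids))
    cursor.execute("DELETE FROM LTN_DEVELOP.%s WHERE %s IN (%s)"
                   % (table, id_column, placeholders),
                   tuple(user_document_ids))

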
def get_tasks():
    cursor = get_connection().cursor()
    cursor.execute('SELECT t.id, t.name, t.domain, t.config, t.author, u.name '
                   'FROM LTN_DEVELOP.TASKS t '
                   'LEFT OUTER JOIN LTN_DEVELOP.USERS u ON u.id = t.author '
                   'ORDER BY t.id')
    tasks = list()
    for result in cursor.fetchall():
        tasks.append({'task_id': result[0], 'task_name': result[1],
                      'task_domain': result[2], 'task_config': result[3],
                      'user_id': result[4], 'user_name': result[5]})
    return respond_with(tasks)


def delete_document(document_id):
    try:
        cursor = get_connection().cursor()
        cursor.execute("SELECT ID FROM LTN_DEVELOP.USER_DOCUMENTS WHERE DOCUMENT_ID = ?",
                       (document_id,))
        user_document_ids = map(lambda t: t[0], cursor.fetchall())
        delete_user_documents(user_document_ids)
        sql_to_prepare = 'CALL LTN_DEVELOP.delete_document (?)'
        params = {'DOCUMENT_ID': document_id}
        psid = cursor.prepare(sql_to_prepare)
        ps = cursor.get_prepared_statement(psid)
        cursor.execute_prepared(ps, [params])
        get_connection().commit()
        return True
    except Exception as e:
        raise e


def get_document(document_id):
    if get_connection() is None:
        try_reconnecting()
    if request.method == 'GET':
        try:
            result = load_document(document_id, current_user.get_id())
            return respond_with(result)
        except Exception as e:
            print e
            reset_connection()
            return 'Error while loading the document.', 500


def predict_relations(user_document_id, task_id):
    cursor = get_connection().cursor()
    sql_to_prepare = 'CALL LTN_DEVELOP.PREDICT_UD (?, ?, ?)'
    params = {'UD_ID': user_document_id, 'TASK_ID': str(task_id)}
    psid = cursor.prepare(sql_to_prepare)
    ps = cursor.get_prepared_statement(psid)
    cursor.execute_prepared(ps, [params])
    pairs = cursor.fetchall()
    return store_predicted_relations(pairs, user_document_id)


def return_entities():
    req = request.get_json()
    document_id = req['document_id']
    user1 = req['user1']
    user2 = req['user2']
    cursor = get_connection().cursor()
    predictions = sorted(get_entities_for_user_document(cursor, document_id, user1),
                         key=lambda x: x.start)
    gold_standard = sorted(get_entities_for_user_document(cursor, document_id, user2),
                           key=lambda x: x.start)
    p = 0
    matches, left_aligns, right_aligns, overlaps, misses, wrong_type = 0, 0, 0, 0, 0, {}
    # Classify each of user2's entities against user1's: both boundaries equal
    # is a match, only the start or only the end equal is a left/right align,
    # intersecting spans are an overlap, anything else is a miss. Type
    # disagreements are tallied per category in wrong_type.
    for entity in gold_standard:
        if len(predictions) == 0:
            misses += 1
            continue
        # Skip predictions that end before this entity starts.
        while predictions[p].end < entity.start:
            if p == len(predictions) - 1:
                break
            p += 1
        can_miss = True
        for candidate in predictions[p:]:
            if candidate.start > entity.end:
                if can_miss:
                    misses += 1
                    can_miss = False
                break
            if candidate.end < entity.start:
                break
            can_miss = False
            if candidate.start != entity.start:
                if candidate.end == entity.end:
                    if candidate.type != entity.type:
                        wrong_type["right-aligns"] = wrong_type.get("right-aligns", 0) + 1
                    right_aligns += 1
                else:
                    if candidate.type != entity.type:
                        wrong_type["overlaps"] = wrong_type.get("overlaps", 0) + 1
                    overlaps += 1
            else:
                if candidate.end == entity.end:
                    if candidate.type != entity.type:
                        wrong_type["matches"] = wrong_type.get("matches", 0) + 1
                    matches += 1
                else:
                    if candidate.type != entity.type:
                        wrong_type["left-aligns"] = wrong_type.get("left-aligns", 0) + 1
                    left_aligns += 1
        if can_miss:
            misses += 1
    return respond_with({"matches": matches,
                         "left-aligns": left_aligns,
                         "right-aligns": right_aligns,
                         "overlaps": overlaps,
                         "misses": misses,
                         "wrong-type": wrong_type})


def get_base_types():
    cursor = get_connection().cursor()
    cursor.execute('SELECT CODE, NAME, GROUP_ID, "GROUP", ID '
                   'FROM LTN_DEVELOP.TYPES ORDER BY "GROUP" DESC')
    types = list()
    for aType in cursor.fetchall():
        types.append({"code": aType[0], "name": aType[1], "groupId": aType[2],
                      "group": aType[3], "id": aType[4]})
    return types


def save_annotations(user_doc_id, annotations):
    # only save annotations from the current user, defined as userId 0 at loading time
    filtered_annotations = filter(lambda annotation: annotation.get('userId', 0) == 0,
                                  annotations)
    if not user_doc_id:
        return False
    print "inserting new annotations..."
    annotation_tuples = map(lambda annotation: convert_annotation(annotation, user_doc_id),
                            filtered_annotations)
    cursor = get_connection().cursor()
    cursor.executemany("INSERT INTO LTN_DEVELOP.ENTITIES (ID, USER_DOC_ID, TYPE_ID, LABEL) "
                       "VALUES (?, ?, ?, ?)", annotation_tuples)
    print "inserting new offsets..."
    offset_tuples = map(lambda annotation: convert_offset(annotation, user_doc_id),
                        filtered_annotations)
    cursor.executemany("INSERT INTO LTN_DEVELOP.OFFSETS VALUES (?, ?, ?, ?)",
                       offset_tuples)
    get_connection().commit()
    return True


def get_task_types(task_id, relation):
    cursor = get_connection().cursor()
    relation_flag = int(relation)
    cursor.execute('SELECT CODE, NAME, GROUP_ID, "GROUP", "LABEL", t.ID, tt.ID '
                   'FROM LTN_DEVELOP.TYPES t '
                   'JOIN LTN_DEVELOP.TASK_TYPES tt ON t.ID = tt.TYPE_ID '
                   'WHERE tt.TASK_ID = ? AND tt.RELATION = ? '
                   'ORDER BY "GROUP" DESC', (task_id, relation_flag))
    types = list()
    for row in cursor.fetchall():
        types.append({"code": row[0], "name": row[1], "groupId": row[2],
                      "group": row[3], "label": row[4],
                      "type_id": row[5], "id": row[6]})
    return types


def login():
    if get_connection() is None:
        try_reconnecting()
    req = request.get_json()
    if req and 'username' in req and 'password' in req:
        try:
            user = load_user(req['username'])
            if user and req['password'] == user.token:
                login_user(user, remember=True)
                user.token = None
                return respond_with(user.__dict__)
        except Exception as e:
            reset_connection()
            return str(e) + " Please try again later.", 500
    # Assumed fallback: the original fell through without a return (an error
    # in a Flask view) for missing or invalid credentials.
    return "Invalid username or password.", 401


def get_types(document_id, relation):
    cursor = get_connection().cursor()
    relation_flag = int(relation)
    cursor.execute('''SELECT CODE, NAME, GROUP_ID, "GROUP", "LABEL", t.ID, tt.ID
                      FROM LTN_DEVELOP.TYPES t
                      JOIN LTN_DEVELOP.TASK_TYPES tt ON t.ID = tt.TYPE_ID
                      JOIN LTN_DEVELOP.DOCUMENTS d ON tt.TASK_ID = d.TASK
                      WHERE d.id = ? AND tt.RELATION = ?
                      ORDER BY "GROUP" DESC''', (document_id, relation_flag))
    types = list()
    for row in cursor.fetchall():
        types.append({"code": row[0],
                      "name": "%s (%s)" % (row[4], row[1]),
                      "groupId": row[2], "group": row[3], "label": row[4],
                      "type_id": row[5], "id": row[6]})
    return types


def delete_annotation_data(user_doc_id):
    cursor = get_connection().cursor()
    print "Deleting old information for " + str(user_doc_id) + "..."
    print "Deleting existing pairs..."
    cursor.execute("DELETE FROM LTN_DEVELOP.PAIRS WHERE USER_DOC_ID = ?", (user_doc_id,))
    get_connection().commit()
    print "Deleting existing offsets..."
    cursor.execute("DELETE FROM LTN_DEVELOP.OFFSETS WHERE USER_DOC_ID = ?", (user_doc_id,))
    get_connection().commit()
    print "Deleting existing annotations..."
    cursor.execute("DELETE FROM LTN_DEVELOP.ENTITIES WHERE USER_DOC_ID = ?", (user_doc_id,))
    get_connection().commit()


def load_document(document_id, user_id, show_predictions=False):
    cursor = get_connection().cursor()
    result = {}
    print "Loading information for document_id: '%s' and user: '%s'" % (document_id, user_id)
    result['text'] = get_text(cursor, document_id)
    denotations, users, annotation_id_map = get_denotations_and_users(
        cursor, document_id, user_id, show_predictions)
    result['denotations'] = denotations
    result['relations'] = get_relations(cursor, document_id, user_id,
                                        annotation_id_map, show_predictions)
    result['sourceid'] = document_id
    result['config'] = {
        'entity types': get_entity_types(document_id),
        'relation types': get_relation_types(document_id),
        'users': users
    }
    cursor.close()
    return result


def get_document_details(document_id):
    user_documents = list()
    cursor = get_connection().cursor()
    user_id = current_user.get_id()
    cursor.execute('SELECT d.id, MIN(d.user_id), MIN(u.name), COUNT(DISTINCT e.id), '
                   'COUNT(DISTINCT p.id), MIN(d.visibility) '
                   'FROM LTN_DEVELOP.USER_DOCUMENTS d '
                   'JOIN LTN_DEVELOP.USERS u ON u.id = d.user_id '
                   'LEFT OUTER JOIN LTN_DEVELOP.ENTITIES e ON e.user_doc_id = d.id '
                   'LEFT OUTER JOIN LTN_DEVELOP.PAIRS p ON p.user_doc_id = d.id AND p.ddi = 1 '
                   'WHERE d.document_id = ? AND (d.visibility = 1 OR d.user_id = ?) '
                   'GROUP BY d.id', (document_id, user_id))
    for row in cursor.fetchall():
        user_documents.append({'id': row[0],
                               'user_id': row[1],
                               'user_name': row[2],
                               'entities': row[3],
                               'pairs': row[4],
                               'visible': bool(row[5]),
                               'from_current_user': row[1] == user_id})
    return respond_with(user_documents)


def get_users():
    cursor = get_connection().cursor()
    users = User.all(cursor)
    cursor.close()
    return respond_with(map(lambda user: user.__dict__, users))


def load_user(user_id):
    return User.get(user_id, get_connection().cursor())


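# load_user() matches the signature flask-login expects for its user loader.
# A minimal sketch of how it would be registered (the login_manager instance
# is an assumption; it would be created wherever the Flask app is set up):
#
#   from flask_login import LoginManager
#
#   login_manager = LoginManager()
#   login_manager.init_app(app)
#   login_manager.user_loader(load_user)  # or decorate load_user directly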