def label_earmark(earmark, matches, conn, cmd, cur):
    """Interactively label the candidate entity matches of one earmark.

    Clears the terminal, prints the earmark's normalized short and full
    descriptions, then walks ``matches`` in order and records labels through
    ``cur.execute(cmd, row)``:

    * once more than 10 "no" answers have accumulated, every remaining
      candidate is recorded as ``False`` without prompting;
    * a score at or above ``AUTO_LABEL_POSITIVE`` is recorded as ``True``
      without prompting;
    * a score at or below ``AUTO_LABEL_NEGATIVE`` is skipped (nothing is
      written);
    * any other candidate is shown to the user and recorded with the
      yes/no answer.

    If no candidate ended up labeled, a placeholder row
    ``(earmark_id, -1, -1, 0, False)`` is written. The connection is
    committed before returning.

    Args:
        earmark: Mapping providing the ``'earmark_id'``,
            ``'short_description'`` and ``'full_description'`` keys.
        matches: Sequence of items indexable as
            ``(score, entity, earmark_document_id)``, where ``entity`` is a
            mapping providing ``'id'`` and ``'entity_inferred_name'``.
        conn: Connection-like object; only ``commit()`` is called.
        cmd: Statement handed to ``cur.execute`` together with a five-field
            row ``(earmark_id, document_id, entity_id, score, label)``.
            Presumably a parameterized INSERT -- confirm against the caller.
        cur: Cursor-like object; only ``execute(cmd, row)`` is called.
    """
    os.system('clear')
    short_desc = normalize(earmark['short_description'])
    full_desc = normalize(earmark['full_description'])
    earmark_id = earmark['earmark_id']

    # Single-argument print(...) behaves the same under Python 2 and 3.
    # '%' binds tighter than '+', so only the colour code is interpolated and
    # the description is appended afterwards; the output is what one expects.
    print("Lets Label Earmark: %d" % earmark_id)
    print("Short Desription: %s" % bcolors.OKGREEN + short_desc + bcolors.ENDC)
    print("Full Description: %s" % bcolors.OKGREEN + full_desc + bcolors.ENDC)

    consecutive_nos = 0
    labeled = False

    for match in matches:
        # Index instead of unpacking so items carrying extra trailing fields
        # are still accepted.
        score, entity, earmark_document_id = match[0], match[1], match[2]
        entity_id = entity['id']
        query_str = bcolors.WARNING + entity['entity_inferred_name'] + bcolors.ENDC
        # Everything except the final True/False label is shared by all rows.
        row = (earmark_id, earmark_document_id, entity_id, score)

        if consecutive_nos > 10:
            # The user has said "no" often enough: auto-label the rest False.
            cur.execute(cmd, row + (False,))
        elif score >= AUTO_LABEL_POSITIVE:
            print("Auto Lableing: %s" % query_str)
            cur.execute(cmd, row + (True,))
            labeled = True
        elif score <= AUTO_LABEL_NEGATIVE:
            # Very low scores are deliberately not recorded at all.
            pass
        else:
            print(path_tools.doc_id_to_path(earmark_document_id))
            if query_yes_no(query_str):
                cur.execute(cmd, row + (True,))
                consecutive_nos = 0
            else:
                cur.execute(cmd, row + (False,))
                consecutive_nos += 1
            # NOTE(review): assumes a prompted candidate counts as labeled
            # whatever the answer was -- confirm against the original layout.
            labeled = True

    if not labeled:
        print("no matches above minimum theshold!")
        # Placeholder row marking the earmark as processed without a match.
        cur.execute(cmd, (earmark_id, -1, -1, 0, False))

    # NOTE(review): assumed to commit unconditionally so that labels written
    # in the loop are persisted -- confirm.
    conn.commit()
def analyze_entity(entity_id):
    """Ask the user how the entity ``entity_id`` relates to earmarks.

    Entities for which ``entity_is_earmark`` or ``entity_is_negative_example``
    returns true are reported and skipped. Otherwise the user is asked whether
    the entity looks like an earmark:

    * yes, and it matches an earmark on the OMB website: the user supplies the
      earmark id and ``amend_earmark.match_earmark_with_entity`` is called;
    * yes, no match, and the user confirms: ``amend_earmark.crete_new_earmark``
      is called with the year from ``path_tools.get_report_year``;
    * no, and the user confirms: the entity is passed to
      ``amend_earmark.insert_entity_to_negative_table``.

    The terminal is cleared afterwards. Nothing is returned.

    NOTE(review): a function with this same name and logic is defined again
    later in this file; that later definition replaces this one at import
    time.
    """
    if entity_is_earmark(entity_id):
        print("Entity is matched to earmark already, your data might be stale. Ignoring entity %d" % entity_id)
        return
    if entity_is_negative_example(entity_id):
        print("Entity %d is flagged as negative example, ignoring" % entity_id)
        return

    entity = Entity(entity_id)
    inferred_name = entity.entity_inferred_name
    doc_path = path_tools.doc_id_to_path(entity.document_id)
    looks_like_earmark = prompt.query_yes_no(
        "Does this entity look like an earmark?\n%s\nId: %d\nPath: %s\n"
        % (inferred_name, entity_id, doc_path),
        default="yes")

    if not looks_like_earmark:
        # Not an earmark: offer to record it as a negative example.
        if prompt.query_yes_no("Do you want to flag it as negative match?\n",
                               default="yes"):
            amend_earmark.insert_entity_to_negative_table(entity_id)
            print("Entity %d has been labeled as negative example" % entity_id)
    elif prompt.query_yes_no("Does it match an earmark on OMB website?\n",
                             default="yes"):
        earmark_id = prompt.query_number("What is the earmark id?\n")
        amend_earmark.match_earmark_with_entity(earmark_id, entity.id)
    elif prompt.query_yes_no("Are you sure you want to create new earmark?\n",
                             default="yes"):
        # The year is derived from the entity's document, not asked for.
        report_year = path_tools.get_report_year(entity.document_id)
        amend_earmark.crete_new_earmark(entity.id, report_year)

    # ESC + "[2J" clears the terminal.
    # NOTE(review): assumed to run after every prompted entity -- confirm.
    print(chr(27) + "[2J")
def analyze_entity(entity_id):
    """Interactively classify one entity as an earmark or a negative example.

    Returns early, after printing a notice, when ``entity_is_earmark`` or
    ``entity_is_negative_example`` is true for ``entity_id``. Otherwise the
    user is prompted and, depending on the answers, the entity is matched to
    an earmark id typed by the user, turned into a new earmark (year taken
    from ``path_tools.get_report_year``), or inserted into the negative table.
    The terminal is cleared at the end. Nothing is returned.

    NOTE(review): an earlier definition with the same name and logic exists
    in this file; this definition overrides it.
    """
    LOOKS_LIKE = "Does this entity look like an earmark?\n%s\nId: %d\nPath: %s\n"
    MATCHES_OMB = "Does it match an earmark on OMB website?\n"
    WHICH_ID = "What is the earmark id?\n"
    CREATE_NEW = "Are you sure you want to create new earmark?\n"
    FLAG_NEGATIVE = "Do you want to flag it as negative match?\n"

    if entity_is_earmark(entity_id):
        print("Entity is matched to earmark already, your data might be stale. Ignoring entity %d" % entity_id)
        return

    if entity_is_negative_example(entity_id):
        print("Entity %d is flagged as negative example, ignoring" % entity_id)
        return

    entity = Entity(entity_id)
    summary = LOOKS_LIKE % (
        entity.entity_inferred_name,
        entity_id,
        path_tools.doc_id_to_path(entity.document_id),
    )

    if prompt.query_yes_no(summary, default="yes"):
        if prompt.query_yes_no(MATCHES_OMB, default="yes"):
            matched_id = prompt.query_number(WHICH_ID)
            amend_earmark.match_earmark_with_entity(matched_id, entity.id)
        elif prompt.query_yes_no(CREATE_NEW, default="yes"):
            # Year comes from the document path rather than a prompt.
            year = path_tools.get_report_year(entity.document_id)
            amend_earmark.crete_new_earmark(entity.id, year)
    elif prompt.query_yes_no(FLAG_NEGATIVE, default="yes"):
        # The user said it is not an earmark and agreed to flag it.
        amend_earmark.insert_entity_to_negative_table(entity_id)
        print("Entity %d has been labeled as negative example" % entity_id)

    # Emit the ANSI "erase display" sequence to clear the terminal.
    # NOTE(review): assumed to run once per prompted entity -- confirm.
    print(chr(27) + "[2J")