def LF_cause(c):
    """Label 1 (positive) when a causal keyword lies between the disease and gene mentions, else 0."""
    between = set(get_between_tokens(c))
    return 1 if cause_words & between else 0
def neg_nearby(c):
    """Return True when a negation word appears between the mentions or within 15 tokens to their left."""
    if negative.intersection(get_between_tokens(c)):
        return True
    return bool(negative.intersection(get_left_tokens(c, window=15)))
def LF_DEBUG(c):
    """
    Debugging label function: dumps the candidate's context to stdout.
    Feel free to ignore. Always returns 0 (abstain).

    keyword arguments:
    c - The candidate object to be labeled
    """
    print(c)
    print()
    print("Left Tokens")
    print(list(get_left_tokens(c[0], window=5)))
    print()
    print("Right Tokens")
    print(list(get_right_tokens(c[0])))
    print()
    print("Between Tokens")
    print(list(get_between_tokens(c)))
    print()
    print("Tagged Text")
    tagged = get_tagged_text(c)
    print(tagged)
    print(re.search(r'{{B}} .* is a .* {{A}}', tagged))
    print()
    print("Get between Text")
    between_text = get_text_between(c)
    print(between_text)
    print(len(between_text))
    print()
    print("Parent Text")
    print(c.get_parent())
    print()
    return 0
def LF_GiG_METHOD_DESC(c):
    """Label -1 when method/experimental-design phrasing opens the sentence or sits between the mentions, else 0."""
    pattern = ltp(method_indication)
    opening = " ".join(c.get_parent().words[0:20])
    if re.search(pattern, opening, flags=re.I):
        return -1
    if re.search(pattern, " ".join(get_between_tokens(c)), flags=re.I):
        return -1
    return 0
def LF_DG_METHOD_DESC(c):
    """
    Label -1 when phrases implying the sentence describes an experimental
    design appear in the first 20 tokens or between the mentions, else 0.
    """
    indicator = ltp(method_indication)
    sentence_opening = " ".join(c.get_parent().words[0:20])
    between_text = " ".join(get_between_tokens(c))
    if re.search(indicator, sentence_opening, flags=re.I):
        return -1
    if re.search(indicator, between_text, flags=re.I):
        return -1
    return 0
def LF_DEBUG(c):
    """
    Debugging label function: prints the candidate's surrounding context
    and always returns 0 (abstain).

    NOTE(review): the original used Python 2 ``print`` statements (a syntax
    error under Python 3, which the rest of this module targets) and named
    its parameter ``C`` while the body referenced ``c`` — a NameError at
    call time. Both defects are fixed here. This definition also shadows an
    earlier LF_DEBUG in this module; confirm which one is wanted.

    keyword arguments:
    c - The candidate object to be labeled
    """
    print("Left Tokens")
    print(list(get_left_tokens(c, window=3)))
    print()
    print("Right Tokens")
    print(list(get_right_tokens(c)))
    print()
    print("Between Tokens")
    print(list(get_between_tokens(c)))
    print()
    print("Tagged Text")
    print(get_tagged_text(c))
    print(re.search(r'{{B}} .* is a .* {{A}}', get_tagged_text(c)))
    print()
    print("Get between Text")
    print(get_text_between(c))
    print(len(get_text_between(c)))
    print()
    print("Parent Text")
    print(c.get_parent())
    print()
    return 0
def LF_positive(c):
    """
    Label 1 when a positive keyword is found between the mentions, or within
    20 tokens to the left/right of either mention, provided no negation is
    nearby; else 0.
    """
    if positive.intersection(get_between_tokens(c)) and not neg_nearby(c):
        return 1
    if positive.intersection(get_left_tokens(c[0], window=20)) and not neg_nearby(c):
        return 1
    if positive.intersection(get_left_tokens(c[1], window=20)) and not neg_nearby(c):
        return 1
    if positive.intersection(get_right_tokens(c[0], window=20)) and not neg_nearby(c):
        return 1
    if positive.intersection(get_right_tokens(c[1], window=20)) and not neg_nearby(c):
        return 1
    return 0
def LF_other_verbs(c):
    """
    Label 1 when an 'other verbs' keyword occurs between the mentions or
    within 20 tokens on either side of either mention, with no negation
    nearby; else 0.
    """
    if other_verbs.intersection(get_between_tokens(c)) and not neg_nearby(c):
        return 1
    if other_verbs.intersection(get_left_tokens(c[0], window=20)) and not neg_nearby(c):
        return 1
    if other_verbs.intersection(get_left_tokens(c[1], window=20)) and not neg_nearby(c):
        return 1
    if other_verbs.intersection(get_right_tokens(c[0], window=20)) and not neg_nearby(c):
        return 1
    if other_verbs.intersection(get_right_tokens(c[1], window=20)) and not neg_nearby(c):
        return 1
    return 0
def LF_neg_assertions(c):
    """
    Label -1 when a negation word occurs between the mentions, within 10
    tokens left of the first mention, within 20 tokens left of the second,
    or within 20 tokens right of the first; else 0.

    NOTE(review): the right-of-second-mention check was disabled in the
    original; that asymmetry is preserved here.
    """
    if negative.intersection(get_between_tokens(c)):
        return -1
    if negative.intersection(get_left_tokens(c[0], window=10)):
        return -1
    if negative.intersection(get_left_tokens(c[1], window=20)):
        return -1
    if negative.intersection(get_right_tokens(c[0], window=20)):
        return -1
    return 0
def LF_isolate(c):
    """
    Label 1 when an 'isolate' keyword occurs between the mentions or within
    20 tokens on either side of either mention, with no negation nearby;
    else 0.
    """
    if isolate.intersection(get_between_tokens(c)) and not neg_nearby(c):
        return 1
    if isolate.intersection(get_left_tokens(c[0], window=20)) and not neg_nearby(c):
        return 1
    if isolate.intersection(get_left_tokens(c[1], window=20)) and not neg_nearby(c):
        return 1
    if isolate.intersection(get_right_tokens(c[0], window=20)) and not neg_nearby(c):
        return 1
    if isolate.intersection(get_right_tokens(c[1], window=20)) and not neg_nearby(c):
        return 1
    return 0
def LF_between_detect_refined(c):
    """
    Label 1 when the tokens between the spans match an attendance-style
    pattern: a transitive predicate, OR a preposition together with an
    intransitive predicate, OR both words of the 'member of' phrase;
    else -1.
    """
    left_span, right_span = c  # unpack mirrors the original; spans are otherwise unused
    between = list(get_between_tokens(c))
    preps = {'at', 'to', 'from'}
    intrans_preds = {
        'enrolled', 'graduated', 'studied', 'went', 'returned', 'educated'
    }
    trans_preds = {'attended'}
    phrase_words = {'member', 'of'}
    # Original precedence was: trans OR (prep AND intrans) OR phrase>1.
    has_transitive = len(trans_preds.intersection(between)) > 0
    has_prep_and_intrans = (len(preps.intersection(between)) > 0
                            and len(intrans_preds.intersection(between)) > 0)
    has_full_phrase = len(phrase_words.intersection(between)) > 1
    if has_transitive or has_prep_and_intrans or has_full_phrase:
        return 1
    return -1
def LF_DG_TITLE(c):
    """
    Label -1 when the tagged text looks like a paper title (title indicator
    at the start or end, a translator note, or a ':' between the mentions);
    else 0.
    """
    tagged = get_tagged_text(c)
    title_pattern = ltp(title_indication)
    if re.search(r'^(\[|\[ )?' + title_pattern, tagged, flags=re.I):
        return -1
    if re.search(title_pattern + r'$', tagged, flags=re.I):
        return -1
    if "(author's transl)" in tagged:
        return -1
    if ":" in get_between_tokens(c):
        return -1
    return 0
def LF_distant_supervision_and_words(c):
    """
    Distant-supervision label function.

    Returns -1 when subject and object are the same span; 0 (abstain) when no
    trigger word from ``words`` appears in the sentence or a blocking word
    from ``not_words`` lies between the mentions; 1 when the subject/object
    spans (or their noun variants) are found in the known-samples store;
    otherwise 0.

    Relies on module-level ``words``, ``not_words``, ``predicate_resume``,
    ``sentimantic_session``, ``subject_type_end`` and ``object_type_end``.

    NOTE(review): the original repeated the ``subject_span == object_span``
    check a second time with no intervening mutation — the duplicate (dead)
    check has been removed.
    """
    try:
        subject_span = getattr(c, "subject").get_span()
        object_span = getattr(c, "object").get_span()
        # A self-referential candidate cannot express the relation.
        if subject_span == object_span:
            return -1
        # Abstain unless a trigger word is present and no blocker intervenes.
        if (len(words.intersection(c.get_parent().words)) < 1
                or len(not_words.intersection(get_between_tokens(c))) > 0):
            return 0
        if is_in_known_samples(predicate_resume, sentimantic_session,
                               subject_span, object_span):
            return 1
        # Also test noun variants of each span against the known samples.
        sample_subjects = get_nouns(getattr(c, "subject"), subject_type_end)
        sample_objects = get_nouns(getattr(c, "object"), object_type_end)
        sample_subjects.append(subject_span)
        sample_objects.append(object_span)
        for sample_subject in sample_subjects:
            for sample_object in sample_objects:
                if is_in_known_samples(predicate_resume, sentimantic_session,
                                       sample_subject, sample_object):
                    return 1
        # TODO: implement date handling (carried over from the original).
        return 0
    except Exception as e:
        # Best-effort: a malformed candidate abstains rather than crashing.
        print(e)
        print("Not found candidate" + str(c.id))
        return 0
def LF_combined_refined(c):
    """Label 1 when LF_combined fires and no taboo word lies between the mentions, else -1."""
    forbidden = {'later', 'here', 'there'}
    if LF_combined(c) == 1 and not forbidden.intersection(get_between_tokens(c)):
        return 1
    return -1
def LF_combined(c):
    """Label 1 when a between-span cue (refined detector or literal 'at') and the right-context detector both fire, else -1."""
    between_hit = LF_between_detect_refined(c) == 1 or 'at' in get_between_tokens(c)
    if between_hit and LF_right_detect(c) == 1:
        return 1
    return -1
def LF_familial_relationship(c):
    """Label -1 when a family-relationship word lies between the mentions, else 0."""
    if family.intersection(get_between_tokens(c)):
        return -1
    return 0
def LF_words_between(c):
    """Label 1 when any trigger word from ``words`` lies between the mentions, else 0."""
    return 1 if words.intersection(get_between_tokens(c)) else 0
def LF_husband_wife(c):
    """Label 1 when a spouse word lies between the mentions, else 0."""
    if spouses.intersection(get_between_tokens(c)):
        return 1
    return 0
def LF_DG_DISTANCE_SHORT(c):
    """
    Label -1 when the disease and gene mentions are adjacent (two or fewer
    tokens apart), else 0.
    """
    token_count = len(list(get_between_tokens(c)))
    if token_count <= 2:
        return -1
    return 0
def LF_crime_detect(c):
    """Label 1 when a crime tag lies between the mentions, else 0."""
    if crime_tags.intersection(get_between_tokens(c)):
        return 1
    return 0
def LF_political_title(c):
    """Label 1 when a political title lies between the mentions, else 0."""
    between = set(get_between_tokens(c))
    return 1 if titles & between else 0
def LF_not_words(c):
    """Label -1 when a blocking word from ``not_words`` lies between the mentions, else 0."""
    if not_words.intersection(get_between_tokens(c)):
        return -1
    return 0
def LF_DG_DISTANCE_LONG(c):
    """
    Label -1 when the disease and gene mentions are more than 50 tokens
    apart, else 0.
    """
    separation = len(list(get_between_tokens(c)))
    return -1 if separation > 50 else 0
def LF_too_far_apart(c):
    """
    Label -1 when more than 10 tokens separate the two mentions, else 0.

    Wraps the token stream in ``list`` before ``len`` for consistency with
    LF_DG_DISTANCE_SHORT / LF_DG_DISTANCE_LONG in this module; this is a
    no-op when ``get_between_tokens`` already returns a list, and avoids a
    TypeError when it yields a generator.
    """
    return -1 if len(list(get_between_tokens(c))) > 10 else 0
def LF_and_married(c):
    """Label 1 when 'and' appears between the mentions and 'married' appears in the right context, else 0."""
    if 'and' in get_between_tokens(c) and 'married' in get_right_tokens(c):
        return 1
    return 0
def LF_nospace(c):
    """
    Label 1 when get_between_tokens(c) equals the empty list (nothing
    between the mentions), else 0.

    NOTE(review): deliberately compares against ``[]`` exactly as the
    original did — a non-list return (e.g. a generator) never compares
    equal and yields 0.
    """
    if get_between_tokens(c) == []:
        return 1
    return 0
def LF_other_relationship(c):
    """Label -1 when an 'other relationship' word lies between the mentions, else 0."""
    between = set(get_between_tokens(c))
    return -1 if other & between else 0
def LF_betweentokens(c):
    """
    Label 1 when fewer than 2 tokens separate the mentions, else 0.

    Wraps the token stream in ``list`` before ``len`` for consistency with
    LF_DG_DISTANCE_SHORT / LF_DG_DISTANCE_LONG in this module; this is a
    no-op when ``get_between_tokens`` already returns a list, and avoids a
    TypeError when it yields a generator.
    """
    return 1 if len(list(get_between_tokens(c))) < 2 else 0
def LF_and_married(c):
    """
    Label 1 when 'and' appears between the mentions and 'married' appears
    in the right context, else 0.

    NOTE(review): this is an exact duplicate of an LF_and_married defined
    earlier in this module; this later definition shadows the earlier one.
    """
    has_and = 'and' in get_between_tokens(c)
    has_married = has_and and 'married' in get_right_tokens(c)
    return 1 if has_married else 0
def HELPER_VERB_BTWN(c):
    """Label 1 when a helper-verb lemma occurs among up to 3 between-span tokens, else 0."""
    between_lemmas = set(get_between_tokens(c, attrib='lemmas', n_max=3))
    return 1 if helper_vbs.intersection(between_lemmas) else 0
def PREP_PHRASE_BTWN(c):
    """
    Label 1 when a helper-verb lemma occurs among up to 3 between-span
    tokens, else 0.

    NOTE(review): the body is identical to HELPER_VERB_BTWN and tests
    ``helper_vbs`` despite the name suggesting prepositional phrases —
    possibly a copy-paste; confirm the intended lexicon. Behavior is
    preserved exactly here.
    """
    lemmas_between = set(get_between_tokens(c, attrib='lemmas', n_max=3))
    if helper_vbs.intersection(lemmas_between):
        return 1
    return 0