def LF_DEBUG(c):
    """
    Debugging label function: print the context of a candidate and abstain.

    Prints, in order: the candidate itself, the left tokens (window of 5)
    and right tokens of its first span, the tokens between the spans, the
    tagged text, the result of a sample regex search over the tagged text,
    the text between the spans together with its length, and the
    candidate's parent.

    keyword arguments:
    c - The candidate object to be labeled

    Returns 0 in every case, so it never contributes a label.
    """
    print(c)
    print()
    print("Left Tokens")
    # The helper result is wrapped in list() so the tokens themselves are shown.
    print(list(get_left_tokens(c[0], window=5)))
    print()
    print("Right Tokens")
    print(list(get_right_tokens(c[0])))
    print()
    print("Between Tokens")
    print(list(get_between_tokens(c)))
    print()
    print("Tagged Text")
    print(get_tagged_text(c))
    # Sample pattern check: prints the match object, or None when it does not match.
    print(re.search(r'{{B}} .* is a .* {{A}}', get_tagged_text(c)))
    print()
    print("Get between Text")
    print(get_text_between(c))
    print(len(get_text_between(c)))
    print()
    print("Parent Text")
    print(c.get_parent())
    print()
    return 0
def LF_DG_CONCLUSION_TITLE(c):
    """
    Vote positive for sentences that read like an abstract's conclusion.

    c - The candidate object to be labeled

    Returns 1 when the tagged text contains "CONCLUSION:" or "concluded"
    (case-sensitive substring checks, anywhere in the text), otherwise 0.
    """
    tagged_text = get_tagged_text(c)
    for marker in ("CONCLUSION:", "concluded"):
        if marker in tagged_text:
            return 1
    return 0
Beispiel #3
0
def LF_CG_IN_SERIES(c):
    """
    Vote negative when the mention appears to sit inside an enumeration
    of other genes or compounds.

    c - The candidate object to be labeled

    Returns -1 when the tagged text holds at least two commas and the
    sequence ", and"; otherwise 0.
    """
    tagged_text = get_tagged_text(c)
    comma_count = len(re.findall(r',', tagged_text))
    if comma_count < 2:
        return 0
    return -1 if re.search(', and', tagged_text) else 0
def LF_DG_TITLE(c):
    """
    Vote negative for sentences that look like a paper title.

    c - The candidate object to be labeled

    Returns -1 when the pattern built from title_indication matches at the
    very start or at the very end of the tagged text (case-insensitive),
    otherwise 0.
    """
    tagged_text = get_tagged_text(c)
    title_pattern = ltp(title_indication)
    # Try the start-anchored form first, then the end-anchored form.
    for anchored in (r'^' + title_pattern, title_pattern + r'$'):
        if re.search(anchored, tagged_text, flags=re.I):
            return -1
    return 0
def LF_DG_METHOD_DESC(c):
    """
    Vote negative for sentences that describe an experimental design.

    c - The candidate object to be labeled

    Returns 0 immediately when the tagged text contains "we found";
    otherwise -1 when the pattern built from method_indication matches
    the tagged text (case-insensitive), else 0.
    """
    tagged_text = get_tagged_text(c)

    # TODO FIX for words that change the sentence meaning from methods to results
    if "we found" in tagged_text:
        return 0
    return -1 if re.search(ltp(method_indication), tagged_text, flags=re.I) else 0
def LF_DG_TITLE(c):
    """
    Vote negative for sentences that look like a paper title.

    c - The candidate object to be labeled

    Returns -1 when any of the following holds, checked in this order:
    the title_indication pattern matches at the start of the tagged text
    (optionally preceded by "[" or "[ "), it matches at the end of the
    tagged text, the tagged text contains "(author's transl)", or ":" is
    one of the tokens between the two mentions. Otherwise returns 0.
    """
    tagged_text = get_tagged_text(c)
    title_pattern = ltp(title_indication)
    # `or` short-circuits, so later checks only run when earlier ones fail.
    looks_like_title = (
        re.search(r'^(\[|\[ )?' + title_pattern, tagged_text, flags=re.I)
        or re.search(title_pattern + r'$', tagged_text, flags=re.I)
        or "(author's transl)" in tagged_text
        or ":" in get_between_tokens(c)
    )
    return -1 if looks_like_title else 0
def LF_DG_RISK(c):
    """
    Vote positive for sentences that talk about risk of / risk for
    something (e.g. a patient being at risk for a disease).

    c - The candidate object to be labeled

    Returns 1 when "risk of" or "risk for" occurs in the tagged text
    (case-insensitive), otherwise 0.
    """
    if re.search(r"risk (of|for)", get_tagged_text(c), flags=re.I):
        return 1
    return 0
def LF_DG_NEGATIVE_DIRECTION(c):
    """
    Vote positive when a word from negative_direction (a negative
    response / down-regulation cue) accompanies the two mentions.

    c - The candidate object to be labeled

    Returns 1 when the cue is found between the mentions (either order,
    via the rule_regex_search_btw_* helpers) or after both mentions in
    the tagged text; otherwise 0.
    """
    direction_pattern = ltp(negative_direction)
    between_pattern = r'.*' + direction_pattern + r'.*'
    # Both orderings are evaluated up front.
    forward_hit = rule_regex_search_btw_AB(c, between_pattern, 1)
    reverse_hit = rule_regex_search_btw_BA(c, between_pattern, 1)
    if forward_hit or reverse_hit:
        return 1
    # No flags are passed to this search, so it is case-sensitive.
    trailing_pattern = r'({{A}}|{{B}}).*({{A}}|{{B}}).*' + direction_pattern
    if re.search(trailing_pattern, get_tagged_text(c)):
        return 1
    return 0
def LF_DG_IS_BIOMARKER(c):
    """
    Vote positive when the sentence appears to talk about a biomarker
    (a biomarker hints at a D-G association).

    c - The candidate object being passed in

    Returns 1 when a biomarker_indicators phrase occurs before or after
    the {{B}} mention in the tagged text (case-insensitive), otherwise 0.
    """
    tagged_text = get_tagged_text(c)
    indicator_pattern = ltp(biomarker_indicators)
    candidate_patterns = (
        indicator_pattern + r".*{{B}}",   # indicator precedes the mention
        r"{{B}}.*" + indicator_pattern,   # indicator follows the mention
    )
    for pattern in candidate_patterns:
        if re.search(pattern, tagged_text, flags=re.I):
            return 1
    return 0
def LF_DG_DIAGNOSIS(c):
    """
    Vote positive when wording from diagnosis_indicators (implying a
    patient diagnosis) accompanies the two mentions.

    c - The candidate object to be labeled

    Returns 1 when the cue is found between the mentions (either order,
    via the rule_regex_search_btw_* helpers) or after both mentions in
    the tagged text; otherwise 0.
    """
    diagnosis_pattern = ltp(diagnosis_indicators)
    between_pattern = r'.*' + diagnosis_pattern + r".*"
    # Both orderings are evaluated up front.
    forward_hit = rule_regex_search_btw_AB(c, between_pattern, 1)
    reverse_hit = rule_regex_search_btw_BA(c, between_pattern, 1)
    if forward_hit or reverse_hit:
        return 1
    # No flags are passed to this search, so it is case-sensitive.
    trailing_pattern = r'({{A}}|{{B}}).*({{A}}|{{B}}).*' + diagnosis_pattern
    if re.search(trailing_pattern, get_tagged_text(c)):
        return 1
    return 0
def LF_DG_PATIENT_WITH(c):
    """
    Vote positive for the phrase "patient(s) with ... {{A}}".

    c - The candidate object to be labeled

    Returns 1 when "patient with" / "patients with" is followed, within
    1 to 200 characters, by the {{A}} mention in the tagged text
    (case-insensitive); otherwise 0.
    """
    match = re.search(r"patient(s)? with.{1,200}{{A}}",
                      get_tagged_text(c),
                      flags=re.I)
    if match:
        return 1
    return 0
Beispiel #12
0
def LF_DISEASE_SUFFIX(c):
    """
    Vote positive when the {{A}} mention is directly followed by one of
    the key phrases/words in gene_suffix_indicators.

    The original intent (per the earlier docstring) is to confirm the
    type of the tagged entity from the word that follows it.

    c - The candidate object to be labeled

    Returns 1 when "{{A}} <indicator>" occurs in the tagged text
    (case-insensitive), otherwise 0.
    """
    suffix_pattern = r'{{A}} ' + ltp(gene_suffix_indicators)
    # The flags argument was previously written without a separating comma,
    # which made the whole definition a syntax error.
    return 1 if re.search(suffix_pattern, get_tagged_text(c), flags=re.I) else 0
def LF_DG_NO_ASSOCIATION(c):
    """
    Vote negative when a key phrase suggests the d-g pair is not an
    association.

    c - The candidate object to be labeled

    Returns -1 when a no_direct_association phrase occurs in the text
    between the mentions, or before / after either mention in the tagged
    text (all case-insensitive); otherwise 0.
    """
    phrase_pattern = ltp(no_direct_association)
    if re.search(phrase_pattern, get_text_between(c), flags=re.I):
        return -1

    tagged_text = get_tagged_text(c)
    phrase_then_mention = phrase_pattern + r".*({{B}}|{{A}})"
    mention_then_phrase = r"({{B}}|{{A}}).*" + phrase_pattern
    for pattern in (phrase_then_mention, mention_then_phrase):
        if re.search(pattern, tagged_text, flags=re.I):
            return -1
    return 0
def LF_DG_WEAK_ASSOCIATION(c):
    """
    Vote negative when a phrase indicates only a weak association
    between the disease and the gene.

    c - The candidate object to be labeled

    Returns -1 when a weak_association phrase occurs in the text between
    the mentions, or before / after either mention in the tagged text
    (all case-insensitive); otherwise 0.
    """
    phrase_pattern = ltp(weak_association)
    if re.search(phrase_pattern, get_text_between(c), flags=re.I):
        return -1

    tagged_text = get_tagged_text(c)
    phrase_then_mention = phrase_pattern + r".*({{B}}|{{A}})"
    mention_then_phrase = r"({{B}}|{{A}}).*" + phrase_pattern
    for pattern in (phrase_then_mention, mention_then_phrase):
        if re.search(pattern, tagged_text, flags=re.I):
            return -1
    return 0
Beispiel #15
0
def LF_CD_METHOD_DESC(c):
    """
    Vote negative for sentences that describe an experimental design.

    c - The candidate object to be labeled

    Returns -1 when the pattern built from method_indication matches the
    tagged text (case-insensitive), otherwise 0.
    """
    describes_method = re.search(
        ltp(method_indication), get_tagged_text(c), flags=re.I)
    return -1 if describes_method else 0
def LF_DEBUG(C):
    """
    Debugging label function: print the context of a candidate and abstain.

    Prints, in order: the left tokens (window of 3), the right tokens,
    the tokens between the mentions, the tagged text, the result of a
    sample regex search over the tagged text, the text between the
    mentions together with its length, and the candidate's parent.

    C - The candidate object to be inspected

    Returns 0 in every case, so it never contributes a label.
    """
    # The body used a lowercase ``c`` that was never bound (the parameter is
    # ``C``), so it raised NameError unless a global ``c`` happened to exist.
    c = C

    # Single-argument print(...) calls behave the same under Python 2's print
    # statement and Python 3's print function; print("") yields the blank
    # separator line on both (a bare print() would show "()" on Python 2).
    print("Left Tokens")
    print(get_left_tokens(c, window=3))
    print("")
    print("Right Tokens")
    print(get_right_tokens(c))
    print("")
    print("Between Tokens")
    print(get_between_tokens(c))
    print("")
    print("Tagged Text")
    print(get_tagged_text(c))
    # Sample pattern check: prints the match object, or None when it does not match.
    print(re.search(r'{{B}} .* is a .* {{A}}', get_tagged_text(c)))
    print("")
    print("Get between Text")
    print(get_text_between(c))
    print(len(get_text_between(c)))
    print("")
    print("Parent Text")
    print(c.get_parent())
    print("")
    return 0
def LF_DaG_ASSOCIATION(c):
    """
    Vote positive when a key phrase suggests the d-g pair is an
    association.

    c - The candidate object to be labeled

    Returns 0 when LF_DG_METHOD_DESC or LF_DG_TITLE fires for the
    candidate. Otherwise returns 1 when a direct_association phrase that
    is not immediately preceded by "not " or "no " occurs in the text
    between the mentions, or before / after either mention in the tagged
    text (all case-insensitive); else 0.
    """
    if LF_DG_METHOD_DESC(c) or LF_DG_TITLE(c):
        return 0

    # Negative lookbehinds reject phrases directly preceded by "not " / "no ".
    guarded_phrase = r'(?<!not )(?<!no )' + ltp(direct_association)
    if re.search(guarded_phrase, get_text_between(c), flags=re.I):
        return 1

    tagged_text = get_tagged_text(c)
    phrase_then_mention = guarded_phrase + r".*({{B}}|{{A}})"
    mention_then_phrase = r"({{B}}|{{A}}).*" + guarded_phrase
    for pattern in (phrase_then_mention, mention_then_phrase):
        if re.search(pattern, tagged_text, flags=re.I):
            return 1
    return 0
Beispiel #18
0
def LF_DdG_DOWNREGULATES(c):
    """
    Vote positive when a word from downregulates (a negative response /
    down-regulation cue) accompanies the two mentions.

    c - The candidate object to be labeled

    Returns 0 when LF_DG_METHOD_DESC or LF_DG_TITLE fires for the
    candidate. Otherwise returns 1 when the cue is found between the
    mentions (either order) or after both mentions in the tagged text;
    else 0.
    """
    if LF_DG_METHOD_DESC(c) or LF_DG_TITLE(c):
        return 0

    regulation_pattern = ltp(downregulates)
    between_pattern = r'.*' + regulation_pattern + r'.*'
    # `or` short-circuits: each later check runs only if the earlier ones miss.
    # The final search passes no flags, so it is case-sensitive.
    cue_found = (
        rule_regex_search_btw_AB(c, between_pattern, 1)
        or rule_regex_search_btw_BA(c, between_pattern, 1)
        or re.search(r'({{A}}|{{B}}).*({{A}}|{{B}}).*' + regulation_pattern,
                     get_tagged_text(c))
    )
    return 1 if cue_found else 0
Beispiel #19
0
def LF_DaG_CELLULAR_ACTIVITY(c):
    """
    Vote positive when key phrases indicate activity within a cell,
    e.g. positive immunostaining for an experiment.

    c - The candidate object to be labeled

    Returns 1 when the cellular_activity pattern matches (case-insensitive)
    the tagged text, the 10-token left windows of the two mentions, or the
    10-token right windows of the two mentions; otherwise 0.
    """
    # Collect the tokens of both mentions before joining so that every token
    # is space-separated. Joining each window separately and concatenating
    # the strings fused the last token of the first window with the first
    # token of the second one.
    left_tokens = list(get_left_tokens(c[0], window=10)) + list(get_left_tokens(c[1], window=10))
    right_tokens = list(get_right_tokens(c[0], window=10)) + list(get_right_tokens(c[1], window=10))
    left_window = " ".join(left_tokens)
    right_window = " ".join(right_tokens)

    activity_pattern = ltp(cellular_activity)
    for text in (get_tagged_text(c), left_window, right_window):
        if re.search(activity_pattern, text, flags=re.I):
            return 1
    return 0
Beispiel #20
0
def LF_CtD_TRIAL(c):
    """
    Vote positive when the sentence matches a trial_indications phrase.

    c - The candidate object to be labeled

    Returns 1 when the pattern built from trial_indications matches the
    tagged text (case-insensitive), otherwise 0.
    """
    if re.search(ltp(trial_indications), get_tagged_text(c), flags=re.I):
        return 1
    return 0
Beispiel #21
0
def rule_regex_search_before_A(candidate, pattern, sign):
    """
    Check whether a regex occurs before expression A.

    candidate - The candidate object whose tagged text is searched
    pattern - Regex fragment placed in front of the {{A}} marker
    sign - Value returned on a match

    Returns sign when pattern + '*{{A}}' matches the tagged text
    (case-insensitive), otherwise 0.

    NOTE(review): the appended fragment starts with a bare '*', which
    quantifies the last element of `pattern` rather than meaning "any text"
    ('.*'). Confirm against the callers whether that is intended.
    """
    full_pattern = pattern + r'*{{A}}'
    if re.search(full_pattern, get_tagged_text(candidate), flags=re.I):
        return sign
    return 0
def LF_DdG_METHYLATION(c):
    """
    Vote positive when the sentence mentions methylation.

    c - The candidate object to be labeled

    Returns 1 when the lowercase substring "methylation" occurs in the
    tagged text (case-sensitive check), otherwise 0.
    """
    return 1 if "methylation" in get_tagged_text(c) else 0
Beispiel #23
0
def LF_GG_IN_SERIES(c):
    """
    Vote negative when the mention appears to sit inside a
    comma-separated enumeration.

    c - The candidate object to be labeled

    Returns -1 when the tagged text holds at least two commas and the
    sequence ", and"; otherwise 0.
    """
    tagged_text = get_tagged_text(c)
    has_enough_commas = len(re.findall(r',', tagged_text)) >= 2
    # The ", and" search only runs once the comma threshold is met.
    if has_enough_commas and re.search(', and', tagged_text):
        return -1
    return 0
Beispiel #24
0
def LF_positive2(c):
    """
    Vote positive when a positive_l phrase links {{A}} to {{B}} without
    being negated.

    c - The candidate object to be labeled

    Returns 1 when the tagged text matches "{{A}} ... <phrase> ... {{B}}"
    (each gap at most 100 characters, case-insensitive) and the same span
    does not match with "not", "no" or "negative" at most 20 characters
    before the phrase; otherwise 0.
    """
    tagged_text = get_tagged_text(c)
    affirmed = re.search(
        r'{{A}}.{0,100} ' + ltp(positive_l) + '.{0,100}{{B}}',
        tagged_text, re.I)
    if not affirmed:
        return 0
    # The negated form is only checked once the affirmative form has matched.
    negated = re.search(
        '{{A}}.{0,100}(not|no|negative).{0,20}' + ltp(positive_l) +
        '.{0,100}{{B}}', tagged_text, re.I)
    return 0 if negated else 1
Beispiel #25
0
def LF_neg_h(c):
    """
    Vote negative when a neg_rgx match shortly precedes the {{B}} mention.

    c - The candidate object to be labeled

    Returns -1 when neg_rgx is followed, within at most 50 characters, by
    {{B}} in the tagged text (case-insensitive); otherwise 0.
    """
    negation_before_b = neg_rgx + '.{0,50}{{B}}'
    if re.search(negation_before_b, get_tagged_text(c), flags=re.I):
        return -1
    return 0
Beispiel #26
0
def LF_h_v(c):
    """
    Vote positive when {{B}} precedes {{A}} closely and no negation is
    present.

    c - The candidate object to be labeled

    Returns 1 when {{B}} is followed by {{A}} within at most 250
    characters in the tagged text and neg_rgx does not match anywhere in
    the tagged text (both case-insensitive); otherwise 0.
    """
    tagged_text = get_tagged_text(c)
    if not re.search(r'{{B}}.{0,250}{{A}}', tagged_text, re.I):
        return 0
    # The negation check only runs once the ordering pattern has matched.
    if re.search(neg_rgx, tagged_text, re.I):
        return 0
    return 1
Beispiel #27
0
def LF_v_cause_h(c):
    """
    Vote positive when a causal phrase links {{A}} to {{B}} without being
    negated.

    c - The candidate object to be labeled

    Returns 1 when the tagged text matches "{{A}} ... <causal phrase> ...
    {{B}}" (each gap at most 50 characters, case-insensitive) and the same
    span does not match with "not", "no" or "negative" at most 20
    characters before the phrase; otherwise 0.
    """
    tagged_text = get_tagged_text(c)
    causal_link = re.search(
        r'{{A}}.{0,50} ' + ltp(causal) + '.{0,50}{{B}}', tagged_text, re.I)
    if not causal_link:
        return 0
    # The negated form is only checked once the affirmative form has matched.
    negated_link = re.search(
        '{{A}}.{0,50}(not|no|negative).{0,20}' + ltp(causal) +
        '.{0,50}{{B}}', tagged_text, re.I)
    return 0 if negated_link else 1
    LF_disease_context
]

# # Test out Label Functions

# In[ ]:

# NOTE(review): `labeled` is initialised here but never used in the visible code.
labeled = []
# Load every DiseaseGene candidate whose split is 0.
candidates = session.query(DiseaseGene).filter(DiseaseGene.split == 0).all()
#candidates = [session.query(DiseaseGene).filter(DiseaseGene.id == ids).one() for ids in [19817,19818,19830,19862,19980,20001,20004]]

# Inspect only the candidates whose first span's parent has id 14264;
# everything else is skipped.
for c in candidates:
    if c[0].get_parent().id != 14264:
        continue
    print c
    print get_tagged_text(c)
    # Entry of the sentence's entity_cids at the second span's start word index.
    print c[1].sentence.entity_cids[c[1].get_word_start()]

# # Label The Candidates

# This block of code will run through the label functions and label each candidate in the training and development groups.

# In[ ]:

# Build the annotator from the LFs collection.
labeler = LabelAnnotator(f=LFs)

# Each line runs the IPython `%time` line magic on the quoted statement, so the
# assignments to L_train / L_dev / L_test happen inside the magic.
# Split 0 goes through apply(); splits 1 and 2 go through apply_existing().
# NOTE(review): presumably apply_existing() reuses the label keys created by
# apply() - confirm against the Snorkel LabelAnnotator documentation.
get_ipython().magic(u'time L_train = labeler.apply(split=0)')
get_ipython().magic(u'time L_dev = labeler.apply_existing(split=1)')
get_ipython().magic(u'time L_test = labeler.apply_existing(split=2)')

# In[ ]:
Beispiel #29
0
def LF_CD_TRIAL(c):
    """
    Vote positive when the sentence matches a trial_indications phrase.

    c - The candidate object to be labeled

    Returns 1 when the pattern built from trial_indications matches the
    tagged text (case-insensitive), otherwise 0.
    """
    # Local import: this function did not use `re` before, so it does not rely
    # on a module-level import being present.
    import re

    # ltp(...) output is used as a regular expression everywhere else in this
    # file (e.g. LF_CtD_TRIAL). The previous `ltp(...) in text` check looked for
    # the pattern string itself as a literal substring of the sentence.
    if re.search(ltp(trial_indications), get_tagged_text(c), flags=re.I):
        return 1
    return 0
def LF_DG_PURPOSE(c):
    """
    Vote negative for sentences that read like the purpose section of a
    structured abstract.

    c - The candidate object to be labeled

    Returns -1 when the tagged text contains "PURPOSE:" (case-sensitive
    substring check, anywhere in the text), otherwise 0.
    """
    if "PURPOSE:" in get_tagged_text(c):
        return -1
    return 0