コード例 #1
0
     lambda x: TSVstring2list(x, int), TSVstring2bool, no_op, no_op,
     lambda x: TSVstring2list(x, int), TSVstring2bool, no_op
 ])
 # Create the sentence object where the two mentions appear.
 # Every argument is read from line_dict, the parsed fields of the current
 # input line; the values are passed positionally in the order doc_id,
 # sent_id, wordidxs, words, poses, ners, lemmas, dep_paths, dep_parents,
 # bounding_boxes.
 sentence = Sentence(line_dict["doc_id"], line_dict["sent_id"],
                     line_dict["wordidxs"], line_dict["words"],
                     line_dict["poses"], line_dict["ners"],
                     line_dict["lemmas"], line_dict["dep_paths"],
                     line_dict["dep_parents"],
                     line_dict["bounding_boxes"])
 # Create the mentions.
 # Each Mention gets the tag "GENE", the entity value from the input line,
 # and the entries of sentence.words selected by the mention's word indexes.
 # is_correct and type are not constructor arguments here: they are copied
 # from the input line onto the mention as attributes afterwards.
 # (presumably the *_wordidxs values are lists of ints usable as indexes
 # into sentence.words -- confirm against the parser set up above)
 gene_1_mention = Mention(
     "GENE", line_dict["gene_1_entity"],
     [sentence.words[j] for j in line_dict["gene_1_wordidxs"]])
 gene_1_mention.is_correct = line_dict["gene_1_is_correct"]
 gene_1_mention.type = line_dict["gene_1_type"]
 gene_2_mention = Mention(
     "GENE", line_dict["gene_2_entity"],
     [sentence.words[j] for j in line_dict["gene_2_wordidxs"]])
 gene_2_mention.is_correct = line_dict["gene_2_is_correct"]
 gene_2_mention.type = line_dict["gene_2_type"]
 # If the word indexes do not overlap, create the relation candidate.
 # The guard is true only when the intersection of the two index sets is
 # empty, i.e. the two mentions share no word position.
 # TODO there may be other cases. Check with Emily.
 if not set(line_dict["gene_1_wordidxs"]) & \
         set(line_dict["gene_2_wordidxs"]):
     # The candidate is tagged "GENEGENE" and keeps the mentions in
     # (gene_1, gene_2) order.
     relation = Relation("GENEGENE", gene_1_mention, gene_2_mention)
     # Add features
     add_features(relation, gene_1_mention, gene_2_mention,
                  sentence)
     # Supervise
     # One of the two mentions (or both) is labelled as False
コード例 #2
0
         TSVstring2list, lambda x: TSVstring2list(x, int),
         TSVstring2list, no_op, lambda x: TSVstring2list(x, int),
         TSVstring2bool, no_op, no_op, lambda x: TSVstring2list(x,
         int), TSVstring2bool, no_op])
 # Create the sentence object where the two mentions appear.
 # Every argument is read from line_dict, the parsed fields of the current
 # input line; the values are passed positionally in the order doc_id,
 # sent_id, wordidxs, words, poses, ners, lemmas, dep_paths, dep_parents,
 # bounding_boxes.
 sentence = Sentence(
     line_dict["doc_id"], line_dict["sent_id"],
     line_dict["wordidxs"], line_dict["words"], line_dict["poses"],
     line_dict["ners"], line_dict["lemmas"], line_dict["dep_paths"],
     line_dict["dep_parents"], line_dict["bounding_boxes"])
 # Create the mentions.
 # Each Mention gets the tag "GENE", the entity value from the input line,
 # and the entries of sentence.words selected by the mention's word indexes.
 # is_correct and type are not constructor arguments here: they are copied
 # from the input line onto the mention as attributes afterwards.
 # (presumably the *_wordidxs values are lists of ints usable as indexes
 # into sentence.words -- confirm against the parser set up above)
 gene_1_mention = Mention(
     "GENE", line_dict["gene_1_entity"],
     [sentence.words[j] for j in line_dict["gene_1_wordidxs"]])
 gene_1_mention.is_correct = line_dict["gene_1_is_correct"]
 gene_1_mention.type = line_dict["gene_1_type"]
 gene_2_mention = Mention(
     "GENE", line_dict["gene_2_entity"],
     [sentence.words[j] for j in line_dict["gene_2_wordidxs"]])
 gene_2_mention.is_correct = line_dict["gene_2_is_correct"]
 gene_2_mention.type = line_dict["gene_2_type"]
 # If the word indexes do not overlap, create the relation candidate.
 # The guard is true only when the intersection of the two index sets is
 # empty, i.e. the two mentions share no word position.
 # TODO there may be other cases. Check with Emily.
 if not set(line_dict["gene_1_wordidxs"]) & \
         set(line_dict["gene_2_wordidxs"]):
     # The candidate is tagged "GENEGENE" and keeps the mentions in
     # (gene_1, gene_2) order.
     relation = Relation(
         "GENEGENE", gene_1_mention, gene_2_mention)
     # Add features
     add_features(relation, gene_1_mention, gene_2_mention,
                 sentence)
     # Supervise
コード例 #3
0
 # Iterate over each pair of (gene,phenotype) mention.
 # The gene_* entries of line_dict are indexed in parallel by g_idx and the
 # hpoterm_* entries by h_idx; the nested loops visit every combination.
 for g_idx in range(len(line_dict["gene_is_corrects"])):
     # The word indexes of this gene mention are still a single string at
     # this point; TSVstring2list is called with int as its second argument
     # (presumably converting every element to int -- confirm in the helper).
     g_wordidxs = TSVstring2list(
         line_dict["gene_wordidxss"][g_idx], int)
     gene_mention = Mention(
         "GENE", line_dict["gene_entities"][g_idx],
         [sentence.words[j] for j in g_wordidxs])
     # Decode the one-letter supervision flag:
     #   "n" -> None, "f" -> False, "t" -> True.
     # Any other value is treated as a programming/data error.
     # NOTE(review): `assert False` is removed when Python runs with -O, in
     # which case an unexpected flag would silently leave is_correct at
     # whatever Mention initialised it to.
     if line_dict["gene_is_corrects"][g_idx] == "n":
         gene_mention.is_correct = None
     elif line_dict["gene_is_corrects"][g_idx] == "f":
         gene_mention.is_correct = False
     elif line_dict["gene_is_corrects"][g_idx] == "t":
         gene_mention.is_correct = True
     else:
         assert False
     gene_mention.type = line_dict["gene_types"][g_idx]
     # Sanity check: gene types ending in "_UNSUP" are not expected here
     # (presumably they are filtered out upstream -- confirm).
     assert not gene_mention.type.endswith("_UNSUP")
     for h_idx in range(len(line_dict["hpoterm_is_corrects"])):
         # Same decoding steps as for the gene mention, applied to the
         # phenotype (HPO term) columns.
         h_wordidxs = TSVstring2list(
             line_dict["hpoterm_wordidxss"][h_idx], int)
         # NOTE(review): the tag is lowercase "hpoterm" while the gene tag
         # is uppercase "GENE" -- confirm this is intended.
         hpoterm_mention = Mention(
             "hpoterm", line_dict["hpoterm_entities"][h_idx],
             [sentence.words[j] for j in h_wordidxs])
         # "n" -> None, "f" -> False, "t" -> True; anything else asserts.
         if line_dict["hpoterm_is_corrects"][h_idx] == "n":
             hpoterm_mention.is_correct = None
         elif line_dict["hpoterm_is_corrects"][h_idx] == "f":
             hpoterm_mention.is_correct = False
         elif line_dict["hpoterm_is_corrects"][h_idx] == "t":
             hpoterm_mention.is_correct = True
         else:
             assert False
コード例 #4
0
 # Iterate over each pair of (gene,phenotype) mention.
 # The gene_* entries of line_dict are indexed in parallel by g_idx and the
 # hpoterm_* entries by h_idx; the nested loops visit every combination.
 for g_idx in range(len(line_dict["gene_is_corrects"])):
     # The word indexes of this gene mention are still a single string at
     # this point; TSVstring2list is called with int as its second argument
     # (presumably converting every element to int -- confirm in the helper).
     g_wordidxs = TSVstring2list(line_dict["gene_wordidxss"][g_idx],
                                 int)
     gene_mention = Mention("GENE",
                            line_dict["gene_entities"][g_idx],
                            [sentence.words[j] for j in g_wordidxs])
     # Decode the one-letter supervision flag:
     #   "n" -> None, "f" -> False, "t" -> True.
     # Any other value is treated as a programming/data error.
     # NOTE(review): `assert False` is removed when Python runs with -O, in
     # which case an unexpected flag would silently leave is_correct at
     # whatever Mention initialised it to.
     if line_dict["gene_is_corrects"][g_idx] == "n":
         gene_mention.is_correct = None
     elif line_dict["gene_is_corrects"][g_idx] == "f":
         gene_mention.is_correct = False
     elif line_dict["gene_is_corrects"][g_idx] == "t":
         gene_mention.is_correct = True
     else:
         assert False
     gene_mention.type = line_dict["gene_types"][g_idx]
     # Sanity check: gene types ending in "_UNSUP" are not expected here
     # (presumably they are filtered out upstream -- confirm).
     assert not gene_mention.type.endswith("_UNSUP")
     for h_idx in range(len(line_dict["hpoterm_is_corrects"])):
         # Same decoding steps as for the gene mention, applied to the
         # phenotype (HPO term) columns.
         h_wordidxs = TSVstring2list(
             line_dict["hpoterm_wordidxss"][h_idx], int)
         # NOTE(review): the tag is lowercase "hpoterm" while the gene tag
         # is uppercase "GENE" -- confirm this is intended.
         hpoterm_mention = Mention(
             "hpoterm", line_dict["hpoterm_entities"][h_idx],
             [sentence.words[j] for j in h_wordidxs])
         # "n" -> None, "f" -> False, "t" -> True; anything else asserts.
         if line_dict["hpoterm_is_corrects"][h_idx] == "n":
             hpoterm_mention.is_correct = None
         elif line_dict["hpoterm_is_corrects"][h_idx] == "f":
             hpoterm_mention.is_correct = False
         elif line_dict["hpoterm_is_corrects"][h_idx] == "t":
             hpoterm_mention.is_correct = True
         else:
             assert False