lambda x: TSVstring2list(x, int), TSVstring2bool, no_op, no_op, lambda x: TSVstring2list(x, int), TSVstring2bool, no_op ]) # Create the sentence object where the two mentions appear sentence = Sentence(line_dict["doc_id"], line_dict["sent_id"], line_dict["wordidxs"], line_dict["words"], line_dict["poses"], line_dict["ners"], line_dict["lemmas"], line_dict["dep_paths"], line_dict["dep_parents"], line_dict["bounding_boxes"]) # Create the mentions gene_1_mention = Mention( "GENE", line_dict["gene_1_entity"], [sentence.words[j] for j in line_dict["gene_1_wordidxs"]]) gene_1_mention.is_correct = line_dict["gene_1_is_correct"] gene_1_mention.type = line_dict["gene_1_type"] gene_2_mention = Mention( "GENE", line_dict["gene_2_entity"], [sentence.words[j] for j in line_dict["gene_2_wordidxs"]]) gene_2_mention.is_correct = line_dict["gene_2_is_correct"] gene_2_mention.type = line_dict["gene_2_type"] # If the word indexes do not overlap, create the relation candidate # TODO there may be other cases. Check with Emily. if not set(line_dict["gene_1_wordidxs"]) & \ set(line_dict["gene_2_wordidxs"]): relation = Relation("GENEGENE", gene_1_mention, gene_2_mention) # Add features add_features(relation, gene_1_mention, gene_2_mention, sentence) # Supervise # One of the two mentions (or both) is labelled as False
TSVstring2list, lambda x: TSVstring2list(x, int), TSVstring2list, no_op, lambda x: TSVstring2list(x, int), TSVstring2bool, no_op, no_op, lambda x: TSVstring2list(x, int), TSVstring2bool, no_op]) # Create the sentence object where the two mentions appear sentence = Sentence( line_dict["doc_id"], line_dict["sent_id"], line_dict["wordidxs"], line_dict["words"], line_dict["poses"], line_dict["ners"], line_dict["lemmas"], line_dict["dep_paths"], line_dict["dep_parents"], line_dict["bounding_boxes"]) # Create the mentions gene_1_mention = Mention( "GENE", line_dict["gene_1_entity"], [sentence.words[j] for j in line_dict["gene_1_wordidxs"]]) gene_1_mention.is_correct = line_dict["gene_1_is_correct"] gene_1_mention.type = line_dict["gene_1_type"] gene_2_mention = Mention( "GENE", line_dict["gene_2_entity"], [sentence.words[j] for j in line_dict["gene_2_wordidxs"]]) gene_2_mention.is_correct = line_dict["gene_2_is_correct"] gene_2_mention.type = line_dict["gene_2_type"] # If the word indexes do not overlap, create the relation candidate # TODO there may be other cases. Check with Emily. if not set(line_dict["gene_1_wordidxs"]) & \ set(line_dict["gene_2_wordidxs"]): relation = Relation( "GENEGENE", gene_1_mention, gene_2_mention) # Add features add_features(relation, gene_1_mention, gene_2_mention, sentence) # Supervise
# Iterate over each pair of (gene,phenotype) mention.
# Single-letter supervision labels and the is_correct value each one selects;
# built once up front because it is the same for every mention.
is_correct_by_label = {"n": None, "f": False, "t": True}
for g_idx, g_label in enumerate(line_dict["gene_is_corrects"]):
    # The word indexes are stored as an encoded string; decode them to ints
    # so they can be used to index sentence.words.
    g_wordidxs = TSVstring2list(line_dict["gene_wordidxss"][g_idx], int)
    gene_mention = Mention(
        "GENE", line_dict["gene_entities"][g_idx],
        [sentence.words[j] for j in g_wordidxs])
    # Raise explicitly rather than `assert False`: assert statements are
    # removed under `python -O`, which would let an unknown label slip
    # through with is_correct never assigned here. The exception type is
    # still AssertionError, so existing handlers keep working.
    if g_label not in is_correct_by_label:
        raise AssertionError(
            "unexpected gene is_correct label: %r" % (g_label,))
    gene_mention.is_correct = is_correct_by_label[g_label]
    gene_mention.type = line_dict["gene_types"][g_idx]
    # Gene mentions whose type ends in "_UNSUP" are rejected at this point.
    if gene_mention.type.endswith("_UNSUP"):
        raise AssertionError(
            "unexpected _UNSUP gene mention type: %r" % (gene_mention.type,))
    # Pair the current gene mention with every phenotype (hpoterm) mention.
    for h_idx, h_label in enumerate(line_dict["hpoterm_is_corrects"]):
        h_wordidxs = TSVstring2list(
            line_dict["hpoterm_wordidxss"][h_idx], int)
        hpoterm_mention = Mention(
            "hpoterm", line_dict["hpoterm_entities"][h_idx],
            [sentence.words[j] for j in h_wordidxs])
        # Same label validation as for the gene mention above.
        if h_label not in is_correct_by_label:
            raise AssertionError(
                "unexpected hpoterm is_correct label: %r" % (h_label,))
        hpoterm_mention.is_correct = is_correct_by_label[h_label]
# Iterate over each pair of (gene,phenotype) mention.
# Single-letter supervision labels and the is_correct value each one selects;
# built once up front because it is the same for every mention.
is_correct_by_label = {"n": None, "f": False, "t": True}
for g_idx, g_label in enumerate(line_dict["gene_is_corrects"]):
    # The word indexes are stored as an encoded string; decode them to ints
    # so they can be used to index sentence.words.
    g_wordidxs = TSVstring2list(line_dict["gene_wordidxss"][g_idx], int)
    gene_mention = Mention(
        "GENE", line_dict["gene_entities"][g_idx],
        [sentence.words[j] for j in g_wordidxs])
    # Raise explicitly rather than `assert False`: assert statements are
    # removed under `python -O`, which would let an unknown label slip
    # through with is_correct never assigned here. The exception type is
    # still AssertionError, so existing handlers keep working.
    if g_label not in is_correct_by_label:
        raise AssertionError(
            "unexpected gene is_correct label: %r" % (g_label,))
    gene_mention.is_correct = is_correct_by_label[g_label]
    gene_mention.type = line_dict["gene_types"][g_idx]
    # Gene mentions whose type ends in "_UNSUP" are rejected at this point.
    if gene_mention.type.endswith("_UNSUP"):
        raise AssertionError(
            "unexpected _UNSUP gene mention type: %r" % (gene_mention.type,))
    # Pair the current gene mention with every phenotype (hpoterm) mention.
    for h_idx, h_label in enumerate(line_dict["hpoterm_is_corrects"]):
        h_wordidxs = TSVstring2list(
            line_dict["hpoterm_wordidxss"][h_idx], int)
        hpoterm_mention = Mention(
            "hpoterm", line_dict["hpoterm_entities"][h_idx],
            [sentence.words[j] for j in h_wordidxs])
        # Same label validation as for the gene mention above.
        if h_label not in is_correct_by_label:
            raise AssertionError(
                "unexpected hpoterm is_correct label: %r" % (h_label,))
        hpoterm_mention.is_correct = is_correct_by_label[h_label]