def make_conjunctions(extension: List) -> association.ExtensionConjunctions:
    """Build an ``ExtensionConjunctions`` from a list of relation/term mappings.

    Args:
        extension: Iterable of mappings, each read through its ``"relation"``
            and ``"term"`` keys.

    Returns:
        An ``ExtensionConjunctions`` wrapping a ``frozenset`` that holds one
        ``ExtensionUnit`` per entry of ``extension``.
    """
    units = frozenset(
        association.ExtensionUnit(entry["relation"], entry["term"])
        for entry in extension
    )
    return association.ExtensionConjunctions(units)
def make_keys_from_gaf(gaf: List[str]) -> List[AnnotationKey]:
    """Derive one ``AnnotationKey`` per extension conjunction of a GAF row.

    Args:
        gaf: A GAF line already split into columns. Index 4 is expanded as
            the term, index 8 is looked up in ``aspect_relation_map``, index
            12 supplies the taxon and index 15 the annotation extensions.

    Returns:
        A list with one ``AnnotationKey`` for every ``|``-separated
        conjunction in column 15. An empty extension column therefore still
        yields a single key whose extension set is empty.
    """
    go_term = curie_util.expand_uri(gaf[4], cmaps=[prefix_context])
    aspect_relation = aspect_relation_map[gaf[8]]
    # Only the first taxon is used; the part after the colon is its number.
    taxon_number = gaf[12].split("|")[0].split(":")[1]
    taxon_uri = "http://purl.obolibrary.org/obo/NCBITaxon_{}".format(
        taxon_number)
    raw_extensions = gaf[15]

    keys = []  # type: List[AnnotationKey]
    for conjunction_text in raw_extensions.split("|"):
        # conjunction_text looks like foo(bar),hello(world)
        units = []  # type: List[association.ExtensionUnit]
        for unit_text in conjunction_text.split(","):
            # unit_text looks like foo(bar); anything else is skipped
            matched = relation_tuple.match(unit_text)
            if not matched:
                continue
            rel_label, filler = matched.groups()
            relation_uri = lookup_relation(rel_label)  # type: Uri
            filler_uri = curie_util.expand_uri(
                filler, cmaps=[prefix_context])  # type: Uri
            units.append(association.ExtensionUnit(relation_uri, filler_uri))

        conjunction_key = association.ExtensionConjunctions(frozenset(units))
        keys.append(
            AnnotationKey(RelationTo(aspect_relation, go_term), taxon_uri,
                          conjunction_key))

    return keys
def test_build_annotation_inferences():
    """Check the inference built for one known annotation key of the fixture."""
    part_of = "http://purl.obolibrary.org/obo/BFO_0000050"
    mouse = "http://purl.obolibrary.org/obo/NCBITaxon_10090"

    with open("tests/resources/test.inferences.json") as inferences_file:
        raw_inferences = json.load(inferences_file)
        inferences = gaference.build_annotation_inferences(raw_inferences)

    # Key: the annotated relation/term, the taxon, and both extension units.
    extension_units = frozenset([
        association.ExtensionUnit(
            part_of, "http://purl.obolibrary.org/obo/EMAPA_17168"),
        association.ExtensionUnit(
            part_of, "http://purl.obolibrary.org/obo/CL_0010009"),
    ])
    lookup_key = gaference.AnnotationKey(
        gaference.RelationTo(part_of,
                             "http://purl.obolibrary.org/obo/GO_0036064"),
        mouse,
        association.ExtensionConjunctions(extension_units))

    wanted = gaference.InferenceValue(True, False, [
        gaference.RelationTo(part_of,
                             "http://purl.obolibrary.org/obo/GO_0097458")
    ])

    actual = inferences[lookup_key]
    assert actual == wanted
def make_keys_from_gaf(gaf: association.GoAssociation) -> List[AnnotationKey]:
    """Derive the ``AnnotationKey`` list for a parsed association.

    Args:
        gaf: The association whose ``object.id``, ``relation`` and
            ``object.taxon`` are expanded to URIs, and whose
            ``object_extensions.conjunctions`` are turned into keys.

    Returns:
        One ``AnnotationKey`` per extension conjunction, or a single key with
        an empty extension set when the association has no conjunctions.
    """
    term_uri = curie_util.expand_uri(gaf.object.id, cmaps=[prefix_context])
    relation_uri = curie_util.expand_uri(gaf.relation, cmaps=[prefix_context])
    taxon_uri = curie_util.expand_uri(gaf.object.taxon, cmaps=[prefix_context])
    conjunctions = gaf.object_extensions.conjunctions

    if not conjunctions:
        # No extensions at all: the key carries an empty conjunction set.
        return [
            AnnotationKey(RelationTo(relation_uri, term_uri), taxon_uri,
                          association.ExtensionConjunctions(frozenset([])))
        ]

    # Each conjunction is a group like foo(bar),hello(world).
    return [
        AnnotationKey(
            RelationTo(relation_uri, term_uri), taxon_uri,
            association.ExtensionConjunctions(
                frozenset(conjunction.extensions)))
        for conjunction in conjunctions
    ]
def to_association(gaf_line: List[str],
                   report=None,
                   group="unknown",
                   dataset="unknown") -> assocparser.ParseResult:
    """Convert one split GAF line into a ``ParseResult``.

    The line is validated step by step. The first failed check records an
    error on ``report`` and returns a ``ParseResult`` with no associations and
    its third positional argument set to ``True``. A line that passes every
    check yields a ``ParseResult`` holding a single ``GoAssociation``.

    Args:
        gaf_line: The GAF row already split into columns. The list is never
            modified; truncation and padding work on a copy.
        report: Report to record warnings and errors on. When ``None`` a new
            ``Report`` is created from ``group`` and ``dataset``.
        group: Group name used only when a new ``Report`` is created.
        dataset: Dataset name used only when a new ``Report`` is created.

    Returns:
        The ``assocparser.ParseResult`` for this line.
    """
    report = Report(group=group, dataset=dataset) if report is None else report
    source_line = "\t".join(gaf_line)

    def _skipped() -> assocparser.ParseResult:
        # Result used for every rejected line: no associations, flag True.
        return assocparser.ParseResult(source_line, [], True, report=report)

    if source_line == "":
        report.error(source_line,
                     "Blank Line",
                     "EMPTY",
                     "Blank lines are not allowed",
                     rule=1)
        return _skipped()

    if len(gaf_line) > 17:
        # If we see more than 17 columns, we just cut off the extra columns.
        report.warning(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg=
            "There were more than 17 columns in this line. Proceeding by cutting off extra columns after column 17.",
            rule=1)
        gaf_line = gaf_line[:17]

    if 17 > len(gaf_line) >= 15:
        # Pad 15/16 column lines up to 17 columns. Build a new list rather
        # than using ``+=`` so the caller's list is not extended in place.
        gaf_line = gaf_line + [""] * (17 - len(gaf_line))

    if len(gaf_line) != 17:
        report.error(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg=
            "There were {columns} columns found in this line, and there should be 15 (for GAF v1) or 17 (for GAF v2)"
            .format(columns=len(gaf_line)),
            rule=1)
        return _skipped()

    ## check for missing columns
    ## We use indices here because these checks run before the values are
    ## split into individual variables
    DB_INDEX = 0
    DB_OBJECT_INDEX = 1
    TAXON_INDEX = 12
    REFERENCE_INDEX = 5
    raw_taxon = gaf_line[TAXON_INDEX]

    if gaf_line[DB_INDEX] == "":
        report.error(source_line,
                     Report.INVALID_IDSPACE,
                     "EMPTY",
                     "col1 is empty",
                     taxon=raw_taxon,
                     rule=1)
        return _skipped()
    if gaf_line[DB_OBJECT_INDEX] == "":
        report.error(source_line,
                     Report.INVALID_ID,
                     "EMPTY",
                     "col2 is empty",
                     taxon=raw_taxon,
                     rule=1)
        return _skipped()
    if raw_taxon == "":
        report.error(source_line,
                     Report.INVALID_TAXON,
                     "EMPTY",
                     "taxon column is empty",
                     taxon=raw_taxon,
                     rule=1)
        return _skipped()
    if gaf_line[REFERENCE_INDEX] == "":
        report.error(source_line,
                     Report.INVALID_ID,
                     "EMPTY",
                     "reference column 6 is empty",
                     taxon=raw_taxon,
                     rule=1)
        return _skipped()

    # The taxon column holds one taxon, or two separated by "|"; the second
    # one, when present, is the interacting taxon.
    taxon_parts = raw_taxon.split("|")
    taxon_curie = taxon_parts[0].replace("taxon", "NCBITaxon")
    interacting_taxon = taxon_parts[1].replace(
        "taxon", "NCBITaxon") if len(taxon_parts) == 2 else None

    subject_curie = "{db}:{id}".format(db=gaf_line[0], id=gaf_line[1])
    subject = association.Subject(subject_curie, gaf_line[2], gaf_line[9],
                                  gaf_line[10].split("|"), gaf_line[11],
                                  taxon_curie)

    aspect = gaf_line[8]
    negated, relation, qualifiers = assocparser._parse_qualifier(
        gaf_line[3], aspect)

    # For allowed, see http://geneontology.org/docs/go-annotations/#annotation-qualifiers
    for q in qualifiers:
        if q not in allowed_qualifiers:
            report.error(
                source_line,
                Report.INVALID_QUALIFIER,
                q,
                "Qualifiers must be `contributes_to`, `colocalizes_with`, or `NOT`",
                taxon=raw_taxon,
                rule=1)
            return _skipped()

    # Named ``go_object`` so the builtin ``object`` is not shadowed.
    go_object = association.Term(gaf_line[4], taxon_curie)
    evidence = association.Evidence(ecomap.coderef_to_ecoclass(gaf_line[6]),
                                    [e for e in gaf_line[5].split("|") if e],
                                    [e for e in gaf_line[7].split("|") if e])

    subject_extensions = [
        association.ExtensionUnit("rdfs:subClassOf", gaf_line[16])
    ] if gaf_line[16] else []

    # Column 16 (index 15): "|" separates conjunctions, "," separates the
    # relation(term) units inside one conjunction.
    conjunctions = []
    if gaf_line[15]:
        for conjuncts in gaf_line[15].split("|"):
            extension_units = []
            for u in conjuncts.split(","):
                parsed = relation_tuple.findall(u)
                if len(parsed) != 1:
                    # Something went wrong with the regex: it's a bad parse.
                    # Report the raw taxon column string, like every other
                    # report in this function, not the split list.
                    report.error(source_line,
                                 Report.EXTENSION_SYNTAX_ERROR,
                                 u,
                                 "extensions should be relation(curie)",
                                 taxon=raw_taxon,
                                 rule=1)
                    return _skipped()
                rel, term = parsed[0]
                extension_units.append(association.ExtensionUnit(rel, term))

            conjunctions.append(
                association.ExtensionConjunctions(extension_units))

    object_extensions = association.ExtensionExpression(conjunctions)

    looked_up_rel = relations.lookup_label(relation)
    if looked_up_rel is None:
        report.error(
            source_line,
            assocparser.Report.INVALID_QUALIFIER,
            relation,
            "Qualifer must be \"colocalizes_with\", \"contributes_to\", or \"NOT\"",
            taxon=raw_taxon,
            rule=1)
        return _skipped()

    a = association.GoAssociation(
        # Re-joined so the stored line reflects any truncation or padding.
        source_line="\t".join(gaf_line),
        subject=subject,
        relation=curie_util.contract_uri(looked_up_rel)[0],
        object=go_object,
        negated=negated,
        qualifiers=qualifiers,
        aspect=aspect,
        interacting_taxon=interacting_taxon,
        evidence=evidence,
        subject_extensions=subject_extensions,
        object_extensions=object_extensions,
        provided_by=gaf_line[14],
        date=gaf_line[13],
        properties={})

    return assocparser.ParseResult(source_line, [a], False, report=report)
def to_association(gpad_line: List[str],
                   report=None,
                   group="unknown",
                   dataset="unknown") -> assocparser.ParseResult:
    """Convert one split GPAD line into a ``ParseResult``.

    The line is validated step by step. The first failed check records an
    error on ``report`` and returns a ``ParseResult`` with no associations and
    its third positional argument set to ``True``. A line that passes every
    check yields a ``ParseResult`` holding a single ``GoAssociation``.

    Args:
        gpad_line: The GPAD row already split into columns. The list is never
            modified; truncation and padding work on a copy.
        report: Report to record warnings and errors on. When ``None`` a new
            ``Report`` is created from ``group`` and ``dataset``.
        group: Group name used only when a new ``Report`` is created.
        dataset: Dataset name used only when a new ``Report`` is created.

    Returns:
        The ``assocparser.ParseResult`` for this line.
    """
    report = Report(group=group, dataset=dataset) if report is None else report
    source_line = "\t".join(gpad_line)

    def _skipped() -> assocparser.ParseResult:
        # Result used for every rejected line: no associations, flag True.
        return assocparser.ParseResult(source_line, [], True, report=report)

    if len(gpad_line) > 12:
        report.warning(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg=
            "There were more than 12 columns in this line. Proceeding by cutting off extra columns.",
            rule=1)
        gpad_line = gpad_line[:12]

    if 12 > len(gpad_line) >= 10:
        # Pad 10/11 column lines up to 12 columns. Build a new list rather
        # than using ``+=`` so the caller's list is not extended in place.
        gpad_line = gpad_line + [""] * (12 - len(gpad_line))

    if len(gpad_line) != 12:
        report.error(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg=
            "There were {columns} columns found in this line, and there should be between 10 and 12"
            .format(columns=len(gpad_line)),
            rule=1)
        return _skipped()

    ## check for missing columns
    ## We use indices here because these checks run before the values are
    ## split into individual variables
    DB_INDEX = 0
    DB_OBJECT_INDEX = 1
    QUALIFIER = 2
    REFERENCE_INDEX = 4
    EVIDENCE_INDEX = 5
    # GPAD 1.x: column 11 holds annotation extensions, column 12 properties.
    EXTENSION_INDEX = 10
    PROPERTIES_INDEX = 11

    if gpad_line[DB_INDEX] == "":
        report.error(source_line,
                     Report.INVALID_IDSPACE,
                     "EMPTY",
                     "col1 is empty",
                     rule=1)
        return _skipped()
    if gpad_line[DB_OBJECT_INDEX] == "":
        report.error(source_line,
                     Report.INVALID_ID,
                     "EMPTY",
                     "col2 is empty",
                     rule=1)
        return _skipped()
    if gpad_line[QUALIFIER] == "":
        # An empty qualifier is a qualifier problem, not a taxon problem.
        report.error(source_line,
                     Report.INVALID_QUALIFIER,
                     "EMPTY",
                     "qualifier column is empty",
                     rule=1)
        return _skipped()
    if gpad_line[REFERENCE_INDEX] == "":
        report.error(source_line,
                     Report.INVALID_ID,
                     "EMPTY",
                     "reference column is empty",
                     rule=1)
        return _skipped()
    if gpad_line[EVIDENCE_INDEX] == "":
        report.error(source_line,
                     Report.INVALID_ID,
                     "EMPTY",
                     "Evidence column is empty",
                     rule=1)
        # Stop here like the other missing-column checks, instead of
        # building an association with an empty evidence code.
        return _skipped()

    # GPAD lines carry no subject taxon; reports get an empty string.
    taxon = ""
    subject_curie = "{db}:{id}".format(db=gpad_line[0], id=gpad_line[1])
    subject = association.Subject(subject_curie, "", "", [], "", "")
    # Named ``go_object`` so the builtin ``object`` is not shadowed.
    go_object = association.Term(gpad_line[3], "")
    evidence = association.Evidence(gpad_line[5],
                                    [e for e in gpad_line[4].split("|") if e],
                                    [e for e in gpad_line[6].split("|") if e])

    # "NOT" only marks negation; every other qualifier must resolve to a URI.
    raw_qs = gpad_line[QUALIFIER].split("|")
    negated = "NOT" in raw_qs

    looked_up_qualifiers = [
        relations.lookup_label(q) for q in raw_qs if q != "NOT"
    ]
    if None in looked_up_qualifiers:
        report.error(source_line,
                     Report.INVALID_QUALIFIER,
                     raw_qs,
                     "Could not find a URI for qualifier",
                     taxon=taxon,
                     rule=1)
        return _skipped()

    qualifiers = [curie_util.contract_uri(q)[0] for q in looked_up_qualifiers]

    # "|" separates conjunctions, "," separates the relation(term) units
    # inside one conjunction.
    conjunctions = []
    if gpad_line[EXTENSION_INDEX]:
        for conjuncts in gpad_line[EXTENSION_INDEX].split("|"):
            extension_units = []
            for u in conjuncts.split(","):
                parsed = relation_tuple.findall(u)
                if len(parsed) != 1:
                    # Something went wrong with the regex: it's a bad parse.
                    report.error(source_line,
                                 Report.EXTENSION_SYNTAX_ERROR,
                                 u,
                                 "extensions should be relation(curie)",
                                 taxon=taxon,
                                 rule=1)
                    return _skipped()
                rel, term = parsed[0]
                extension_units.append(association.ExtensionUnit(rel, term))

            conjunctions.append(
                association.ExtensionConjunctions(extension_units))

    object_extensions = association.ExtensionExpression(conjunctions)

    # Properties are "key=value" pairs separated by "|". Split on the first
    # "=" only, so values may contain "=", and a bare key maps to "" instead
    # of raising IndexError.
    properties = {}
    for prop in gpad_line[PROPERTIES_INDEX].split("|"):
        if not prop:
            continue
        key, _, value = prop.partition("=")
        properties[key] = value

    a = association.GoAssociation(
        # Re-joined so the stored line reflects any truncation or padding.
        source_line="\t".join(gpad_line),
        subject=subject,
        relation="",
        object=go_object,
        negated=negated,
        qualifiers=qualifiers,
        aspect=None,
        interacting_taxon=gpad_line[7],
        evidence=evidence,
        subject_extensions=[],
        object_extensions=object_extensions,
        provided_by=gpad_line[9],
        date=gpad_line[8],
        properties=properties)

    return assocparser.ParseResult(source_line, [a], False, report=report)