def __init__(self, config=None, group="unknown", dataset="unknown"):
    """
    Set up the configuration, the report and the optional GPI lookup table.

    Arguments:
    ---------

    config : a AssocParserConfig object
        When None, a default ``assocparser.AssocParserConfig()`` is used.
    group : str
        Forwarded to ``assocparser.Report``.
    dataset : str
        Forwarded to ``assocparser.Report``.
    """
    self.config = assocparser.AssocParserConfig() if config is None else config
    self.report = assocparser.Report(group=group, dataset=dataset)
    self.gpi = None

    authority_path = self.config.gpi_authority_path
    if authority_path is not None:
        # Bind the attribute first so it refers to the table being filled.
        self.gpi = gpi_lookup = dict()
        gpi_parser = entityparser.GpiParser()
        with open(authority_path) as gpi_handle:
            # Consume the parse result while the file is still open.
            for record in gpi_parser.parse(file=gpi_handle):
                summary = {
                    "symbol": record["label"],
                    "name": record["full_name"],
                    "synonyms": entitywriter.stringify(record["synonyms"]),
                    "type": record["type"]
                }
                gpi_lookup[record["id"]] = summary

            loaded_message = "Loaded {} entities from {}".format(len(gpi_lookup), authority_path)
            print(loaded_message)
def __init__(self, config=None, group="unknown", dataset="unknown", bio_entities=None):
    """
    Initialise parser state and merge in any configured GPI entity files.

    Arguments:
    ---------

    config : a AssocParserConfig object
        When None, a default ``assocparser.AssocParserConfig()`` is used.
    group : str
        Stored on the instance and forwarded to ``assocparser.Report``.
    dataset : str
        Forwarded to ``assocparser.Report``.
    bio_entities : a collections.BioEntities object
        When None, an empty ``collections.BioEntities`` is created.
    """
    self.group = group
    self.version = None
    self.default_version = "2.1"
    self.cell_component_descendants_closure = None

    self.bio_entities = collections.BioEntities(dict()) if bio_entities is None else bio_entities
    self.config = assocparser.AssocParserConfig() if config is None else config
    self.report = assocparser.Report(group=group, dataset=dataset, config=self.config)

    authority_paths = self.config.gpi_authority_path
    if authority_paths is not None:
        # A single path given as a string is treated as a one-element list.
        if isinstance(authority_paths, str):
            authority_paths = [authority_paths]

        for authority_path in authority_paths:
            loaded = collections.BioEntities.load_from_file(authority_path)
            self.bio_entities.merge(loaded)
            loaded_count = len(loaded.entities.keys())
            print("Loaded {} entities from {}".format(loaded_count, authority_path))
def test_model_dates():
    """
    Build a model from three annotations of one gene and check its dates.

    The three rows differ only in GO term (column 3) and annotation
    properties (column 11); the assertions check the model's ``date``,
    ``creation_date`` and ``import_date``.
    """
    version = "2.0"
    report = assocparser.Report(group="unknown", dataset="unknown")
    vals = [
        "MGI:MGI:1915834",
        "",
        "RO:0002327",
        "GO:0003674",
        "MGI:MGI:2156816|GO_REF:0000015",
        "ECO:0000307",
        "",
        "",
        "2020-10-09",
        "MGI",
        "",
        "creation-date=2020-09-17|modification-date=2020-10-09|contributor-id=http://orcid.org/0000-0003-2689-5511"
    ]

    # Each entry is the (term, annotation properties) override applied before
    # parsing; None means "parse the row as written above".
    row_overrides = [
        None,
        # Different term, earlier creation-date
        ("GO:0016301", "creation-date=2011-12-13"),
        # Different term, no annotation properties
        ("GO:0001962", ""),
    ]

    model_associations = []
    for override in row_overrides:
        if override is not None:
            vals[3], vals[11] = override
        parsed = to_association(list(vals), report=report, version=version)
        model_associations = model_associations + parsed.associations

    builder = gocam_builder.GoCamBuilder(parser_config=PARSER_CONFIG, modelstate="test")
    model = builder.translate_to_model(gene="MGI:MGI:1915834", assocs=model_associations)

    assert model.date == "2020-10-09"
    assert model.creation_date == "2011-12-13"
    assert model.import_date == datetime.date.today().isoformat()
def __init__(self, config=None, group="unknown", dataset="unknown", bio_entities=None):
    """
    Initialise configuration, report, version state and the entity collection.

    Arguments:
    ---------

    config : a AssocParserConfig object
        When None (the default), a new ``assocparser.AssocParserConfig()`` is
        created for this instance. A default built in the signature would be
        evaluated once at definition time and shared by every instance.
    group : str
        Forwarded to ``assocparser.Report``.
    dataset : str
        Forwarded to ``assocparser.Report``.
    bio_entities : a collections.BioEntities object
        When None, an empty ``collections.BioEntities`` is created.
    """
    if config is None:
        config = assocparser.AssocParserConfig()
    self.config = config

    # Use the caller's group/dataset instead of hard-coded "unknown" values.
    self.report = assocparser.Report(config=self.config, group=group, dataset=dataset)
    self.version = None
    self.default_version = "1.2"

    self.bio_entities = bio_entities
    if self.bio_entities is None:
        self.bio_entities = collections.BioEntities(dict())

    if self.config.gpi_authority_path is not None:
        self.bio_entities.merge(
            collections.BioEntities.load_from_file(self.config.gpi_authority_path))
        # NOTE: the count printed is the size of the merged collection, which
        # also includes any entities supplied through `bio_entities`.
        print("Loaded {} entities from {}".format(
            len(self.bio_entities.entities.keys()),
            self.config.gpi_authority_path))
def __init__(self, config=None):
    """
    Store the parser configuration and create a report bound to it.

    Arguments:
    ---------

    config : a AssocParserConfig object
        When None (the default), a new ``assocparser.AssocParserConfig()`` is
        created for this instance. A default built in the signature would be
        evaluated once at definition time and shared by every instance.
    """
    if config is None:
        config = assocparser.AssocParserConfig()
    self.config = config
    self.report = assocparser.Report(config=self.config)
def test_parse_2_0():
    """
    Parse one version "2.0" row repeatedly, changing one column at a time.

    Covers a plain row, annotation extensions, with/from, an invalid and a
    valid interacting taxon, a non-MGI subject ID and annotation property
    retrieval.
    """
    version = "2.0"
    report = assocparser.Report(group="unknown", dataset="unknown")
    vals = [
        "MGI:MGI:1918911",
        "",
        "RO:0002327",
        "GO:0003674",
        "MGI:MGI:2156816|GO_REF:0000015",
        "ECO:0000307",
        "",
        "",
        "2020-09-17",
        "MGI",
        "",
        "creation-date=2020-09-17|modification-date=2020-09-17|contributor-id=http://orcid.org/0000-0003-2689-5511"
    ]

    def parse_current_row():
        # Parse a copy so the parser never sees the shared `vals` list itself.
        return to_association(list(vals), report=report, version=version)

    def error_messages(parse_result):
        return [m for m in parse_result.report.messages if m["level"] == "ERROR"]

    result = parse_current_row()
    assert result.skipped == 0
    assert len(error_messages(result)) == 0
    assert len(result.associations) == 1

    # Annotation_Extensions
    vals[10] = "BFO:0000066(CL:0000010),GOREL:0001004(CL:0000010)"
    result = parse_current_row()
    expected_extensions = [
        ConjunctiveSet([
            ExtensionUnit(Curie("BFO", "0000066"), Curie("CL", "0000010")),
            ExtensionUnit(Curie("GOREL", "0001004"), Curie("CL", "0000010"))
        ])
    ]
    assert result.associations[0].object_extensions == expected_extensions

    # With_or_From
    vals[6] = "PR:Q505B8|PR:Q8CHK4"
    result = parse_current_row()
    expected_with_from = [
        ConjunctiveSet([Curie("PR", "Q505B8")]),
        ConjunctiveSet([Curie("PR", "Q8CHK4")])
    ]
    assert result.associations[0].evidence.with_support_from == expected_with_from

    # Interacting taxon - this example should fail
    vals[7] = "Staphylococcus aureus ; NCBITaxon:1280"
    result = parse_current_row()
    assert len(error_messages(result)) > 0
    assert len(result.associations) == 0

    # Now test valid interacting taxon value
    vals[7] = "NCBITaxon:1280"
    result = parse_current_row()
    assert result.associations[0].interacting_taxon == Curie("NCBITaxon", "1280")

    # Confirm non-"MGI:MGI:" IDs will parse
    vals[0] = "WB:WBGene00001189"
    result = parse_current_row()
    assert result.associations[0].subject.id == Curie("WB", "WBGene00001189")

    # Test annotation property retrieval
    first_association = result.associations[0]
    contributors = first_association.annotation_property_values(property_key="contributor-id")
    assert set(contributors) == {"http://orcid.org/0000-0003-2689-5511"}
def __init__(self, config=None):
    """
    Store the parser configuration and create an empty report.

    Arguments:
    ---------

    config : a AssocParserConfig object
        When None, a default ``assocparser.AssocParserConfig()`` is used.
    """
    # Identity test: `== None` would call a custom `__eq__` on the config.
    if config is None:
        config = assocparser.AssocParserConfig()
    self.config = config
    self.report = assocparser.Report()
def test_parse_interacting_taxon():
    """
    Check that "taxon:5678" in a version "1.2" row parses to NCBITaxon:5678.
    """
    report = assocparser.Report(group="unknown", dataset="unknown")
    vals = [
        "MGI",
        "MGI:1918911",
        "enables",
        "GO:0003674",
        "MGI:MGI:2156816|GO_REF:0000015",
        "ECO:0000307",
        "",
        "taxon:5678",
        "20100209",
        "MGI",
        "",
        "creation-date=2020-09-17|modification-date=2020-09-17|contributor-id=http://orcid.org/0000-0003-2689-5511"
    ]

    result = to_association(list(vals), report=report, version="1.2")

    expected_taxon = Curie(namespace="NCBITaxon", identity="5678")
    first_association = result.associations[0]
    assert first_association.interacting_taxon == expected_taxon
def __init__(self, config=None):
    """
    Store the configuration, create an empty report and reset version state.

    Arguments:
    ---------

    config : a assocparser.AssocParserConfig object
        When None, a default ``assocparser.AssocParserConfig()`` is used.
    """
    self.config = assocparser.AssocParserConfig() if config is None else config
    self.report = assocparser.Report()
    self.default_version = "1.2"
    self.version = None
def test_parse_1_2():
    """
    Check that a version "1.2" row parses to one association without errors.
    """
    report = assocparser.Report(group="unknown", dataset="unknown")
    vals = [
        "MGI",
        "MGI:1918911",
        "enables",
        "GO:0003674",
        "MGI:MGI:2156816|GO_REF:0000015",
        "ECO:0000307",
        "",
        "",
        "20100209",
        "MGI",
        "",
        "creation-date=2020-09-17|modification-date=2020-09-17|contributor-id=http://orcid.org/0000-0003-2689-5511"
    ]

    result = to_association(list(vals), report=report, version="1.2")

    error_messages = [m for m in result.report.messages if m["level"] == "ERROR"]
    assert result.skipped == 0
    assert len(error_messages) == 0
    assert len(result.associations) == 1
def test_get_with_froms():
    """
    Exercise with/from handling in ``CollapsedAssociationSet``.

    Checks ``get_with_froms`` for a single with/from value and for a pair of
    values, then checks that two headers listing the same values in a
    different order yield a single collapsed association.
    """
    gpi_ents = gocam_builder.GoCamBuilder.parse_gpi(
        gpi_file="tests/resources/mgi2.test_entities.gpi")
    report = assocparser.Report(group="unknown", dataset="unknown")
    vals = [
        "MGI:MGI:1929608",
        "",
        "RO:0001025",
        "GO:0005515",
        "MGI:MGI:3028747|PMID:14627703",
        "ECO:0000353",
        "MGI:MGI:1915834",  # with/from
        "",
        "2004-04-30",
        "MGI",
        "",
        "creation-date=2004-04-30|modification-date=2004-04-30|contributor-id=http://orcid.org/0000-0002-9796-7693"
    ]

    def first_parsed_association():
        # Parse a copy of the row and return its first association.
        return to_association(list(vals), report=report, version="2.0").associations[0]

    go_assoc = first_parsed_association()
    ca_set = collapsed_assoc.CollapsedAssociationSet(GO_ONTO, gpi_ents)
    with_froms = ca_set.get_with_froms(go_assoc)
    assert len(with_froms) == 1
    assert with_froms[0].header == ["MGI:MGI:1915834"]
    assert with_froms[0].line == ["MGI:MGI:1915834"]

    vals[6] = "MGI:MGI:1915834|FAKE:12345"
    go_assoc = first_parsed_association()
    ca_set = collapsed_assoc.CollapsedAssociationSet(GO_ONTO, gpi_ents)
    with_froms = ca_set.get_with_froms(go_assoc)
    # FAKE:12345 should be on line since not in GPI nor does it have same taxon as subject MGI:MGI:1929608
    assert len(with_froms) == 2
    assert with_froms[0].header == ["MGI:MGI:1915834"]
    assert with_froms[0].line == ["MGI:MGI:1915834"]
    assert with_froms[1].header == []
    assert with_froms[1].line == ["FAKE:12345"]

    # Test merging of same-header with/from values in different order
    ca_set = collapsed_assoc.CollapsedAssociationSet(GO_ONTO, gpi_ents)
    header1 = collapsed_assoc.GoAssocWithFrom(
        header=["MGI:MGI:1915834", "FAKE:12345"])
    header2 = collapsed_assoc.GoAssocWithFrom(
        header=["FAKE:12345", "MGI:MGI:1915834"])
    for same_header in (header1, header2):
        ca_set.find_or_create_collapsed_association(go_assoc, with_from=same_header)
    assert len(ca_set.collapsed_associations) == 1
    assert ca_set.collapsed_associations[0].with_froms == [
        "FAKE:12345", "MGI:MGI:1915834"
    ]
def extract_properties(annot: GoAssociation):
    """
    Collect an association's annotation properties into a dict keyed by name.

    Each distinct key (first element of the entries in ``annot.properties``)
    maps to the list returned by ``annot.annotation_property_values`` for it.
    For keys ending in "date" (ex: "creation-date", "modification-date") each
    value is passed through ``assocparser.parse_iso_date``.

    Returns a dict of property key -> list of values.
    """
    extracted = {}
    distinct_keys = {entry[0] for entry in annot.properties}
    for key in distinct_keys:
        values = annot.annotation_property_values(key)
        if key.endswith("date"):
            # Stealing some assocparser stuff here: the date parser needs a
            # report and the split source line, so a scratch report is
            # created just for this call.
            scratch_report = assocparser.Report()
            source_columns = annot.source_line.split("\t")
            values = [
                assocparser.parse_iso_date(raw_value, scratch_report, source_columns)
                for raw_value in values
            ]
        extracted[key] = values
    return extracted
def __init__(self, config=None, group="unknown", dataset="unknown", bio_entities=None):
    """
    Initialise configuration, report, version state and the entity collection.

    Arguments:
    ---------

    config : a AssocParserConfig object
        When None (the default), a new ``assocparser.AssocParserConfig()`` is
        created for this instance. A default built in the signature would be
        evaluated once at definition time and shared by every instance.
    group : str
        Forwarded to ``assocparser.Report``.
    dataset : str
        Forwarded to ``assocparser.Report``.
    bio_entities : a collections.BioEntities object
        When None, an empty ``collections.BioEntities`` is created.
    """
    if config is None:
        config = assocparser.AssocParserConfig()
    self.config = config

    # Use the caller's group/dataset instead of hard-coded "unknown" values.
    self.report = assocparser.Report(config=self.config, group=group, dataset=dataset)
    self.version = None
    self.default_version = "1.2"

    self.bio_entities = bio_entities
    if self.bio_entities is None:
        self.bio_entities = collections.BioEntities(dict())
class GeneralAssocParser(assocparser.AssocParser):
    """
    Parser that picks a concrete annotation parser from the file header.

    Header lines are collected in ``headers``. Until a header yields a parser
    via ``create_parser_from_header``, each header is inspected; afterwards
    every non-header line is delegated to that parser.

    NOTE(review): the fields use ``field(...)`` defaults, which suggests a
    ``@dataclass`` decorator outside this view - confirm.
    """

    config: assocparser.AssocParserConfig
    group: str
    # Header lines seen so far, in file order.
    headers: List[str] = field(default_factory=list)
    report: assocparser.Report = field(
        default_factory=lambda: assocparser.Report())
    bio_entities: BioEntities = field(
        default_factory=lambda: BioEntities(dict()))
    # Concrete parser chosen from the header; None until one is found.
    annotation_parser: Optional[assocparser.AssocParser] = None

    def parse_line(self, line):
        """
        Parse one line, handling headers here and delegating everything else.

        Returns the truthy result of ``validate_line`` if there is one, a
        ``ParseResult`` with no associations for a header line, or whatever
        the selected ``annotation_parser`` returns.

        Raises ``NoVersionInFile`` when a non-header line is reached before
        any parser has been selected.
        """
        validation_result = super().validate_line(line)
        if validation_result:
            return validation_result

        if not self.is_header(line):
            # At this point, we are not going through headers, and so we should have selected a parser
            if self.annotation_parser is not None:
                # Just hand off parse responsibility to underlying `annotation_parser`
                return self.annotation_parser.parse_line(line)

            logger.error("File is bad! We need to bail here!")
            self.report.error("", "Invalid Annotation File", "",
                              "File header is missing file and version", rule=1)
            raise NoVersionInFile("File has no version info", self.report)

        if self.annotation_parser is None:
            # We're still looking for a version identifier
            versioned_parser = create_parser_from_header(
                line, self.config, bio_entities=self.bio_entities)
            if versioned_parser is not None:
                self.annotation_parser = versioned_parser
                # From here on, report through the selected parser's report.
                self.report = versioned_parser.report

        self.headers.append(line)
        return assocparser.ParseResult(line, [], skipped=False)
def construct_collection(
        annotation_path: Optional[str],
        gpi_paths: List[str],
        config: assocparser.AssocParserConfig) -> AssociationCollection:
    """
    Build an ``AssociationCollection`` from GPI files and an annotation file.

    Every path in ``gpi_paths`` is loaded and merged into one ``BioEntities``.
    If ``annotation_path`` is truthy it is parsed with a ``GeneralAssocParser``;
    otherwise the collection has no headers, no annotations and a new report.
    When parsing raises ``NoVersionInFile`` the annotations stay empty and
    the report carried by the exception is used.
    """
    merged_entities = BioEntities(dict())
    for gpi_path in gpi_paths:
        merged_entities.merge(BioEntities.load_from_file(gpi_path))

    # Defaults used when there is no annotation file to parse.
    parsed_annotations, header_lines, parse_report = [], [], assocparser.Report()

    if annotation_path:
        general_parser = GeneralAssocParser(config, "unknown", bio_entities=merged_entities)
        try:
            parsed_annotations = general_parser.parse(annotation_path)
        except NoVersionInFile as missing_version:
            # `parsed_annotations` is still the empty default here.
            general_parser.report = missing_version.report
        header_lines = general_parser.headers
        parse_report = general_parser.report

    return AssociationCollection(
        header_lines, GoAssociations(parsed_annotations), merged_entities, parse_report)
def initial():
    """
    Return an ``AssociationCollection`` with no headers, no associations,
    an empty ``BioEntities`` and a new ``assocparser.Report``.
    """
    # type AssociationCollection
    no_headers = []
    no_associations = GoAssociations([])
    no_entities = BioEntities(dict())
    blank_report = assocparser.Report()
    return AssociationCollection(no_headers, no_associations, no_entities, blank_report)