예제 #1
0
    def __init__(self, config=None, group="unknown", dataset="unknown"):
        """
        Store the configuration, create the report and, when the
        configuration names a GPI authority file, load it into `self.gpi`.

        Arguments:
        ---------

        config : a AssocParserConfig object; a default one is created when None
        group : forwarded to the Report
        dataset : forwarded to the Report
        """
        self.config = assocparser.AssocParserConfig() if config is None else config
        self.report = assocparser.Report(group=group, dataset=dataset)
        self.gpi = None

        gpi_path = self.config.gpi_authority_path
        if gpi_path is None:
            # No authority file configured: self.gpi stays None.
            return

        self.gpi = {}
        gpi_parser = entityparser.GpiParser()
        with open(gpi_path) as handle:
            # Consume the parsed entities while the file is still open.
            for record in gpi_parser.parse(file=handle):
                summary = {
                    "symbol": record["label"],
                    "name": record["full_name"],
                    "synonyms": entitywriter.stringify(record["synonyms"]),
                    "type": record["type"]
                }
                self.gpi[record["id"]] = summary

            print("Loaded {} entities from {}".format(len(self.gpi), gpi_path))
예제 #2
0
    def __init__(self, config=None, group="unknown", dataset="unknown", bio_entities=None):
        """
        Initialise parser state and merge in any configured GPI files.

        Arguments:
        ---------

        config : a AssocParserConfig object; a default one is created when None
        group : stored on the parser and forwarded to the Report
        dataset : forwarded to the Report
        bio_entities : collection that GPI entities are merged into; an empty
            BioEntities is created when None
        """
        self.group = group
        self.version = None
        self.default_version = "2.1"
        self.cell_component_descendants_closure = None
        self.bio_entities = (collections.BioEntities(dict())
                             if bio_entities is None else bio_entities)
        self.config = assocparser.AssocParserConfig() if config is None else config
        self.report = assocparser.Report(group=group, dataset=dataset, config=self.config)

        authority_paths = self.config.gpi_authority_path
        if authority_paths is None:
            return

        # A single path may be given as a plain string; treat it as a one-item list.
        if isinstance(authority_paths, str):
            authority_paths = [authority_paths]

        for path in authority_paths:
            loaded = collections.BioEntities.load_from_file(path)
            self.bio_entities.merge(loaded)
            print("Loaded {} entities from {}".format(len(loaded.entities.keys()), path))
예제 #3
0
def test_model_dates():
    """Check the date fields of a model built from three annotation rows."""
    gpad_version = "2.0"
    parse_report = assocparser.Report(group="unknown", dataset="unknown")
    base_row = [
        "MGI:MGI:1915834", "", "RO:0002327", "GO:0003674",
        "MGI:MGI:2156816|GO_REF:0000015", "ECO:0000307", "", "", "2020-10-09",
        "MGI", "",
        "creation-date=2020-09-17|modification-date=2020-10-09|contributor-id=http://orcid.org/0000-0003-2689-5511"
    ]
    # Each variant replaces the term (column 3) and the annotation
    # properties (column 11) of the base row; every other column is shared.
    variants = [
        # Base row as-is
        ("GO:0003674", base_row[11]),
        # Different term, earlier creation-date
        ("GO:0016301", "creation-date=2011-12-13"),
        # Different term, no annotation properties
        ("GO:0001962", ""),
    ]

    collected = []
    for term, properties in variants:
        row = list(base_row)
        row[3], row[11] = term, properties
        parsed = to_association(row, report=parse_report, version=gpad_version)
        collected = collected + parsed.associations

    cam_builder = gocam_builder.GoCamBuilder(parser_config=PARSER_CONFIG,
                                             modelstate="test")
    built = cam_builder.translate_to_model(gene="MGI:MGI:1915834",
                                           assocs=collected)
    assert built.date == "2020-10-09"
    assert built.creation_date == "2011-12-13"
    assert built.import_date == datetime.date.today().isoformat()
예제 #4
0
    def __init__(self,
                 config=None,
                 group="unknown",
                 dataset="unknown",
                 bio_entities=None):
        """
        Arguments:
        ---------

        config : a AssocParserConfig object; a fresh default one is created
            for this instance when None
        group : forwarded to the Report
        dataset : forwarded to the Report
        bio_entities : collection that entities from
            `config.gpi_authority_path` are merged into; an empty BioEntities
            is created when None
        """
        # A default built in the signature is evaluated once at definition
        # time and would then be shared by every parser instance.
        if config is None:
            config = assocparser.AssocParserConfig()
        self.config = config
        # Forward the caller's group/dataset instead of hard-coded "unknown".
        self.report = assocparser.Report(config=self.config,
                                         group=group,
                                         dataset=dataset)
        self.version = None
        self.default_version = "1.2"
        self.bio_entities = bio_entities
        if self.bio_entities is None:
            self.bio_entities = collections.BioEntities(dict())
        if self.config.gpi_authority_path is not None:
            self.bio_entities.merge(
                collections.BioEntities.load_from_file(
                    self.config.gpi_authority_path))
            # The count printed is the size of the merged collection.
            print("Loaded {} entities from {}".format(
                len(self.bio_entities.entities.keys()),
                self.config.gpi_authority_path))
예제 #5
0
    def __init__(self, config=None):
        """
        Arguments:
        ---------

        config : a AssocParserConfig object; a fresh default one is created
            for this instance when None
        """
        # A default built in the signature is evaluated once at definition
        # time and would then be shared by every parser instance.
        if config is None:
            config = assocparser.AssocParserConfig()
        self.config = config
        self.report = assocparser.Report(config=self.config)
예제 #6
0
def test_parse_2_0():
    """Parse a version "2.0" row repeatedly, editing one column at a time.

    The column edits are cumulative: each step parses the row as left by all
    previous steps.
    """
    line_version = "2.0"
    shared_report = assocparser.Report(group="unknown", dataset="unknown")
    row = [
        "MGI:MGI:1918911",
        "",
        "RO:0002327",
        "GO:0003674",
        "MGI:MGI:2156816|GO_REF:0000015",
        "ECO:0000307",
        "",
        "",
        "2020-09-17",
        "MGI",
        "",
        "creation-date=2020-09-17|modification-date=2020-09-17|contributor-id=http://orcid.org/0000-0003-2689-5511"
    ]

    def parse_current_row():
        # Hand the parser a copy of the row in its current state.
        return to_association(list(row), report=shared_report, version=line_version)

    def error_messages(parse_result):
        return [msg for msg in parse_result.report.messages if msg["level"] == "ERROR"]

    parsed = parse_current_row()
    assert parsed.skipped == 0
    assert len(error_messages(parsed)) == 0
    assert len(parsed.associations) == 1

    # Annotation_Extensions
    row[10] = "BFO:0000066(CL:0000010),GOREL:0001004(CL:0000010)"
    parsed = parse_current_row()
    assert parsed.associations[0].object_extensions == [
        ConjunctiveSet([
            ExtensionUnit(Curie("BFO", "0000066"), Curie("CL", "0000010")),
            ExtensionUnit(Curie("GOREL", "0001004"), Curie("CL", "0000010"))
        ])
    ]

    # With_or_From
    row[6] = "PR:Q505B8|PR:Q8CHK4"
    parsed = parse_current_row()
    assert parsed.associations[0].evidence.with_support_from == [
        ConjunctiveSet([Curie("PR", "Q505B8")]),
        ConjunctiveSet([Curie("PR", "Q8CHK4")])
    ]

    # Interacting taxon - this example should fail
    row[7] = "Staphylococcus aureus ; NCBITaxon:1280"
    parsed = parse_current_row()
    assert len(error_messages(parsed)) > 0
    assert len(parsed.associations) == 0
    # Now test valid interacting taxon value
    row[7] = "NCBITaxon:1280"
    parsed = parse_current_row()
    assert parsed.associations[0].interacting_taxon == Curie("NCBITaxon", "1280")

    # Confirm non-"MGI:MGI:" IDs will parse
    row[0] = "WB:WBGene00001189"
    parsed = parse_current_row()
    assert parsed.associations[0].subject.id == Curie("WB", "WBGene00001189")

    # Test annotation property retrieval
    contributor_ids = parsed.associations[0].annotation_property_values(
        property_key="contributor-id")
    assert set(contributor_ids) == {"http://orcid.org/0000-0003-2689-5511"}
예제 #7
0
    def __init__(self, config=None):
        """
        Arguments:
        ---------

        config : a AssocParserConfig object; a default one is created when None
        """
        # Identity check: `== None` would defer to the config object's own
        # __eq__, which is not what "no config supplied" means.
        if config is None:
            config = assocparser.AssocParserConfig()
        self.config = config
        self.report = assocparser.Report()
def test_parse_interacting_taxon():
    """A "taxon:5678" interacting-taxon column parses to an NCBITaxon Curie."""
    parse_report = assocparser.Report(group="unknown", dataset="unknown")
    row = [
        "MGI",
        "MGI:1918911",
        "enables",
        "GO:0003674",
        "MGI:MGI:2156816|GO_REF:0000015",
        "ECO:0000307",
        "",
        "taxon:5678",
        "20100209",
        "MGI",
        "",
        "creation-date=2020-09-17|modification-date=2020-09-17|contributor-id=http://orcid.org/0000-0003-2689-5511",
    ]
    parsed = to_association(list(row), report=parse_report, version="1.2")
    taxon = parsed.associations[0].interacting_taxon
    assert taxon == Curie(namespace="NCBITaxon", identity="5678")
예제 #9
0
    def __init__(self, config=None):
        """
        Store the configuration and start with a fresh report and no
        detected version.

        Arguments:
        ---------

        config : a assocparser.AssocParserConfig object; a default one is
            created when None
        """
        self.config = assocparser.AssocParserConfig() if config is None else config
        self.report = assocparser.Report()
        self.default_version = "1.2"
        self.version = None
def test_parse_1_2():
    """A version "1.2" row parses to one association, unskipped and error-free."""
    parse_report = assocparser.Report(group="unknown", dataset="unknown")
    row = [
        "MGI",
        "MGI:1918911",
        "enables",
        "GO:0003674",
        "MGI:MGI:2156816|GO_REF:0000015",
        "ECO:0000307",
        "",
        "",
        "20100209",
        "MGI",
        "",
        "creation-date=2020-09-17|modification-date=2020-09-17|contributor-id=http://orcid.org/0000-0003-2689-5511",
    ]
    parsed = to_association(list(row), report=parse_report, version="1.2")

    error_messages = [msg for msg in parsed.report.messages
                      if msg["level"] == "ERROR"]
    assert parsed.skipped == 0
    assert len(error_messages) == 0
    assert len(parsed.associations) == 1
예제 #11
0
def test_get_with_froms():
    """Exercise with/from extraction and same-header merging in CollapsedAssociationSet."""
    entities = gocam_builder.GoCamBuilder.parse_gpi(
        gpi_file="tests/resources/mgi2.test_entities.gpi")
    parse_report = assocparser.Report(group="unknown", dataset="unknown")
    row = [
        "MGI:MGI:1929608",
        "",
        "RO:0001025",
        "GO:0005515",
        "MGI:MGI:3028747|PMID:14627703",
        "ECO:0000353",
        "MGI:MGI:1915834",  # with/from
        "",
        "2004-04-30",
        "MGI",
        "",
        "creation-date=2004-04-30|modification-date=2004-04-30|contributor-id=http://orcid.org/0000-0002-9796-7693"
    ]

    # Single with/from value
    parsed = to_association(list(row), report=parse_report, version="2.0")
    association = parsed.associations[0]
    collapsed_set = collapsed_assoc.CollapsedAssociationSet(GO_ONTO, entities)
    found = collapsed_set.get_with_froms(association)
    assert len(found) == 1
    assert found[0].header == ["MGI:MGI:1915834"]
    assert found[0].line == ["MGI:MGI:1915834"]

    # Two pipe-separated with/from values
    row[6] = "MGI:MGI:1915834|FAKE:12345"
    parsed = to_association(list(row), report=parse_report, version="2.0")
    association = parsed.associations[0]
    collapsed_set = collapsed_assoc.CollapsedAssociationSet(GO_ONTO, entities)
    found = collapsed_set.get_with_froms(association)
    # FAKE:12345 should be on line since not in GPI nor does it have same taxon as subject MGI:MGI:1929608
    assert len(found) == 2
    assert found[0].header == ["MGI:MGI:1915834"]
    assert found[0].line == ["MGI:MGI:1915834"]
    assert found[1].header == []
    assert found[1].line == ["FAKE:12345"]

    # Test merging of same-header with/from values in different order
    collapsed_set = collapsed_assoc.CollapsedAssociationSet(GO_ONTO, entities)
    forward_header = collapsed_assoc.GoAssocWithFrom(
        header=["MGI:MGI:1915834", "FAKE:12345"])
    reversed_header = collapsed_assoc.GoAssocWithFrom(
        header=["FAKE:12345", "MGI:MGI:1915834"])
    collapsed_set.find_or_create_collapsed_association(association,
                                                       with_from=forward_header)
    collapsed_set.find_or_create_collapsed_association(association,
                                                       with_from=reversed_header)
    assert len(collapsed_set.collapsed_associations) == 1
    assert collapsed_set.collapsed_associations[0].with_froms == [
        "FAKE:12345", "MGI:MGI:1915834"
    ]
예제 #12
0
def extract_properties(annot: GoAssociation):
    """
    Gather an annotation's property values into a dict keyed by property name.

    The keys are the first element of each entry in `annot.properties`; the
    values come from `annot.annotation_property_values(key)`. For keys ending
    in "date" (ex: "creation-date", "modification-date") each value is passed
    through `assocparser.parse_iso_date` before being stored.
    """
    collected = {}
    for key in {entry[0] for entry in annot.properties}:
        values = annot.annotation_property_values(key)
        if key.endswith("date"):
            # parse_iso_date is called with a report and the split source
            # line; the report is a throwaway that is never inspected here.
            scratch_report = assocparser.Report()
            source_columns = annot.source_line.split("\t")
            values = [
                assocparser.parse_iso_date(value, scratch_report, source_columns)
                for value in values
            ]
        collected[key] = values
    return collected
예제 #13
0
    def __init__(self,
                 config=None,
                 group="unknown",
                 dataset="unknown",
                 bio_entities=None):
        """
        Arguments:
        ---------

        config : a AssocParserConfig object; a fresh default one is created
            for this instance when None
        group : forwarded to the Report
        dataset : forwarded to the Report
        bio_entities : entity collection kept on the parser; an empty
            BioEntities is created when None
        """
        # A default built in the signature is evaluated once at definition
        # time and would then be shared by every parser instance.
        if config is None:
            config = assocparser.AssocParserConfig()
        self.config = config
        # Forward the caller's group/dataset instead of hard-coded "unknown".
        self.report = assocparser.Report(config=self.config,
                                         group=group,
                                         dataset=dataset)
        self.version = None
        self.default_version = "1.2"
        self.bio_entities = bio_entities
        if self.bio_entities is None:
            self.bio_entities = collections.BioEntities(dict())
예제 #14
0
class GeneralAssocParser(assocparser.AssocParser):
    """
    Parser that picks a concrete annotation parser from the file's header
    lines and then delegates every non-header line to it.
    """
    config: assocparser.AssocParserConfig
    group: str
    # Header lines seen so far, in the order they were parsed.
    headers: List[str] = field(default_factory=list)
    # Replaced by the delegate parser's report once a delegate is chosen.
    report: assocparser.Report = field(
        default_factory=lambda: assocparser.Report())
    bio_entities: BioEntities = field(
        default_factory=lambda: BioEntities(dict()))
    # Delegate chosen from a header line; None until one is found.
    annotation_parser: Optional[assocparser.AssocParser] = None

    def parse_line(self, line):
        """
        Parse one line of the file.

        Returns the result of `validate_line` when it is truthy, an empty
        ParseResult for header lines, and otherwise whatever the delegate
        parser returns. Raises NoVersionInFile when a non-header line is
        reached before any header produced a delegate parser.
        """
        early_result = super().validate_line(line)
        if early_result:
            return early_result

        if not self.is_header(line):
            # Past the headers: a delegate must have been selected by now.
            if self.annotation_parser is not None:
                return self.annotation_parser.parse_line(line)

            logger.error("File is bad! We need to bail here!")
            self.report.error("",
                              "Invalid Annotation File",
                              "",
                              "File header is missing file and version",
                              rule=1)
            raise NoVersionInFile("File has no version info", self.report)

        if self.annotation_parser is None:
            # Still looking for a header line that identifies the version.
            candidate = create_parser_from_header(
                line, self.config, bio_entities=self.bio_entities)
            if candidate is not None:
                self.annotation_parser = candidate
                self.report = candidate.report

        self.headers.append(line)
        return assocparser.ParseResult(line, [], skipped=False)
예제 #15
0
def construct_collection(
        annotation_path: Optional[str], gpi_paths: List[str],
        config: assocparser.AssocParserConfig) -> AssociationCollection:
    """
    Build an AssociationCollection from GPI files and an optional annotation file.

    Entities from every path in `gpi_paths` are merged into one BioEntities.
    When `annotation_path` is falsy the collection has no headers, no
    associations and a fresh Report. When the annotation file carries no
    version information (NoVersionInFile), the collection has no associations
    and uses the report attached to that exception.
    """
    entities = BioEntities(dict())
    for gpi_path in gpi_paths:
        entities.merge(BioEntities.load_from_file(gpi_path))

    if not annotation_path:
        empty_report = assocparser.Report()
        return AssociationCollection([], GoAssociations([]), entities,
                                     empty_report)

    parser = GeneralAssocParser(config, "unknown", bio_entities=entities)
    try:
        parsed_annotations = parser.parse(annotation_path)
    except NoVersionInFile as no_version:
        # Keep the report that recorded the failure; there is nothing to return.
        parser.report = no_version.report
        parsed_annotations = []

    return AssociationCollection(parser.headers,
                                 GoAssociations(parsed_annotations), entities,
                                 parser.report)
예제 #16
0
 def initial():
     """Return an AssociationCollection with no headers, no associations, no entities and a new Report."""
     no_headers = []
     no_associations = GoAssociations([])
     no_entities = BioEntities(dict())
     return AssociationCollection(no_headers, no_associations, no_entities,
                                  assocparser.Report())