Beispiel #1
0
def test_gaf_to_gpad2():
    """Write parsed GAF lines as GPAD 2.0 and check the header and data row.

    Two inputs are exercised: one with an empty qualifier column and one
    with ``NOT`` in the qualifier column.
    """
    plain_gaf = "PomBase\tSPAC25B8.17\typf1\t\tGO:0000006\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:999|taxon:888\t20150305\tPomBase\tpart_of(X:1)\tUniProtKB:P12345"
    plain_gpad = "PomBase:SPAC25B8.17\t\tBFO:0000050\tGO:0000006\tGO_REF:0000024\tECO:0000266\tSGD:S000001583\tNCBITaxon:888\t2015-03-05\tPomBase\tBFO:0000050(X:1)\t"

    negated_gaf = "PomBase\tSPAC25B8.17\typf1\tNOT\tGO:0000006\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:999|taxon:888\t20150305\tPomBase\tpart_of(X:1)\tUniProtKB:P12345"
    negated_gpad = "PomBase:SPAC25B8.17\tNOT\tBFO:0000050\tGO:0000006\tGO_REF:0000024\tECO:0000266\tSGD:S000001583\tNCBITaxon:888\t2015-03-05\tPomBase\tBFO:0000050(X:1)\t"

    cases = ((plain_gaf, plain_gpad), (negated_gaf, negated_gpad))
    for gaf_line, expected_row in cases:
        # Fresh parser, buffer and writer per case so outputs cannot mix.
        gaf_parser = gafparser.GafParser()
        buffer = io.StringIO()
        gpad_writer = assocwriter.GpadWriter(version=assocwriter.GPAD_2_0,
                                             file=buffer)

        first_assoc = gaf_parser.parse_line(gaf_line).associations[0]
        gpad_writer.write_assoc(first_assoc)

        written = buffer.getvalue().split("\n")
        assert written[0] == "!gpa-version: 2.0"
        assert written[1] == expected_row
Beispiel #2
0
def test_roundtrip():
    """
    Parse a GAF line, write it back out, and check the text is unchanged.

    Covers a line with two pipe-separated taxa and a line with a single taxon.
    """
    two_taxa = "PomBase\tSPAC25B8.17\typf1\t\tGO:0000006\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:999|taxon:888\t20150305\tPomBase\tfoo(X:1)\tUniProtKB:P12345"
    single_taxon = "PomBase\tSPAC25B8.17\typf1\t\tGO:0000006\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:1111\t20150305\tPomBase\tfoo(X:1)\tUniProtKB:P12345"

    for original in (two_taxa, single_taxon):
        gaf_parser = gafparser.GafParser()
        buffer = io.StringIO()
        gaf_writer = assocwriter.GafWriter(file=buffer)

        parsed_assoc = gaf_parser.parse_line(original).associations[0]
        gaf_writer.write_assoc(parsed_assoc)

        # Skip "!" header lines; the first remaining line is the association.
        body_rows = [
            row for row in buffer.getvalue().split("\n")
            if not row.startswith("!")
        ]
        assert original == body_rows[0]
Beispiel #3
0
def test_gaf2_2_qualifier_to_gaf2_1():
    """Parse GAF 2.2 lines and write them as GAF 2.1, checking column 4.

    The `involved_in` qualifier must come out blank (the line itself is
    still written), and `NOT|involved_in` must come out as just `NOT`.
    """
    involved_in = "WB\tWBGene00000001\taap-1\tinvolved_in\tGO:0008286\tWB_REF:WBPaper00005614|PMID:12393910\tIMP\t\tP\t\tY110A7A.10\tgene\ttaxon:6239\t20060302\tWB\t\t"
    not_involved_in = "WB\tWBGene00000001\taap-1\tNOT|involved_in\tGO:0008286\tWB_REF:WBPaper00005614|PMID:12393910\tIMP\t\tP\t\tY110A7A.10\tgene\ttaxon:6239\t20060302\tWB\t\t"

    for gaf_22_line, expected_qualifier in ((involved_in, ""),
                                            (not_involved_in, "NOT")):
        gaf_parser = gafparser.GafParser()
        gaf_parser.version = "2.2"
        buffer = io.StringIO()
        # Write out to gaf 2.1
        gaf_21_writer = assocwriter.GafWriter(file=buffer, version="2.1")

        first_assoc = gaf_parser.parse_line(gaf_22_line).associations[0]
        gaf_21_writer.write_assoc(first_assoc)

        body_rows = [
            row for row in buffer.getvalue().split("\n")
            if not row.startswith("!")
        ]
        assert body_rows[0].split("\t")[3] == expected_qualifier
Beispiel #4
0
def test_no_colon_in_id():
    """An ID without a colon is rejected and reported as a single ERROR."""
    gaf_parser = gafparser.GafParser()

    is_valid = gaf_parser._validate_id("FOOBAR", "")
    assert not is_valid

    reported = gaf_parser.report.messages
    assert len(reported) == 1
    assert reported[0]["level"] == assocparser.Report.ERROR
Beispiel #5
0
def test_validate_pipe_separated():
    """Pipe-separated IDs come back as the individual IDs (order ignored)."""
    gaf_parser = gafparser.GafParser()

    single = gaf_parser.validate_pipe_separated_ids("PMID:12345", "")
    assert set(single) == {"PMID:12345"}

    pair = gaf_parser.validate_pipe_separated_ids("PMID:12345|PMID:11111", "")
    assert set(pair) == {"PMID:12345", "PMID:11111"}
Beispiel #6
0
def test_empty_pre_colon():
    """An ID with nothing before the colon is rejected with a single ERROR."""
    gaf_parser = gafparser.GafParser()

    is_valid = gaf_parser._validate_id(":123", "")
    assert not is_valid

    reported = gaf_parser.report.messages
    assert len(reported) == 1
    assert reported[0]["level"] == assocparser.Report.ERROR
Beispiel #7
0
def create_parser_from_header(
        line: str,
        config: assocparser.AssocParserConfig,
        group="unknown",
        dataset="unknown",
        bio_entities=None) -> Optional[assocparser.AssocParser]:
    """
    Build a GPAD or GAF parser from a header line.

    `parser_version_regex` is applied to `line`; exactly one match is
    required and is unpacked as (filetype, version, <ignored>). A filetype of
    "gpad"/"gpa" yields a GpadParser and "gaf" yields a GafParser, each
    constructed with `config`, `bio_entities`, `group` and `dataset`.

    The parser's `version` attribute is set only when the matched version is
    one of the recognised values ("1.2"/"2.0" for GPAD, "2.1"/"2.2" for GAF).

    Returns None when there is not exactly one match or the filetype is
    neither GPAD nor GAF.
    """
    matches = parser_version_regex.findall(line)
    if len(matches) != 1:
        return None

    filetype, version, _ = matches[0]
    parser_kwargs = dict(config=config,
                         bio_entities=bio_entities,
                         group=group,
                         dataset=dataset)

    if filetype in ("gpad", "gpa"):
        built = gpadparser.GpadParser(**parser_kwargs)
        recognised_versions = ("1.2", "2.0")
    elif filetype == "gaf":
        built = gafparser.GafParser(**parser_kwargs)
        recognised_versions = ("2.1", "2.2")
    else:
        return None

    # Unrecognised versions leave the parser's version untouched.
    if version in recognised_versions:
        built.version = version

    return built
Beispiel #8
0
def test_pipe_in_id():
    """An ID with a pipe before the colon validates but logs one WARNING."""
    gaf_parser = gafparser.GafParser()

    is_valid = gaf_parser._validate_id("F|OO:123", "")
    assert is_valid

    reported = gaf_parser.report.messages
    assert len(reported) == 1
    assert reported[0]["level"] == assocparser.Report.WARNING
Beispiel #9
0
def test_bad_character_in_id():
    """An ID containing ``&`` is rejected and reported as a single ERROR."""
    gaf_parser = gafparser.GafParser()

    is_valid = gaf_parser._validate_id("FOO:1&23", "")
    assert not is_valid

    reported = gaf_parser.report.messages
    assert len(reported) == 1
    assert reported[0]["level"] == assocparser.Report.ERROR
Beispiel #10
0
def test_validate_with_allowed_ids():
    """``FOO:123`` validates when ``allowed_ids`` contains ``FOO``."""
    gaf_parser = gafparser.GafParser()
    split_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    assert gaf_parser._validate_id("FOO:123", split_line, allowed_ids=["FOO"])
Beispiel #11
0
def test_empty_id():
    """An empty ID string is rejected and reported as a single ERROR."""
    gaf_parser = gafparser.GafParser()
    split_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    is_valid = gaf_parser._validate_id("", split_line)
    assert not is_valid

    reported = gaf_parser.report.messages
    assert len(reported) == 1
    assert reported[0]["level"] == assocparser.Report.ERROR
Beispiel #12
0
def test_pipe_in_id():
    """An ID with a pipe before the colon validates but logs one WARNING."""
    gaf_parser = gafparser.GafParser()
    split_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    is_valid = gaf_parser._validate_id("F|OO:123", split_line)
    assert is_valid

    reported = gaf_parser.report.messages
    assert len(reported) == 1
    assert reported[0]["level"] == assocparser.Report.WARNING
Beispiel #13
0
def test_normalize_refs_good_and_bad_refs():
    """Normalizing ``FB:123`` with ``PMID:234`` reports one INVALID_IDSPACE."""
    gaf_parser = gafparser.GafParser()
    split_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    # Only the report is inspected; the returned refs are not checked here.
    gaf_parser.normalize_refs(["FB:123", "PMID:234"], split_line)

    reported = gaf_parser.report.messages
    assert len(reported) == 1
    assert reported[0]["type"] == assocparser.Report.INVALID_IDSPACE
Beispiel #14
0
def test_validate_pipe_separated_empty_allowed():
    """With ``empty_allowed=True`` an empty string yields an empty list."""
    gaf_parser = gafparser.GafParser()
    split_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    parsed_ids = gaf_parser.validate_pipe_separated_ids("",
                                                        split_line,
                                                        empty_allowed=True)

    assert parsed_ids == []
Beispiel #15
0
def test_gaf_2_2_extensions():
    """Extensions of a GAF 2.2 line come back unchanged in column 16."""
    gaf_22_line = "WB\tWBGene00000001\taap-1\tinvolved_in\tGO:0008286\tWB_REF:WBPaper00005614|PMID:12393910\tIMP\t\tP\t\tY110A7A.10\tgene\ttaxon:6239\t20060302\tWB\tpart_of(EMAPA:17972),part_of(CL:0000018)\t"

    gaf_parser = gafparser.GafParser()
    gaf_parser.version = "2.2"
    first_assoc = gaf_parser.parse_line(gaf_22_line).associations[0]

    columns = first_assoc.to_gaf_2_2_tsv()
    assert columns[15] == "part_of(EMAPA:17972),part_of(CL:0000018)"
Beispiel #16
0
def test_validate_with_disallowed_id():
    """``FOO:123`` with ``allowed_ids=["BAR"]`` logs exactly one WARNING."""
    gaf_parser = gafparser.GafParser()
    split_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    # Only the report is inspected; the validity flag is not checked here.
    gaf_parser._validate_id("FOO:123", split_line, allowed_ids=["BAR"])

    reported = gaf_parser.report.messages
    assert len(reported) == 1
    assert reported[0]["level"] == assocparser.Report.WARNING
Beispiel #17
0
def test_validate_pipe_separated_with_bad_ids():
    """A malformed ID makes pipe-separated validation return None.

    Checked both for a bad ID (``PMID:123[2]``) alongside a good one and for
    the bad ID on its own.
    """
    parser = gafparser.GafParser()

    ids = parser.validate_pipe_separated_ids("PMID:123[2]|PMID:11111", "")
    # Identity check: `== None` could be satisfied by a custom __eq__.
    assert ids is None

    ids = parser.validate_pipe_separated_ids("PMID:123[2]", "")
    assert ids is None
Beispiel #18
0
def test_writing_to_gaf_2_2():
    """GAF 2.2 qualifiers are preserved by ``to_gaf_2_2_tsv``.

    Checked for a plain ``involved_in`` qualifier and for the negated
    ``NOT|involved_in`` form.
    """
    plain = "WB\tWBGene00000001\taap-1\tinvolved_in\tGO:0008286\tWB_REF:WBPaper00005614|PMID:12393910\tIMP\t\tP\t\tY110A7A.10\tgene\ttaxon:6239\t20060302\tWB\t\t"
    # With NOT
    negated = "WB\tWBGene00000001\taap-1\tNOT|involved_in\tGO:0008286\tWB_REF:WBPaper00005614|PMID:12393910\tIMP\t\tP\t\tY110A7A.10\tgene\ttaxon:6239\t20060302\tWB\t\t"

    for gaf_22_line, expected_qualifier in ((plain, "involved_in"),
                                            (negated, "NOT|involved_in")):
        gaf_parser = gafparser.GafParser()
        gaf_parser.version = "2.2"
        first_assoc = gaf_parser.parse_line(
            gaf_22_line).associations[0]  # type: GoAssociation

        columns = first_assoc.to_gaf_2_2_tsv()
        assert columns[3] == expected_qualifier
Beispiel #19
0
def test_normalize_refs_single_bad_ref():
    """``FB:123`` is returned as-is but reported as one INVALID_IDSPACE."""
    gaf_parser = gafparser.GafParser()
    split_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    normalized = gaf_parser.normalize_refs(["FB:123"], split_line)
    assert normalized == ["FB:123"]

    reported = gaf_parser.report.messages
    assert len(reported) == 1
    assert reported[0]["type"] == assocparser.Report.INVALID_IDSPACE
Beispiel #20
0
def test_validate_pipe_separated():
    """Pipe-separated IDs come back as the individual IDs (order ignored)."""
    gaf_parser = gafparser.GafParser()

    single = gaf_parser.validate_pipe_separated_ids(
        "PMID:12345", assocparser.SplitLine("", [""] * 17, "taxon:foo"))
    assert set(single) == {"PMID:12345"}

    pair = gaf_parser.validate_pipe_separated_ids(
        "PMID:12345|PMID:11111",
        assocparser.SplitLine("", [""] * 17, "taxon:foo"))
    assert set(pair) == {"PMID:12345", "PMID:11111"}
Beispiel #21
0
def create_base_parser(format: FormatType) -> Optional[assocparser.AssocParser]:
    """
    Return a parser with a default configuration for the given format.

    Only `FormatType.GAF` is handled, yielding a GafParser built with a fresh
    AssocParserConfig; any other format yields None.
    """
    if format == FormatType.GAF:
        return gafparser.GafParser(config=assocparser.AssocParserConfig())

    return None
Beispiel #22
0
def test_validate_pipe_separated_with_bad_ids():
    """A malformed ID makes pipe-separated validation return None.

    Checked both for a bad ID (``PMID:123[2]``) alongside a good one and for
    the bad ID on its own.
    """
    parser = gafparser.GafParser()

    ids = parser.validate_pipe_separated_ids(
        "PMID:123[2]|PMID:11111",
        assocparser.SplitLine("", [""] * 17, "taxon:foo"))
    # Identity check: `== None` could be satisfied by a custom __eq__.
    assert ids is None

    ids = parser.validate_pipe_separated_ids(
        "PMID:123[2]", assocparser.SplitLine("", [""] * 17, "taxon:foo"))
    assert ids is None
Beispiel #23
0
def test_validate_pipe_with_additional_delims():
    """IDs are split on ``|`` and on the extra ``,`` delimiter.

    Checked directly via ``validate_pipe_separated_ids`` and through the
    with/from column of a fully parsed GAF line.
    """
    gaf_parser = gafparser.GafParser()

    split_ids = gaf_parser.validate_pipe_separated_ids("F:123,B:234|B:111",
                                                       "",
                                                       extra_delims=",")
    assert set(split_ids) == {"F:123", "B:234", "B:111"}

    gaf_line = "PomBase\tSPAC25B8.17\typf1\t\tGO:1990578\tGO_REF:0000024\tISO\tUniProtKB:Q9CXD9|ensembl:ENSMUSP00000038569,PMID:11111\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\t20150305\tPomBase\t\t"
    parsed = gaf_parser.parse_line(gaf_line)

    with_from = parsed.associations[0]["evidence"]["with_support_from"]
    assert set(with_from) == {
        "UniProtKB:Q9CXD9", "ensembl:ENSMUSP00000038569", "PMID:11111"
    }
Beispiel #24
0
def test_full_gaf_2_2_write():
    """A GAF 2.2 line written by a 2.2 writer has the same columns as read."""
    gaf_22_line = "WB\tWBGene00000001\taap-1\tinvolved_in\tGO:0008286\tWB_REF:WBPaper00005614|PMID:12393910\tIMP\t\tP\t\tY110A7A.10\tgene\ttaxon:6239\t20060302\tWB\t\t"

    gaf_parser = gafparser.GafParser()
    gaf_parser.version = "2.2"
    buffer = io.StringIO()
    gaf_writer = assocwriter.GafWriter(file=buffer, version="2.2")

    first_assoc = gaf_parser.parse_line(gaf_22_line).associations[0]
    gaf_writer.write_assoc(first_assoc)

    # Skip "!" header lines; the first remaining line is the association.
    body_rows = [
        row for row in buffer.getvalue().split("\n")
        if not row.startswith("!")
    ]
    assert body_rows[0].split("\t") == gaf_22_line.split("\t")
Beispiel #25
0
def test_gpad_iba_writing():
    """Write the associations of a single-IBA GAF file as GPAD.

    The parser is configured with ``paint=True``; the complete writer output
    (header, one data row, trailing empty line) is compared to the expected
    lines.
    """
    out = io.StringIO()
    parser = gafparser.GafParser()
    parser.config = assocparser.AssocParserConfig(paint=True)
    writer = assocwriter.GpadWriter(file=out)

    # Context manager so the fixture file is closed even if writing raises.
    with open("tests/resources/wb_single_iba.gaf") as gaf_file:
        for assoc in parser.association_generator(skipheader=True,
                                                  file=gaf_file):
            writer.write_assoc(assoc)

    outlines = out.getvalue().split("\n")

    expected_lines = [
        "!gpa-version: 1.1",
        "WB\tWBGene00022144\tpart_of\tGO:0005886\tPMID:21873635\tECO:0000318\tPANTHER:PTN000073732|RGD:3252\t\t20180308\tGO_Central\t\t",
        ""
    ]
    assert expected_lines == outlines
Beispiel #26
0
def test_negated_qualifers():
    """A ``NOT`` qualifier survives conversion to GAF and GPAD rows.

    The GAF row keeps ``NOT`` in column 4; the GPAD row carries
    ``NOT|involved_in`` in column 3.
    """
    columns = [
        "PomBase", "SPBC11B10.09", "cdc2", "NOT", "GO:0007275",
        "PMID:21873635", "ISO", "PANTHER:PTN000623979|TAIR:locus:2099478", "P",
        "Cyclin-dependent kinase 1", "UniProtKB:P04551|PTN000624043",
        "protein", "taxon:284812", "20170228", "GO_Central", "", ""
    ]
    gaf_parser = gafparser.GafParser()
    parse_result = gaf_parser.parse_line("\t".join(columns))

    gaf_writer = assocwriter.GafWriter()
    gaf_row = gaf_writer.as_tsv(parse_result.associations[0])
    print(gaf_row)
    assert gaf_row[3] == "NOT"

    gpad_writer = assocwriter.GpadWriter()
    gpad_row = gpad_writer.as_tsv(parse_result.associations[0])
    print(gpad_row)
    assert gpad_row[2] == "NOT|involved_in"
Beispiel #27
0
def test_validate_pipe_with_additional_delims():
    """IDs are split on ``|`` and on the extra ``,`` delimiter.

    For a fully parsed GAF line, the with/from column is expected as one
    ConjunctiveSet per pipe-separated group, each holding that group's
    comma-separated Curies.
    """
    gaf_parser = gafparser.GafParser()
    split_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    split_ids = gaf_parser.validate_pipe_separated_ids("F:123,B:234|B:111",
                                                       split_line,
                                                       extra_delims=",")
    assert set(split_ids) == {"F:123", "B:234", "B:111"}

    gaf_line = "PomBase\tSPAC25B8.17\typf1\t\tGO:1990578\tGO_REF:0000024\tISO\tUniProtKB:Q9CXD9|ensembl:ENSMUSP00000038569,PMID:11111\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\t20150305\tPomBase\t\t"
    parsed = gaf_parser.parse_line(gaf_line)

    uniprot_only = association.ConjunctiveSet(
        elements=[association.Curie.from_str("UniProtKB:Q9CXD9")])
    ensembl_and_pmid = association.ConjunctiveSet(elements=[
        association.Curie.from_str("ensembl:ENSMUSP00000038569"),
        association.Curie.from_str("PMID:11111")
    ])
    with_from = parsed.associations[0].evidence.with_support_from
    assert with_from == [uniprot_only, ensembl_and_pmid]
Beispiel #28
0
def test_normalize_refs_good():
    """A single ``PMID:123`` reference is returned unchanged."""
    gaf_parser = gafparser.GafParser()
    split_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    normalized = gaf_parser.normalize_refs(["PMID:123"], split_line)
    assert normalized == ["PMID:123"]
Beispiel #29
0
def _association_parser(association_type, config):
    """Return a GafParser built with `config` for "gaf", otherwise None."""
    if association_type == "gaf":
        return gafparser.GafParser(config=config)

    # Any other association type has no parser here.
    return None
Beispiel #30
0
def test_doi_id():
    """The DOI ``DOI:10.1007/BF00127499`` is accepted as a valid ID."""
    gaf_parser = gafparser.GafParser()
    split_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    is_valid = gaf_parser._validate_id("DOI:10.1007/BF00127499", split_line)
    assert is_valid