Example No. 1
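These snippets appear to be tests and helpers built on the ontobio association parsing/writing API. None of them show their imports; a plausible set, assumed rather than taken from the originals, would be:

# Assumed imports for the examples below (the snippets themselves do not show them)
import io
from typing import Optional

import click
import pandas as pd

from ontobio import ecomap
from ontobio.io import assocparser, assocwriter, gafparser, gpadparser
from ontobio.io.assocparser import AssocParserConfig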
def test_gpad_qualifier_removed_in_gaf_2_1():
    # The qualifier is `part_of`: when writing GAF 2.1 the qualifier column should come out blank rather than the whole line being dropped
    line = "PomBase\tSPBC1348.01\tpart_of\tGO:0009897\tGO_REF:0000051\tECO:0000266\t\t\t20060201\tPomBase\t\t"
    parser = gpadparser.GpadParser()
    out = io.StringIO()
    writer = assocwriter.GafWriter(file=out,
                                   version="2.1")  # Write out to gaf 2.1

    assoc = parser.parse_line(line).associations[0]
    writer.write_assoc(assoc)
    gpad_to_gaf_line = [
        line for line in out.getvalue().split("\n") if not line.startswith("!")
    ][0]
    assert gpad_to_gaf_line.split("\t")[3] == ""

    # Test with a `NOT`
    line = "PomBase\tSPBC1348.01\tNOT|part_of\tGO:0009897\tGO_REF:0000051\tECO:0000266\t\t\t20060201\tPomBase\t\t"
    parser = gpadparser.GpadParser()
    out = io.StringIO()
    writer = assocwriter.GafWriter(file=out,
                                   version="2.1")  # Write out to gaf 2.1

    assoc = parser.parse_line(line).associations[0]
    writer.write_assoc(assoc)
    gpad_to_gaf_line = [
        line for line in out.getvalue().split("\n") if not line.startswith("!")
    ][0]
    assert gpad_to_gaf_line.split("\t")[3] == "NOT"
Example No. 2
def create_parser_from_header(
        line: str,
        config: assocparser.AssocParserConfig,
        group="unknown",
        dataset="unknown",
        bio_entities=None) -> Optional[assocparser.AssocParser]:
    parser = None
    parsed_version = parser_version_regex.findall(line)
    if len(parsed_version) == 1:
        filetype, version, _ = parsed_version[0]
        if filetype in ["gpad", "gpa"]:
            parser = gpadparser.GpadParser(config=config,
                                           bio_entities=bio_entities,
                                           group=group,
                                           dataset=dataset)
            if version in ["1.2", "2.0"]:
                parser.version = version
        elif filetype == "gaf":
            parser = gafparser.GafParser(config=config,
                                         bio_entities=bio_entities,
                                         group=group,
                                         dataset=dataset)
            if version in ["2.1", "2.2"]:
                parser.version = version

    return parser
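`parser_version_regex` is not shown in the snippet; it is presumably a module-level pattern that matches version header lines such as `!gaf-version: 2.2` or `!gpad-version: 2.0` and yields (filetype, version, rest). A minimal sketch of that assumption, plus an illustrative call:

import re

# Hypothetical stand-in for the module-level regex the function relies on
parser_version_regex = re.compile(r"(gpad|gpa|gaf)-version:\s*(\d+\.\d+)(.*)")

config = assocparser.AssocParserConfig()
parser = create_parser_from_header("!gaf-version: 2.2", config)
# parser is a GafParser with parser.version == "2.2"; None is returned when the header is not recognized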
Example No. 3
    def parse_gpad_vals_to_gaf_io(gpad_vals):
        parser = gpadparser.GpadParser()
        gaf_out = io.StringIO()
        writer = assocwriter.GafWriter(file=gaf_out)

        assoc = parser.parse_line("\t".join(gpad_vals)).associations[0]
        writer.write_assoc(assoc)
        return gaf_out
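A hypothetical call to the helper above, reusing the GPAD 1.2 fields from Example No. 1 (assuming the helper is in scope):

gpad_vals = ["PomBase", "SPBC1348.01", "part_of", "GO:0009897",
             "GO_REF:0000051", "ECO:0000266", "", "", "20060201", "PomBase", "", ""]
gaf_out = parse_gpad_vals_to_gaf_io(gpad_vals)
print(gaf_out.getvalue())  # "!" header lines followed by the converted GAF row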
Example No. 4
def test_writing_assoc_properties():
    line = "MGI:MGI:1922721\t\tRO:0002327\tGO:0019904\tMGI:MGI:3769586|PMID:17984326\tECO:0000353\tPR:Q0KK55\t\t2010-12-01\tMGI\tBFO:0000066(EMAPA:17787),RO:0002233(MGI:MGI:1923734)\tcreation-date=2008-02-07|modification-date=2010-12-01|comment=v-KIND domain binding of Kndc1;MGI:1923734|contributor-id=http://orcid.org/0000-0003-2689-5511|contributor-id=http://orcid.org/0000-0003-3394-9805"
    parser = gpadparser.GpadParser()
    parser.version = "2.0"
    out = io.StringIO()
    writer = assocwriter.GpadWriter(
        file=out, version="2.0")  # Write back out to gpad 2.0

    assoc = parser.parse_line(line).associations[0]
    writer.write_assoc(assoc)
    written_gpad_line = [
        line for line in out.getvalue().split("\n") if not line.startswith("!")
    ][0]
    written_props = written_gpad_line.split("\t")[11]
    assert len(written_props.split("|")) == 5
Example No. 5
    def __init__(self, gpad_file, parser_config: AssocParserConfig):
        self.assocs = []
        self.gpad_parser = gpadparser.GpadParser(config=parser_config)
        # Count the lines up front so the progress bar below has a known length
        with open(gpad_file) as sg:
            lines = sum(1 for line in sg)

        with open(gpad_file) as gf:
            click.echo("Making products...")
            with click.progressbar(
                    iterable=self.gpad_parser.association_generator(
                        file=gf, skipheader=True),
                    length=lines) as associations:
                self.assocs = list(associations)

        self.entity_parents = self.parse_gpi_parents(
            parser_config.gpi_authority_path)
Example No. 6
def read_gpad_csv(filename, version) -> pd.DataFrame:
    if version.startswith("1"):
        data_frame = pd.read_csv(filename,
                                 comment='!',
                                 sep='\t',
                                 header=None,
                                 na_filter=False,
                                 names=gpad_1_2_format).fillna("")
        df = data_frame.filter(['db', 'subject', 'qualifiers', 'relation', 'object', 'evidence_code', 'reference'], axis=1)
        # GPAD 1.x splits the subject across two columns; rebuild the prefixed id (e.g. "MGI:MGI:1922721")
        df['concat_column'] = df['db'] + ":" + df['subject']
        filtered_df = df.filter(['concat_column', 'qualifiers', 'relation', 'object', 'evidence_code', 'reference'])
        filtered_df.rename(columns={'concat_column': 'subject'}, inplace=True)
        new_df = filtered_df
    else:
        data_frame = pd.read_csv(filename,
                                 comment='!',
                                 sep='\t',
                                 header=None,
                                 na_filter=False,
                                 names=gpad_2_0_format).fillna("")
        new_df = data_frame.filter(['subject', 'negation', 'relation', 'object', 'evidence_code', 'reference'], axis=1)

    # Translate ECO class IDs (e.g. ECO:0000266) into GAF-style evidence codes (e.g. ISO)
    ecomapping = ecomap.EcoMap()
    for eco_code in ecomapping.mappings():
        eco_class = eco_code[2]
        if eco_class in new_df['evidence_code'].values:
            new_df['evidence_code'] = new_df['evidence_code'].replace(
                [eco_class], ecomapping.ecoclass_to_coderef(eco_class)[0])

    # Normalize subject ids (e.g. collapse doubled prefixes such as "MGI:MGI:")
    config = assocparser.AssocParserConfig()
    config.remove_double_prefixes = True
    parser = gpadparser.GpadParser(config=config)
    for i, r in enumerate(new_df['subject']):
        new_df.at[i, 'subject'] = parser._normalize_id(r)

    return new_df
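An illustrative call; the file names are placeholders, and gpad_1_2_format / gpad_2_0_format are presumably module-level column-name lists not shown in the snippet:

df_1_2 = read_gpad_csv("annotations-1.2.gpad", "1.2")  # hypothetical GPAD 1.2 file
df_2_0 = read_gpad_csv("annotations-2.0.gpad", "2.0")  # hypothetical GPAD 2.0 file
print(df_1_2.columns.tolist())  # ['subject', 'qualifiers', 'relation', 'object', 'evidence_code', 'reference']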