def test_gpad_qualifier_removed_in_gaf_2_1():
    """GPAD qualifiers with no GAF 2.1 equivalent (e.g. `part_of`) should be
    written with a blank qualifier column instead of dropping the whole line;
    a `NOT` prefix must survive on its own."""

    def gaf_2_1_qualifier(gpad_line):
        # Round-trip one GPAD line through the GAF 2.1 writer and return the
        # qualifier column (index 3) of the written data line.
        parser = gpadparser.GpadParser()
        out = io.StringIO()
        writer = assocwriter.GafWriter(file=out, version="2.1")  # Write out to gaf 2.1
        assoc = parser.parse_line(gpad_line).associations[0]
        writer.write_assoc(assoc)
        written = [l for l in out.getvalue().split("\n") if not l.startswith("!")][0]
        return written.split("\t")[3]

    # Qualifier is `part_of` and should be returned blank instead of removing the whole line
    line = "PomBase\tSPBC1348.01\tpart_of\tGO:0009897\tGO_REF:0000051\tECO:0000266\t\t\t20060201\tPomBase\t\t"
    assert gaf_2_1_qualifier(line) == ""

    # Test with a `NOT`: only the NOT survives in GAF 2.1
    line = "PomBase\tSPBC1348.01\tNOT|part_of\tGO:0009897\tGO_REF:0000051\tECO:0000266\t\t\t20060201\tPomBase\t\t"
    assert gaf_2_1_qualifier(line) == "NOT"
def create_parser_from_header(
        line: str,
        config: assocparser.AssocParserConfig,
        group="unknown",
        dataset="unknown",
        bio_entities=None) -> Optional[assocparser.AssocParser]:
    """Build a GPAD or GAF parser from a format-declaration header line.

    Returns None when the line does not contain exactly one recognizable
    format/version declaration. A recognized file type with an unsupported
    version still yields a parser at its default version.
    """
    matches = parser_version_regex.findall(line)
    if len(matches) != 1:
        return None

    filetype, version, _ = matches[0]
    if filetype in ("gpad", "gpa"):
        parser = gpadparser.GpadParser(config=config, bio_entities=bio_entities,
                                       group=group, dataset=dataset)
        supported = ("1.2", "2.0")
    elif filetype == "gaf":
        parser = gafparser.GafParser(config=config, bio_entities=bio_entities,
                                     group=group, dataset=dataset)
        supported = ("2.1", "2.2")
    else:
        return None

    if version in supported:
        parser.version = version
    return parser
def parse_gpad_vals_to_gaf_io(gpad_vals):
    """Join GPAD column values into a tab-separated line, parse it, and write
    the first resulting association as GAF; return the StringIO output."""
    output = io.StringIO()
    gaf_writer = assocwriter.GafWriter(file=output)
    gpad_line = "\t".join(gpad_vals)
    association = gpadparser.GpadParser().parse_line(gpad_line).associations[0]
    gaf_writer.write_assoc(association)
    return output
def test_writing_assoc_properties():
    """Round-trip a GPAD 2.0 line and verify every annotation property
    (column 12) is preserved on write."""
    line = "MGI:MGI:1922721\t\tRO:0002327\tGO:0019904\tMGI:MGI:3769586|PMID:17984326\tECO:0000353\tPR:Q0KK55\t\t2010-12-01\tMGI\tBFO:0000066(EMAPA:17787),RO:0002233(MGI:MGI:1923734)\tcreation-date=2008-02-07|modification-date=2010-12-01|comment=v-KIND domain binding of Kndc1;MGI:1923734|contributor-id=http://orcid.org/0000-0003-2689-5511|contributor-id=http://orcid.org/0000-0003-3394-9805"
    parser = gpadparser.GpadParser()
    parser.version = "2.0"
    out = io.StringIO()
    # Write back out to gpad 2.0
    writer = assocwriter.GpadWriter(file=out, version="2.0")
    writer.write_assoc(parser.parse_line(line).associations[0])
    data_lines = [l for l in out.getvalue().split("\n") if not l.startswith("!")]
    written_props = data_lines[0].split("\t")[11]
    # creation-date, modification-date, comment, and two contributor-ids
    assert len(written_props.split("|")) == 5
def __init__(self, gpad_file, parser_config: AssocParserConfig):
    """Parse every association in `gpad_file` (with a progress bar sized by a
    preliminary line count) and load entity parents from the GPI authority
    file referenced in the parser config."""
    self.assocs = []
    self.gpad_parser = gpadparser.GpadParser(config=parser_config)

    # First pass: count lines so the progress bar has a known length.
    with open(gpad_file) as counting_handle:
        total_lines = sum(1 for _ in counting_handle)

    with open(gpad_file) as association_handle:
        click.echo("Making products...")
        generator = self.gpad_parser.association_generator(
            file=association_handle, skipheader=True)
        with click.progressbar(iterable=generator, length=total_lines) as bar:
            self.assocs = list(bar)

    self.entity_parents = self.parse_gpi_parents(parser_config.gpi_authority_path)
def read_gpad_csv(filename, version) -> pd.DataFrame:
    """Read a GPAD file into a DataFrame normalized to a common column set.

    For GPAD 1.x the split db/local-id columns are recombined into a single
    `subject` CURIE; ECO class ids in `evidence_code` are mapped to their GO
    evidence code refs; subject ids are normalized via the GPAD parser.

    :param filename: path to the GPAD file
    :param version: GPAD format version string ("1.x" or "2.0")
    :return: DataFrame with subject/qualifier-or-negation/relation/object/
             evidence_code/reference columns
    """
    if version.startswith("1"):
        data_frame = pd.read_csv(filename,
                                 comment='!',
                                 sep='\t',
                                 header=None,
                                 na_filter=False,
                                 names=gpad_1_2_format).fillna("")
        df = data_frame.filter(['db', 'subject', 'qualifiers', 'relation',
                                'object', 'evidence_code', 'reference'], axis=1)
        # GPAD 1.x splits the subject id across db and local-id columns;
        # recombine them into a single CURIE and make it the subject.
        df['concat_column'] = df['db'] + ":" + df['subject']
        filtered_df = df.filter(['concat_column', 'qualifiers', 'relation',
                                 'object', 'evidence_code', 'reference'])
        filtered_df.rename(columns={'concat_column': 'subject'}, inplace=True)
        new_df = filtered_df
    else:
        data_frame = pd.read_csv(filename,
                                 comment='!',
                                 sep='\t',
                                 header=None,
                                 na_filter=False,
                                 names=gpad_2_0_format).fillna("")
        new_df = data_frame.filter(['subject', 'negation', 'relation',
                                    'object', 'evidence_code', 'reference'], axis=1)

    # Map ECO class ids (e.g. ECO:0000266) to their evidence code refs in one
    # vectorized pass. Replaces the previous O(mappings x rows) nested loop
    # that re-replaced the whole column for every matching mapping.
    ecomapping = ecomap.EcoMap()
    eco_to_coderef = {
        eco_code[2]: ecomapping.ecoclass_to_coderef(eco_code[2])[0]
        for eco_code in ecomapping.mappings()
    }
    new_df['evidence_code'] = new_df['evidence_code'].replace(eco_to_coderef)

    # normalize ids
    config = assocparser.AssocParserConfig()
    config.remove_double_prefixes = True
    parser = gpadparser.GpadParser(config=config)
    for i, r in enumerate(new_df['subject']):
        new_df.at[i, 'subject'] = parser._normalize_id(r)
    return new_df