def read_gaf_csv(filename, version) -> pd.DataFrame:
    """Read a GAF file into a DataFrame of (DB_Object_ID, Qualifier, GO_ID,
    Evidence_code, DB_Reference), translating ECO class ids in the
    Evidence_code column to their short evidence codes (e.g. ECO:... -> IEA).

    :param filename: path to a tab-separated GAF file ('!' lines are comments)
    :param version: GAF version string (currently unused here)
    :return: filtered pandas DataFrame
    """
    ecomapping = ecomap.EcoMap()
    data_frame = pd.read_csv(filename,
                             comment='!',
                             sep='\t',
                             header=None,
                             na_filter=False,
                             names=["DB",
                                    "DB_Object_ID",
                                    "DB_Object_Symbol",
                                    "Qualifier",
                                    "GO_ID",
                                    "DB_Reference",
                                    "Evidence_code",
                                    "With_or_From",
                                    "Aspect",
                                    "DB_Object_Name",
                                    "DB_Object_Synonym",
                                    # BUG FIX: a missing comma fused "DB_Object_Type,"
                                    # and "Taxon" into one name, leaving only 16 names
                                    # for the 17 GAF columns.
                                    "DB_Object_Type",
                                    "Taxon",
                                    "Date",
                                    "Assigned_By",
                                    "Annotation_Extension",
                                    "Gene_Product_Form_ID"]).fillna("")
    new_df = data_frame.filter(['DB_Object_ID', 'Qualifier', 'GO_ID',
                                'Evidence_code', 'DB_Reference'], axis=1)
    # Build the ECO-class -> evidence-code table once and apply it in a single
    # vectorized replace (the original rescanned the whole column per mapping).
    eco_to_code = {m[2]: ecomapping.ecoclass_to_coderef(m[2])[0]
                   for m in ecomapping.mappings()}
    new_df['Evidence_code'] = new_df['Evidence_code'].replace(eco_to_code)
    return new_df
def __init__(self, remove_double_prefixes=False, ontology=None, repair_obsoletes=True, entity_map=None, valid_taxa=None, class_idspaces=None, entity_idspaces=None, ecomap=ecomap.EcoMap(), exclude_relations=None, include_relations=None, filter_out_evidence=None, filtered_evidence_file=None, gpi_authority_path=None, paint=False):
    """Store parser configuration options on the instance.

    List-valued options (exclude_relations, include_relations,
    filter_out_evidence) default to empty lists when not given.
    Note: entity_idspaces is accepted but not stored here.
    """
    self.remove_double_prefixes = remove_double_prefixes
    self.ontology = ontology
    self.repair_obsoletes = repair_obsoletes
    self.entity_map = entity_map
    self.valid_taxa = valid_taxa
    self.class_idspaces = class_idspaces
    self.ecomap = ecomap
    self.filtered_evidence_file = filtered_evidence_file
    self.gpi_authority_path = gpi_authority_path
    self.paint = paint
    # Normalize the optional list arguments so callers can always iterate them.
    self.include_relations = [] if include_relations is None else include_relations
    self.exclude_relations = [] if exclude_relations is None else exclude_relations
    self.filter_out_evidence = [] if filter_out_evidence is None else filter_out_evidence
def __init__(self, file=None, version=GPAD_1_2):
    """Create a GPAD writer for the given output file.

    Unrecognized versions fall back to GPAD 1.2. The gpa-version header
    line is written immediately.
    """
    self.file = file
    # Fall back to 1.2 rather than emitting an unknown version header.
    self.version = version if version in (GPAD_1_2, GPAD_2_0) else GPAD_1_2
    self._write("!gpa-version: {}\n".format(self.version))
    self.ecomap = ecomap.EcoMap()
def __init__(self, remove_double_prefixes=False, ontology=None, repair_obsoletes=True, entity_map=None, valid_taxa=None, class_idspaces=None, entity_idspaces=None, group_idspace=None, ecomap=ecomap.EcoMap(), exclude_relations=None, include_relations=None, filter_out_evidence=None, filtered_evidence_file=None, gpi_authority_path=None, paint=False, rule_metadata=None, goref_metadata=None, group_metadata=None, dbxrefs=None, suppress_rule_reporting_tags=None, annotation_inferences=None, extensions_constraints=None, rule_contexts=None):
    """Configuration bag for association parsing.

    BUG FIX: rule_metadata, suppress_rule_reporting_tags and rule_contexts
    previously used mutable defaults (dict()/[]), which Python evaluates once
    and shares across every instance; they now default to None and are
    normalized per call. Passing an explicit value behaves as before.

    Note: dbxrefs is accepted but not stored here.
    """
    self.remove_double_prefixes = remove_double_prefixes
    self.ontology = ontology
    self.repair_obsoletes = repair_obsoletes
    self.entity_map = entity_map
    self.valid_taxa = valid_taxa
    self.class_idspaces = class_idspaces
    self.ecomap = ecomap
    # List-valued options default to fresh empty lists.
    self.include_relations = [] if include_relations is None else include_relations
    self.exclude_relations = [] if exclude_relations is None else exclude_relations
    self.filter_out_evidence = [] if filter_out_evidence is None else filter_out_evidence
    self.filtered_evidence_file = filtered_evidence_file
    self.gpi_authority_path = gpi_authority_path
    self.paint = paint
    # This is a dictionary from ruleid: `gorule-0000001` to title strings
    self.rule_metadata = {} if rule_metadata is None else rule_metadata
    self.goref_metadata = goref_metadata
    self.group_metadata = group_metadata
    self.suppress_rule_reporting_tags = [] if suppress_rule_reporting_tags is None else suppress_rule_reporting_tags
    self.annotation_inferences = annotation_inferences
    self.entity_idspaces = entity_idspaces
    self.extensions_constraints = AssocParserConfig._compute_constraint_subclasses(
        extensions_constraints, ontology)
    self.group_idspace = None if group_idspace is None else set(group_idspace)
    self.rule_contexts = [] if rule_contexts is None else rule_contexts
def __init__(self, remove_double_prefixes=False, ontology=None, repair_obsoletes=True, entity_map=None, valid_taxa=None, class_idspaces=None, entity_idspaces=None, group_idspace=None, ecomap=ecomap.EcoMap(), exclude_relations=None, include_relations=None, filter_out_evidence=None, filtered_evidence_file=None, gpi_authority_path=None, paint=False, rule_titles=None, dbxrefs=None):
    """Store parser configuration options on the instance.

    entity_idspaces and group_idspace are converted to sets when given;
    the list-valued filter options default to empty lists.
    Note: dbxrefs is accepted but not stored here.
    """
    self.remove_double_prefixes = remove_double_prefixes
    self.ontology = ontology
    self.repair_obsoletes = repair_obsoletes
    self.entity_map = entity_map
    self.valid_taxa = valid_taxa
    self.class_idspaces = class_idspaces
    self.ecomap = ecomap
    self.filtered_evidence_file = filtered_evidence_file
    self.gpi_authority_path = gpi_authority_path
    self.paint = paint
    # This is a dictionary from ruleid: `gorule-0000001` to title strings
    self.rule_titles = rule_titles
    self.entity_idspaces = None if entity_idspaces is None else set(entity_idspaces)
    self.group_idspace = None if group_idspace is None else set(group_idspace)
    # Normalize the optional list arguments so callers can always iterate them.
    self.include_relations = [] if include_relations is None else include_relations
    self.exclude_relations = [] if exclude_relations is None else exclude_relations
    self.filter_out_evidence = [] if filter_out_evidence is None else filter_out_evidence
def read_gpad_csv(filename, version) -> pd.DataFrame:
    """Read a GPAD 1.x or 2.0 file into a normalized DataFrame.

    GPAD 1.x splits the subject id across the db and subject columns, so
    those are fused into a single CURIE-style 'subject' column. ECO class
    ids in 'evidence_code' are translated to short evidence codes, and
    subject ids are normalized via the GPAD parser.

    :param filename: path to a tab-separated GPAD file ('!' lines are comments)
    :param version: GPAD version string; anything starting with "1" is 1.x
    :return: filtered pandas DataFrame
    """
    if version.startswith("1"):
        data_frame = pd.read_csv(filename, comment='!', sep='\t', header=None,
                                 na_filter=False, names=gpad_1_2_format).fillna("")
        df = data_frame.filter(['db', 'subject', 'qualifiers', 'relation',
                                'object', 'evidence_code', 'reference'], axis=1)
        # Fuse db + subject into one CURIE (e.g. "UniProtKB" + "P12345").
        df['concat_column'] = df['db'] + ":" + df['subject']
        filtered_df = df.filter(['concat_column', 'qualifiers', 'relation',
                                 'object', 'evidence_code', 'reference'])
        filtered_df.rename(columns={'concat_column': 'subject'}, inplace=True)
        new_df = filtered_df
    else:
        data_frame = pd.read_csv(filename, comment='!', sep='\t', header=None,
                                 na_filter=False, names=gpad_2_0_format).fillna("")
        new_df = data_frame.filter(['subject', 'negation', 'relation', 'object',
                                    'evidence_code', 'reference'], axis=1)
    # Build the ECO-class -> evidence-code table once and apply it in a single
    # vectorized replace (the original rescanned the whole column per mapping).
    ecomapping = ecomap.EcoMap()
    eco_to_code = {m[2]: ecomapping.ecoclass_to_coderef(m[2])[0]
                   for m in ecomapping.mappings()}
    new_df['evidence_code'] = new_df['evidence_code'].replace(eco_to_code)
    # Normalize subject ids (e.g. strip doubled prefixes) via the GPAD parser.
    config = assocparser.AssocParserConfig()
    config.remove_double_prefixes = True
    parser = gpadparser.GpadParser(config=config)
    for i, r in enumerate(new_df['subject']):
        new_df.at[i, 'subject'] = parser._normalize_id(r)
    return new_df
import logging
from typing import List
from ontobio.io import assocparser
from ontobio.io.gpadparser import GpadParser
from ontobio.model.association import GoAssociation, Date
from ontobio.rdfgen.gocamgen import errors
from ontobio import ecomap

logger = logging.getLogger(__name__)

# Module-level singletons: one ECO mapping and one GPAD parser, shared by all users.
ecomapping = ecomap.EcoMap()
ipi_eco = ecomapping.coderef_to_ecoclass("IPI")
GPAD_PARSER = GpadParser()
BINDING_ROOT = "GO:0005488"  # binding
# ECO class id corresponding to the IPI evidence code (alias of ipi_eco above).
IPI_ECO_CODE = ipi_eco


class GoAssocWithFrom:
    """
    Separate with/from column values into header vs line arrangement.
    Used for explicit placement in annotation assertions.
    """
    def __init__(self, header=None, line=None):
        # Default both collections to empty lists rather than using mutable defaults.
        if header is None:
            header = []
        if line is None:
            line = []
        self.header = sorted(header)
        # NOTE(review): `line` is normalized above but never assigned to an
        # attribute here — possibly truncated in this view; confirm against
        # the full source before relying on a `self.line` attribute.
def __init__(self, file=None):
    """Create a GPAD 1.1 writer for the given output file.

    The gpa-version header line is written immediately on construction.
    """
    self.file = file
    self.ecomap = ecomap.EcoMap()
    # Emit the fixed GPAD 1.1 version header as the first output line.
    self._write("!gpa-version: 1.1\n")
def __init__(self):
    """Rule GORULE:0000043 — soft check of evidence code / GO_REF pairing."""
    super().__init__("GORULE:0000043",
                     "Check for valid combination of evidence code and GO_REF",
                     FailMode.SOFT)
    # ECO mapping used to translate between evidence codes and ECO classes.
    self.ecomapping = ecomap.EcoMap()
def __init__(self, remove_double_prefixes=False, ontology=None, repair_obsoletes=True, entity_map=None, valid_taxa=None, class_idspaces=None, entity_idspaces=None, group_idspace=None, ecomap=ecomap.EcoMap(), exclude_relations=None, include_relations=None, filter_out_evidence=None, filtered_evidence_file=None, gpi_authority_path=None, paint=False, rule_metadata=None, goref_metadata=None, group_metadata=None, dbxrefs=None, suppress_rule_reporting_tags=None, annotation_inferences=None, extensions_constraints=None, rule_contexts=None, rule_set=None):
    """Configuration bag for association parsing, with rule-set selection.

    BUG FIX: rule_metadata, suppress_rule_reporting_tags and rule_contexts
    previously used mutable defaults (dict()/[]), which Python evaluates once
    and shares across every instance; they now default to None and are
    normalized per call. Passing an explicit value behaves as before.

    rule_set: None runs no rules; RuleSet.ALL runs all; otherwise the given
    collection of rule ids. Note: dbxrefs is accepted but not stored here.
    """
    self.remove_double_prefixes = remove_double_prefixes
    self.ontology = ontology
    self.repair_obsoletes = repair_obsoletes
    self.entity_map = entity_map
    self.valid_taxa = valid_taxa
    self.class_idspaces = class_idspaces
    self.ecomap = ecomap
    # List-valued options default to fresh empty lists.
    self.include_relations = [] if include_relations is None else include_relations
    self.exclude_relations = [] if exclude_relations is None else exclude_relations
    self.filter_out_evidence = [] if filter_out_evidence is None else filter_out_evidence
    self.filtered_evidence_file = filtered_evidence_file
    self.gpi_authority_path = gpi_authority_path
    self.paint = paint
    # This is a dictionary from ruleid: `gorule-0000001` to title strings
    self.rule_metadata = {} if rule_metadata is None else rule_metadata
    self.goref_metadata = goref_metadata
    self.group_metadata = group_metadata
    self.suppress_rule_reporting_tags = [] if suppress_rule_reporting_tags is None else suppress_rule_reporting_tags
    self.annotation_inferences = annotation_inferences
    self.entity_idspaces = entity_idspaces
    self.extensions_constraints = AssocParserConfig._compute_constraint_subclasses(
        extensions_constraints, ontology)
    self.group_idspace = None if group_idspace is None else set(group_idspace)
    self.rule_contexts = [] if rule_contexts is None else rule_contexts
    # We'll say that the default None should run no rules.
    if rule_set is None:  # was `== None`; identity check is the correct idiom
        self.rule_set = RuleSet([])
    elif rule_set == RuleSet.ALL:
        # None here means all rules
        self.rule_set = RuleSet(None)
    else:
        self.rule_set = RuleSet(rule_set)