def __init__(self, graph_type, are_bnodes_skolemized):
    """
    Set up the CTD source: dataset metadata, the gene and disease test
    identifiers, and the Genotype/Pathway helpers bound to the graph.

    Test identifiers are read from the ``'test_ids'`` section of the
    configuration; when the section or the relevant key is absent a
    warning is logged and an empty list is used instead.

    :param graph_type: forwarded unchanged to the parent constructor
    :param are_bnodes_skolemized: forwarded unchanged to the parent
        constructor
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'ctd')
    self.dataset = Dataset(
        'ctd', 'CTD', 'http://ctdbase.org', None,
        'http://ctdbase.org/about/legal.jsp')

    # Fetch the configuration once rather than once per membership test
    # and once more per lookup.
    conf = config.get_config()
    configured_ids = conf['test_ids'] if 'test_ids' in conf else {}

    if 'gene' in configured_ids:
        self.test_geneids = configured_ids['gene']
    else:
        logger.warning("not configured with gene test ids.")
        self.test_geneids = []

    if 'disease' in configured_ids:
        self.test_diseaseids = configured_ids['disease']
    else:
        logger.warning("not configured with disease test ids.")
        self.test_diseaseids = []

    self.g = self.graph
    self.geno = Genotype(self.graph)
    self.pathway = Pathway(self.graph)
def __init__(self, graph_type, are_bnodes_skolemized):
    """
    Initialise the CTD ingest and pick up its gene and disease test ids.

    The ingest metadata (title, URL, data-rights page) is handed to the
    parent constructor. Gene and disease test ids are taken from
    ``self.all_test_ids``; a missing key is logged as a warning and
    replaced by an empty list.

    :param graph_type: forwarded unchanged to the parent constructor
    :param are_bnodes_skolemized: forwarded unchanged to the parent
        constructor
    """
    super().__init__(
        graph_type,
        are_bnodes_skolemized,
        'ctd',
        ingest_title='Comparative Toxicogenomics Database',
        ingest_url='http://ctdbase.org',
        license_url=None,
        data_rights='http://ctdbase.org/about/legal.jsp'
        # file_handle=None
    )

    if 'gene' in self.all_test_ids:
        self.test_geneids = self.all_test_ids['gene']
    else:
        LOG.warning("not configured with gene test ids.")
        self.test_geneids = []

    if 'disease' in self.all_test_ids:
        self.test_diseaseids = self.all_test_ids['disease']
    else:
        LOG.warning("not configured with disease test ids.")
        self.test_diseaseids = []

    self.geno = Genotype(self.graph)
    self.pathway = Pathway(self.graph)
def _process_pathway_ko(self, limit):
    """
    Add each KEGG orthologous group (gene) to its canonical pathway.

    Reads the tab-delimited ``'pathway_ko'`` file from the raw directory.
    Every row is unpacked as a ``(ko_id, pathway_id)`` pair; both ids are
    prefixed with ``'KEGG-'`` and the gene is added to the pathway.
    In test mode, rows whose pathway id is not listed in
    ``self.test_ids['pathway']`` are skipped.

    :param limit: when not None (and not in test mode), stop once more
        than this many rows have been read
    :return: None
    """
    LOG.info("Processing KEGG pathways to kegg ortholog classes")
    graph = self.testgraph if self.test_mode else self.graph

    raw = '/'.join((self.rawdir, self.files['pathway_ko']['file']))
    # One helper serves every row; it was previously rebuilt per row.
    pathway = Pathway(graph)
    with open(raw, 'r', encoding="iso-8859-1") as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t', quotechar='"')
        for line_counter, row in enumerate(filereader, start=1):
            (ko_id, pathway_id) = row

            # The test filter compares the raw (un-prefixed) pathway id.
            if self.test_mode and pathway_id not in self.test_ids['pathway']:
                continue

            pathway.addGeneToPathway('KEGG-' + ko_id, 'KEGG-' + pathway_id)

            if not self.test_mode and limit is not None \
                    and line_counter > limit:
                break
def parse(self, limit=None):
    """
    Override Source.parse()
    Runs the CTD file parser for the 'chemical_disease_associations'
    source key, then parses the batch-query 'publications' file if it
    exists in the raw directory (an error is logged when it does not).

    Args:
        :param limit (int, optional) limit the number of rows processed
    Returns:
        :return None
    """
    if limit is not None:
        LOG.info("Only parsing first %d rows", limit)

    LOG.info("Parsing files...")

    # A test-only run forces test mode on.
    if self.test_only:
        self.test_mode = True

    self.geno = Genotype(self.graph)
    self.pathway = Pathway(self.graph)

    self._parse_ctd_file(limit, 'chemical_disease_associations')

    # self._parse_ctd_file(limit, 'gene_pathway')
    # self._parse_ctd_file(limit, 'gene_disease')

    batch_file = '/'.join(
        (self.rawdir, self.api_fetch['publications']['file']))
    if not os.path.exists(batch_file):
        LOG.error('Batch Query file "%s" does not exist', batch_file)
    else:
        self._parse_curated_chem_disease(batch_file, limit)

    LOG.info("Done parsing files.")
def _add_component_pathway_association(
        self, gene_curie, pathway_curie, pathway_label, eco_curie):
    """
    Record a pathway, its component, and an 'involved in' association
    between the two.

    :param gene_curie: identifier of the component; used as the
        association subject
    :param pathway_curie: identifier of the pathway; used as the
        association object
    :param pathway_label: label passed along when adding the pathway
    :param eco_curie: evidence identifier attached to the association
    :return: None
    """
    path_helper = Pathway(self.graph)
    path_helper.addPathway(pathway_curie, pathway_label)
    path_helper.addComponentToPathway(gene_curie, pathway_curie)

    assoc = Assoc(self.graph, self.name)
    assoc.sub = gene_curie
    assoc.rel = self.globaltt['involved in']
    assoc.obj = pathway_curie
    # The id is set only after subject, relation and object are in place.
    assoc.set_association_id()
    assoc.add_evidence(eco_curie)
    assoc.add_association_to_graph()
def _process_pathways(self, limit=None):
    """
    Add the KEGG pathway ids and their depictions to the graph.

    Reads the tab-delimited ``'pathway'`` file from the raw directory.
    Every row is unpacked as ``(pathway_id, pathway_name)``. The id is
    stripped and prefixed with ``'KEGG-'``, the pathway is added with its
    name, and a depiction URL on www.genome.jp is attached (the pathway
    images from KEGG map 1:1 to the identifier).
    In test mode, rows whose raw pathway id is not listed in
    ``self.test_ids['pathway']`` are skipped.

    :param limit: when not None (and not in test mode), stop once more
        than this many rows have been read
    :return: None
    """
    logger.info("Processing pathways")
    target_graph = self.testgraph if self.testMode else self.graph
    model = Model(target_graph)
    path = Pathway(target_graph)

    raw = '/'.join((self.rawdir, self.files['pathway']['file']))
    with open(raw, 'r', encoding="iso-8859-1") as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t', quotechar='"')
        for line_counter, row in enumerate(filereader, start=1):
            (pathway_id, pathway_name) = row

            if self.testMode and pathway_id not in self.test_ids['pathway']:
                continue

            pathway_id = 'KEGG-' + pathway_id.strip()
            path.addPathway(pathway_id, pathway_name)

            # The image file is named after the id without 'KEGG-path:'.
            image_filename = re.sub(r'KEGG-path:', '', pathway_id) + '.png'
            image_url = \
                'http://www.genome.jp/kegg/pathway/map/' + image_filename
            model.addDepiction(pathway_id, image_url)

            if not self.testMode and limit is not None \
                    and line_counter > limit:
                break

    logger.info("Done with pathways")
def _add_component_pathway_association(
        self, eco_map, component, component_prefix, pathway_id,
        pathway_prefix, pathway_label, go_ecode):
    """
    Build the component and pathway CURIEs, then record the pathway, its
    component, and an 'involved in' association between the two.

    :param eco_map: mapping looked up with ``go_ecode`` to obtain the
        evidence identifier
    :param component: local id of the component; surrounding whitespace
        is stripped before use
    :param component_prefix: prefix joined to the component id with ':'
    :param pathway_id: local id of the pathway
    :param pathway_prefix: prefix joined to the pathway id with ':'
    :param pathway_label: label passed along when adding the pathway
    :param go_ecode: key into ``eco_map``
    :return: None
    """
    path_helper = Pathway(self.graph)

    curie_template = "{}:{}"
    pathway_curie = curie_template.format(pathway_prefix, pathway_id)
    gene_curie = curie_template.format(component_prefix, component.strip())
    eco_curie = eco_map[go_ecode]

    path_helper.addPathway(pathway_curie, pathway_label)
    path_helper.addComponentToPathway(gene_curie, pathway_curie)

    assoc = Assoc(self.graph, self.name)
    assoc.sub = gene_curie
    assoc.rel = self.globaltt['involved in']
    assoc.obj = pathway_curie
    # The id is set only after subject, relation and object are in place.
    assoc.set_association_id()
    assoc.add_evidence(eco_curie)
    assoc.add_association_to_graph()
def parse(self, limit=None):
    """
    Override Source.parse()
    Selects the working graph (the test graph in test mode), then runs
    the CTD file parser over the chemical-disease interaction,
    gene-pathway and gene-disease files, followed by the curated
    chemical-disease parser.

    Args:
        :param limit (int, optional) limit the number of rows processed
    Returns:
        :return None
    """
    if limit is not None:
        logger.info("Only parsing first %d rows", limit)

    logger.info("Parsing files...")

    # pub_map = dict()
    # file_path = '/'.join((self.rawdir,
    #                       self.static_files['publications']['file']))
    # if os.path.exists(file_path) is True:
    #     pub_map = self._parse_publication_file(
    #         self.static_files['publications']['file']
    #     )

    # A test-only run forces test mode on.
    if self.testOnly:
        self.testMode = True

    self.g = self.testgraph if self.testMode else self.graph
    self.geno = Genotype(self.g)
    self.pathway = Pathway(self.g)

    # Files are handled in this fixed order.
    for file_key in (
            'chemical_disease_interactions', 'gene_pathway', 'gene_disease'):
        self._parse_ctd_file(limit, self.files[file_key]['file'])

    self._parse_curated_chem_disease(limit)

    logger.info("Done parsing files.")
def parse(self, limit=None): """ Override Source.parse() Parses version and interaction information from CTD Args: :param limit (int, optional) limit the number of rows processed Returns: :return None """ if limit is not None: LOG.info("Only parsing first %d rows", limit) LOG.info("Parsing files...") if self.test_only: self.test_mode = True self.geno = Genotype(self.graph) self.pathway = Pathway(self.graph) src_key = 'chemical_disease_associations' self._parse_ctd_file(limit, src_key)