def _process_pathways(self, limit=None):
    """
    Read the KEGG pathway list and add every pathway to the graph.

    The pathway file is tab-delimited, and each row must hold exactly
    two columns: the pathway identifier and the pathway name
    (any other column count raises ValueError on unpacking).

    For each row that is kept:
        * the identifier is stripped and prefixed with 'KEGG-'
        * the pathway is registered through Pathway.addPathway
        * a depiction is added through GraphUtils.addDepiction, pointing
          at the KEGG map image; image file names map 1:1 to the
          pathway identifier.

    In test mode the test graph is used, only identifiers listed in
    self.test_ids['pathway'] are kept, and ``limit`` is ignored.

    :param limit: row threshold, applied outside of test mode only.
        The check runs after a row has been processed and uses ``>``,
        so up to ``limit + 1`` rows are handled.
    :return: None

    """
    logger.info("Processing pathways")

    g = self.testgraph if self.testMode else self.graph
    pathway_model = Pathway(g, self.nobnodes)
    graph_utils = GraphUtils(curie_map.get())

    pathway_file = '/'.join((self.rawdir, self.files['pathway']['file']))
    image_base_url = 'http://www.genome.jp/kegg/pathway/map/'

    with open(pathway_file, 'r', encoding="iso-8859-1") as csvfile:
        reader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row_number, row in enumerate(reader, start=1):
            raw_id, pathway_name = row

            # The test-id filter is applied to the identifier exactly as
            # it appears in the file (before stripping and prefixing).
            if self.testMode and raw_id not in self.test_ids['pathway']:
                continue

            pathway_id = 'KEGG-' + raw_id.strip()
            pathway_model.addPathway(pathway_id, pathway_name)

            # The image name is the identifier without its
            # 'KEGG-path:' prefix, plus the '.png' extension.
            image_filename = re.sub(r'KEGG-path:', '', pathway_id) + '.png'
            graph_utils.addDepiction(
                g, pathway_id, image_base_url + image_filename)

            # The limit never applies in test mode or when unset.
            if self.testMode or limit is None:
                continue
            if row_number > limit:
                break

    logger.info("Done with pathways")
    return
def _process_nlx_157874_1_view(self, raw, limit=None):
    """
    Add the Elements of Morphology terms from a DISCO view file to the graph.

    This table contains the Elements of Morphology data that has been
    screen-scraped into DISCO.  The file is tab-delimited with one header
    row (skipped) and exactly 18 columns per data row; any other column
    count raises ValueError on unpacking.

    Since it is bad form to have two definitions, the objective and
    subjective definitions are concatenated into one string.  Each part is
    stripped and given a closing period if it lacks one; empty parts are
    left out.

    Calls made on self.graph for every row:
        addClassToGraph   term id with its label
        addDefinition     "objective_def. subjective_def." (if non-empty)
        addDepiction      small figure url and large figure url (if set)
        addComment        stripped comment text (if non-empty)
        addSynonym        each ';'-separated entry of ``synonyms``
                          with gu.properties['hasExactSynonym']
        addSynonym        each ';'-separated entry of ``replaces``
                          with gu.properties['hasRelatedSynonym'],
                          unless ``replaces`` equals ``synonyms``
        addPage           the term url

    :param raw: path of the tab-delimited view file to read
    :param limit: row threshold; the check runs after a row has been
        processed and uses ``>``, so up to ``limit + 1`` rows are handled.
        None means no limit.
    :return: None

    """

    def _as_sentence(text):
        """Return text stripped, ending with a period when non-empty."""
        # Strip before testing, so that padded or multi-line text which
        # already ends with a period does not receive a second one, and
        # whitespace-only text does not turn into a lone '.'.
        text = text.strip()
        if text and not text.endswith('.'):
            text += '.'
        return text

    def _split_terms(value):
        """Return the non-empty, stripped ';'-separated entries of value."""
        stripped = (term.strip() for term in value.split(';'))
        return [term for term in stripped if term]

    gu = GraphUtils(curie_map.get())
    line_counter = 0
    with open(raw, 'r') as f1:
        f1.readline()  # read the header row; skip
        filereader = csv.reader(f1, delimiter='\t', quotechar='\"')
        for line in filereader:
            line_counter += 1
            (morphology_term_id, morphology_term_num,
             morphology_term_label, morphology_term_url,
             terminology_category_label, terminology_category_url,
             subcategory, objective_definition, subjective_definition,
             comments, synonyms, replaces, small_figure_url,
             large_figure_url, e_uid, v_uid, v_uuid,
             v_last_modified) = line

            # note:
            # e_uid v_uuid v_last_modified terminology_category_url
            # subcategory v_uid morphology_term_num
            # terminology_category_label
            # are currently unused.

            # Add morphology term to graph as a class with its label.
            gu.addClassToGraph(
                self.graph, morphology_term_id, morphology_term_label)

            # Assemble the description text from the non-empty parts.
            definition = ' '.join(
                part for part in (_as_sentence(objective_definition),
                                  _as_sentence(subjective_definition))
                if part)
            if definition:
                gu.addDefinition(
                    self.graph, morphology_term_id, definition)

            # morphology_term_id has depiction small_figure_url
            if small_figure_url != '':
                gu.addDepiction(
                    self.graph, morphology_term_id, small_figure_url)

            # morphology_term_id has depiction large_figure_url
            if large_figure_url != '':
                gu.addDepiction(
                    self.graph, morphology_term_id, large_figure_url)

            # morphology_term_id has comment comments
            comment_text = comments.strip()
            if comment_text:
                gu.addComment(
                    self.graph, morphology_term_id, comment_text)

            # morphology_term_id hasExactSynonym synonyms (; delimited)
            for synonym in _split_terms(synonyms):
                gu.addSynonym(
                    self.graph, morphology_term_id, synonym,
                    gu.properties['hasExactSynonym'])

            # morphology_term_id hasRelatedSynonym replaces (; delimited)
            # skipped when it merely repeats the synonyms column
            if replaces != synonyms:
                for replaced in _split_terms(replaces):
                    gu.addSynonym(
                        self.graph, morphology_term_id, replaced,
                        gu.properties['hasRelatedSynonym'])

            # morphology_term_id has page morphology_term_url
            gu.addPage(
                self.graph, morphology_term_id, morphology_term_url)

            if limit is not None and line_counter > limit:
                break
    return