def _process_pathways(self, limit=None):
    """
    Load the KEGG pathway list and register each pathway in the graph.

    The pathway file (``self.files['pathway']['file']`` under
    ``self.rawdir``) is read as two tab-separated columns:
    pathway id and pathway name.  Every kept row is handed to
    ``Pathway.addPathway`` under the id ``'KEGG-' + <pathway id>`` and is
    given a depiction pointing at the matching KEGG map image, which maps
    1:1 onto the pathway identifier.

    Triples, as described by the original author (the exact predicates
    are chosen by ``Pathway.addPathway`` / ``Model.addDepiction``):
    <pathway_id> is a GO:signal_transduction
    <pathway_id> rdfs:label <pathway_name>

    In test mode only ids listed in ``self.test_ids['pathway']`` are kept
    and ``limit`` is ignored.

    :param limit: outside test mode, stop once the 1-based row count
        exceeds this value (the row that trips the check has already
        been processed); ``None`` means no cap
    :return: None
    """
    logger.info("Processing pathways")

    in_test = self.testMode
    g = self.testgraph if in_test else self.graph
    model = Model(g)
    path = Pathway(g)

    raw = '/'.join((self.rawdir, self.files['pathway']['file']))
    with open(raw, 'r', encoding="iso-8859-1") as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for line_counter, row in enumerate(filereader, start=1):
            (pathway_id, pathway_name) = row

            # the test-id filter is applied to the raw, un-prefixed id
            if in_test and pathway_id not in self.test_ids['pathway']:
                continue

            pathway_id = 'KEGG-' + pathway_id.strip()
            path.addPathway(pathway_id, pathway_name)

            # the KEGG pathway images map 1:1 onto the ids, so add them
            image_filename = re.sub(r'KEGG-path:', '', pathway_id) + '.png'
            image_url = \
                'http://www.genome.jp/kegg/pathway/map/' + image_filename
            model.addDepiction(pathway_id, image_url)

            if in_test or limit is None:
                continue
            if line_counter > limit:
                break

    logger.info("Done with pathways")
def _process_pathways(self, limit=None):
    """
    Add the canonical KEGG pathways, and their map images, to the graph.

    Each row of the tab-separated pathway file holds a pathway id and a
    pathway name.  The id is prefixed with ``'KEGG-'`` and passed, with
    the name, to ``Pathway.addPathway``; the graphical depiction, which
    maps 1:1 onto the identifier, is attached with ``Model.addDepiction``.

    Triples, as described by the original author (the exact predicates
    are chosen by ``Pathway.addPathway`` / ``Model.addDepiction``):
    <pathway_id> is a GO:signal_transduction
    <pathway_id> rdfs:label <pathway_name>

    When ``self.test_mode`` is set, rows whose id is not in
    ``self.test_ids['pathway']`` are skipped and ``limit`` is not applied.

    :param limit: outside test mode, stop after the row at which the csv
        reader's ``line_num`` first exceeds this value; ``None`` means
        read the whole file
    :return: None
    """
    LOG.info("Processing pathways")

    graph = self.testgraph if self.test_mode else self.graph
    model = Model(graph)
    path = Pathway(graph)

    raw = '/'.join((self.rawdir, self.files['pathway']['file']))
    with open(raw, 'r', encoding="iso-8859-1") as csvfile:
        reader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for pathway_id, pathway_name in reader:
            testing = self.test_mode

            # filter on the raw id, before it is stripped and prefixed
            if testing and pathway_id not in self.test_ids['pathway']:
                continue

            pathway_id = 'KEGG-' + pathway_id.strip()
            path.addPathway(pathway_id, pathway_name)

            # pathway images from KEGG map 1:1 onto the ids; add those too
            image_filename = re.sub(r'KEGG-path:', '', pathway_id) + '.png'
            image_url = 'http://www.genome.jp/kegg/pathway/map/' + image_filename
            model.addDepiction(pathway_id, image_url)

            # the row cap is only honoured outside of test mode
            if testing or limit is None:
                continue
            if reader.line_num > limit:
                break

    LOG.info("Done with pathways")
def _process_nlx_157874_1_view(self, raw, limit=None):
    """
    Load the Elements of Morphology (EOM) table into ``self.graph``.

    ``raw`` is read as a tab-delimited file.  Its first line is a header,
    which is passed to ``self.check_fileheader`` together with the
    expected column list ``self.resources['tables']['columns']``; every
    following row is looked up by column name against that list.

    Note that foaf:depiction is the inverse of the foaf:depicts
    relationship.  Since it is bad form to have two definitions, the
    objective and subjective definitions are concatenated into one string.

    Intended output, per the original author's notes (the exact
    predicates are chosen by the ``Model`` / ``Reference`` helpers):
    <eom id> a owl:Class
        rdf:label Literal(eom label)
        oboInOwl:has_related_synonym Literal(synonym list)
        IAO:definition Literal(objective_def. subjective def)
        foaf:depiction Literal(small_image_url), Literal(large_image_url)
        foaf:page Literal(page_url)
        rdfs:comment Literal(long commented text)
    TEC_note: URL are not literals.

    Empty values are never emitted: a definition is only added when at
    least one of the two definition columns has text, and empty fragments
    of the ';'-delimited synonym columns are skipped.

    :param raw: path of the tab-delimited EOM table
    :param limit: stop after the row at which the csv reader's
        ``line_num`` (which counts the header line too) first exceeds
        this value; ``None`` means read the whole file
    :return: None
    """
    src_key = 'tables'
    model = Model(self.graph)
    col = self.resources[src_key]['columns']

    # Columns of the table that are actually consumed.  The rest
    # (morphology_term_num, terminology_category_label,
    # terminology_category_url, subcategory, e_uid, v_uid, v_uuid,
    # v_lastmodified, v_status, v_lastmodified_epoch) are unused.
    used_columns = (
        'morphology_term_id',
        'morphology_term_label',
        'morphology_term_url',
        'objective_definition',
        'subjective_definition',
        'comments',
        'synonyms',
        'replaces',
        'small_figure_url',
        'large_figure_url',
    )

    with open(raw, 'r') as rawread:
        reader = csv.reader(rawread, delimiter='\t', quotechar='\"')
        row = next(reader)
        # A header mismatch is deliberately non-fatal: the result is
        # ignored and the rows are still read by configured position.
        self.check_fileheader(col, row)

        # Resolve each column position once instead of once per row.
        position = {name: col.index(name) for name in used_columns}

        for row in reader:
            field = {
                name: row[at].strip() for name, at in position.items()}
            morphology_term_id = field['morphology_term_id']

            # Add morphology term to graph as a class
            # with label, type, and description.
            model.addClassToGraph(
                morphology_term_id, field['morphology_term_label'])

            # Assemble the description text: objective first, then
            # subjective, each ending with a period.  Joining only the
            # non-empty parts avoids stray leading/trailing blanks.
            sentences = []
            for text in (field['objective_definition'],
                         field['subjective_definition']):
                if text == '':
                    continue
                if not re.match(r'.+\.$', text):
                    # add a trailing period.
                    text = text + '.'
                sentences.append(text)
            definition = ' '.join(sentences)
            if definition != '':
                model.addDefinition(morphology_term_id, definition)

            # <term id> FOAF:depicted_by literal url
            # <url> type foaf:depiction
            # do we want both images?
            for figure_url in (field['small_figure_url'],
                               field['large_figure_url']):
                if figure_url != '':
                    model.addDepiction(morphology_term_id, figure_url)

            # morphology_term_id has comment comments
            if field['comments'] != '':
                model.addComment(morphology_term_id, field['comments'])

            # morphology_term_id has_exact_synonym synonyms (; delimited)
            synonyms = field['synonyms']
            for syn in synonyms.split(';'):
                syn = syn.strip()
                if syn != '':
                    model.addSynonym(
                        morphology_term_id, syn,
                        self.globaltt['has_exact_synonym'])

            # morphology_term_id has_related_synonym replaces (; delimited)
            replaces = field['replaces']
            if replaces not in ['', synonyms]:
                for syn in replaces.split(';'):
                    syn = syn.strip()
                    if syn != '':
                        model.addSynonym(
                            morphology_term_id, syn,
                            self.globaltt['has_related_synonym'])

            # <morphology_term_id> <foaf:page> morphology_term_url
            # (the id is always a str here, so no None check is needed)
            reference = Reference(
                self.graph, morphology_term_id, self.globaltt['web page'])
            # TEC 201905:
            # Not so sure we need explicit <eom_uri> <webpage> <eom_url>.
            # since <eom_uri> IS the <eom_url>.
            reference.addPage(
                morphology_term_id, field['morphology_term_url'])

            if limit is not None and reader.line_num > limit:
                break
def _process_nlx_157874_1_view(self, raw, limit=None):
    """
    Load the Elements of Morphology data (screen-scraped into DISCO)
    from ``raw`` into ``self.graph``.

    ``raw`` is read as a tab-delimited file whose first line is a header
    (skipped).  Every data row must have exactly 20 columns; any other
    width raises ``ValueError`` from the tuple unpacking.

    Note that foaf:depiction is the inverse of the foaf:depicts
    relationship.  Since it is bad form to have two definitions, the
    objective and subjective definitions are concatenated into one string.
    Each definition is stripped of surrounding whitespace *before* it is
    checked for a trailing period, so text such as ``"foo. "`` is not
    turned into ``"foo.."``.

    Intended triples, per the original author's notes (the exact
    predicates are chosen by the ``Model`` / ``Reference`` helpers):
    <eom id> a owl:Class
        rdf:label Literal(eom label)
        OIO:hasRelatedSynonym Literal(synonym list)
        IAO:definition Literal(objective_def. subjective def)
        foaf:depiction Literal(small_image_url), Literal(large_image_url)
        foaf:page Literal(page_url)
        rdfs:comment Literal(long commented text)

    :param raw: path of the tab-delimited EOM table
    :param limit: stop once the 1-based data-row count exceeds this
        value (the row that trips the check has already been
        processed); ``None`` means read the whole file
    :return: None
    """
    model = Model(self.graph)
    with open(raw, 'r') as f1:
        f1.readline()  # read the header row; skip
        filereader = csv.reader(f1, delimiter='\t', quotechar='\"')
        for line_counter, line in enumerate(filereader, start=1):
            # note:
            # e_uid v_uuid v_last_modified terminology_category_url
            # subcategory v_uid morphology_term_num
            # terminology_category_label hp_label notes
            # are currently unused.  They are still unpacked so that a
            # row with an unexpected number of columns fails loudly.
            (morphology_term_id, morphology_term_num,
             morphology_term_label, morphology_term_url,
             terminology_category_label, terminology_category_url,
             subcategory, objective_definition, subjective_definition,
             comments, synonyms, replaces, small_figure_url,
             large_figure_url, e_uid, v_uid, v_uuid, v_last_modified,
             v_status, v_lastmodified_epoch) = line

            # Add morphology term to graph as a class
            # with label, type, and description.
            model.addClassToGraph(morphology_term_id, morphology_term_label)

            # Assemble the description text.  Strip first, then test for
            # the trailing period, so padding cannot hide an existing one.
            subjective_definition = subjective_definition.strip()
            if subjective_definition != '' \
                    and not subjective_definition.endswith('.'):
                subjective_definition = subjective_definition + '.'

            objective_definition = objective_definition.strip()
            if objective_definition != '' \
                    and not objective_definition.endswith('.'):
                objective_definition = objective_definition + '.'

            definition = ' '.join(
                (objective_definition, subjective_definition)).strip()
            model.addDefinition(morphology_term_id, definition)

            # <term id> FOAF:depicted_by literal url
            # <url> type foaf:depiction
            # do we want both images?
            # morphology_term_id has depiction small_figure_url
            if small_figure_url != '':
                model.addDepiction(morphology_term_id, small_figure_url)

            # morphology_term_id has depiction large_figure_url
            if large_figure_url != '':
                model.addDepiction(morphology_term_id, large_figure_url)

            # morphology_term_id has comment comments
            # (stripped before the emptiness test so that a blank-only
            # cell does not produce an empty comment)
            comments = comments.strip()
            if comments != '':
                model.addComment(morphology_term_id, comments)

            # morphology_term_id hasExactSynonym synonyms (; delimited)
            if synonyms != '':
                for s in synonyms.split(';'):
                    model.addSynonym(
                        morphology_term_id, s.strip(),
                        model.annotation_properties['hasExactSynonym'])

            # morphology_term_id hasRelatedSynonym replaces (; delimited)
            if replaces != '' and replaces != synonyms:
                for s in replaces.split(';'):
                    model.addSynonym(
                        morphology_term_id, s.strip(),
                        model.annotation_properties['hasRelatedSynonym'])

            # morphology_term_id has page morphology_term_url
            reference = Reference(self.graph)
            reference.addPage(morphology_term_id, morphology_term_url)

            if limit is not None and line_counter > limit:
                break
def _process_nlx_157874_1_view(self, raw, limit=None):
    """
    Read the Elements of Morphology table (screen-scraped into DISCO)
    at ``raw`` and add one class per row to ``self.graph``.

    The file is tab-delimited; its header line is discarded and each
    remaining row is unpacked into exactly 20 named columns (a row of any
    other width raises ``ValueError``).

    Note that foaf:depiction is the inverse of the foaf:depicts
    relationship.  Since it is bad form to have two definitions, the
    objective and subjective definitions are merged into one string.
    Both are trimmed before the trailing-period test, so padded input
    like ``"foo. "`` keeps a single period and blank-only input stays
    empty.

    Intended triples, per the original author's notes (the exact
    predicates are chosen by the ``Model`` / ``Reference`` helpers):
    <eom id> a owl:Class
        rdf:label Literal(eom label)
        OIO:hasRelatedSynonym Literal(synonym list)
        IAO:definition Literal(objective_def. subjective def)
        foaf:depiction Literal(small_image_url), Literal(large_image_url)
        foaf:page Literal(page_url)
        rdfs:comment Literal(long commented text)

    :param raw: path of the tab-delimited EOM table
    :param limit: stop once the 1-based data-row count exceeds this
        value (the row that trips the check has already been
        processed); ``None`` means read the whole file
    :return: None
    """

    def as_sentence(text):
        """Trim ``text``; give non-empty text a trailing period."""
        text = text.strip()
        if text != '' and not text.endswith('.'):
            text += '.'
        return text

    model = Model(self.graph)
    with open(raw, 'r') as f1:
        f1.readline()  # read the header row; skip
        filereader = csv.reader(f1, delimiter='\t', quotechar='\"')
        for line_counter, line in enumerate(filereader, start=1):
            # note:
            # e_uid v_uuid v_last_modified terminology_category_url
            # subcategory v_uid morphology_term_num
            # terminology_category_label hp_label notes
            # are currently unused; the full unpack doubles as a check
            # on the column count.
            (morphology_term_id, morphology_term_num,
             morphology_term_label, morphology_term_url,
             terminology_category_label, terminology_category_url,
             subcategory, objective_definition, subjective_definition,
             comments, synonyms, replaces, small_figure_url,
             large_figure_url, e_uid, v_uid, v_uuid, v_last_modified,
             v_status, v_lastmodified_epoch) = line

            # Add morphology term to graph as a class
            # with label, type, and description.
            model.addClassToGraph(morphology_term_id, morphology_term_label)

            # Assemble the description text: objective, then subjective.
            definition = ' '.join((
                as_sentence(objective_definition),
                as_sentence(subjective_definition))).strip()
            model.addDefinition(morphology_term_id, definition)

            # <term id> FOAF:depicted_by literal url
            # <url> type foaf:depiction
            # do we want both images?
            for figure_url in (small_figure_url, large_figure_url):
                if figure_url != '':
                    model.addDepiction(morphology_term_id, figure_url)

            # morphology_term_id has comment comments
            # (trim first so a blank-only cell adds nothing)
            comment_text = comments.strip()
            if comment_text != '':
                model.addComment(morphology_term_id, comment_text)

            # morphology_term_id hasExactSynonym synonyms (; delimited)
            if synonyms != '':
                exact = model.annotation_properties['hasExactSynonym']
                for s in synonyms.split(';'):
                    model.addSynonym(morphology_term_id, s.strip(), exact)

            # morphology_term_id hasRelatedSynonym replaces (; delimited)
            if replaces != '' and replaces != synonyms:
                related = model.annotation_properties['hasRelatedSynonym']
                for s in replaces.split(';'):
                    model.addSynonym(morphology_term_id, s.strip(), related)

            # morphology_term_id has page morphology_term_url
            reference = Reference(self.graph)
            reference.addPage(morphology_term_id, morphology_term_url)

            if limit is not None and line_counter > limit:
                break