Exemple #1
0
    def _process_pathways(self, limit=None):
        """
        This method adds the KEGG pathway IDs.
        These are the canonical pathways as defined in KEGG.
        We also encode the graphical depiction
        which maps 1:1 with the identifier.

        Triples created:
        <pathway_id> is a GO:signal_transduction
        <pathway_id> rdfs:label <pathway_name>
        <gene_id> RO:involved_in <pathway_id>
        :param limit:
        :return:

        """

        logger.info("Processing pathways")
        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph
        model = Model(g)
        line_counter = 0
        path = Pathway(g)
        raw = '/'.join((self.rawdir, self.files['pathway']['file']))
        with open(raw, 'r', encoding="iso-8859-1") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in filereader:
                line_counter += 1
                (pathway_id, pathway_name) = row

                if self.testMode and \
                        pathway_id not in self.test_ids['pathway']:
                    continue

                pathway_id = 'KEGG-'+pathway_id.strip()
                path.addPathway(pathway_id, pathway_name)

                # we know that the pathway images from kegg map 1:1 here.
                # so add those
                image_filename = re.sub(r'KEGG-path:', '', pathway_id) + '.png'
                image_url = \
                    'http://www.genome.jp/kegg/pathway/map/'+image_filename
                model.addDepiction(pathway_id, image_url)

                if not self.testMode and \
                        limit is not None and line_counter > limit:
                    break

        logger.info("Done with pathways")
        return
Exemple #2
0
    def _process_pathways(self, limit=None):
        """
        This method adds the KEGG pathway IDs.
        These are the canonical pathways as defined in KEGG.
        We also encode the graphical depiction
        which maps 1:1 with the identifier.

        Triples created:
        <pathway_id> is a GO:signal_transduction
        <pathway_id> rdfs:label <pathway_name>
        <gene_id> RO:involved_in <pathway_id>
        :param limit:
        :return:

        """

        LOG.info("Processing pathways")
        if self.test_mode:
            graph = self.testgraph
        else:
            graph = self.graph
        model = Model(graph)
        path = Pathway(graph)
        raw = '/'.join((self.rawdir, self.files['pathway']['file']))
        with open(raw, 'r', encoding="iso-8859-1") as csvfile:
            reader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in reader:
                (pathway_id, pathway_name) = row

                if self.test_mode and pathway_id not in self.test_ids['pathway']:
                    continue

                pathway_id = 'KEGG-'+pathway_id.strip()
                path.addPathway(pathway_id, pathway_name)

                # we know that the pathway images from kegg map 1:1 here.
                # so add those
                image_filename = re.sub(r'KEGG-path:', '', pathway_id) + '.png'
                image_url = 'http://www.genome.jp/kegg/pathway/map/'+image_filename
                model.addDepiction(pathway_id, image_url)

                if not self.test_mode and limit is not None and reader.line_num > limit:
                    break
        LOG.info("Done with pathways")
Exemple #3
0
    def _process_nlx_157874_1_view(self, raw, limit=None):
        """
        This table contains the Elements of Morphology data .
        Note that foaf:depiction is inverse of foaf:depicts relationship.

        Since it is bad form to have two definitions,
        we concatenate the two into one string.

        Turtle:
            <eom id> a owl:Class
                rdf:label Literal(eom label)
                oboInOwl:has_related_synonym Literal(synonym list)
                IAO:definition Literal(objective_def. subjective def)
                foaf:depiction Literal(small_image_url),
                               Literal(large_image_url)
                foaf:page Literal(page_url)
                rdfs:comment Literal(long commented text)

        TEC_note: URL are not literals.


        :param raw:
        :param limit:
        :return:
        """

        src_key = 'tables'
        model = Model(self.graph)
        col = self.resources[src_key]['columns']
        with open(raw, 'r') as rawread:
            reader = csv.reader(rawread, delimiter='\t', quotechar='\"')
            row = next(reader)
            if not self.check_fileheader(col, row):
                pass

            for row in reader:
                # head -1 dvp.pr_nlx_157874_1|tr '\t' '\n'|
                # sed "s|\(.*\)|# \1 = row[col.index('\1')]|g"

                morphology_term_id = row[col.index(
                    'morphology_term_id')].strip()
                # morphology_term_num = row[col.index('morphology_term_num')]
                morphology_term_label = row[col.index(
                    'morphology_term_label')].strip()
                morphology_term_url = row[col.index(
                    'morphology_term_url')].strip()
                # terminology_category_label = row[
                #   col.index('terminology_category_label')]
                # terminology_category_url = row[col.index('terminology_category_url')]
                # subcategory = row[col.index('subcategory')]
                objective_definition = row[col.index(
                    'objective_definition')].strip()
                subjective_definition = row[col.index(
                    'subjective_definition')].strip()
                comments = row[col.index('comments')].strip()
                synonyms = row[col.index('synonyms')].strip()
                replaces = row[col.index('replaces')].strip()
                small_figure_url = row[col.index('small_figure_url')].strip()
                large_figure_url = row[col.index('large_figure_url')].strip()
                # e_uid = row[col.index('e_uid')]
                # v_uid = row[col.index('v_uid')]
                # v_uuid = row[col.index('v_uuid')]
                # v_lastmodified = row[col.index('v_lastmodified')]
                # v_status = row[col.index('v_status')]
                # v_lastmodified_epoch = row[col.index('v_lastmodified_epoch')]

                # Add morphology term to graph as a class
                # with label, type, and description.
                model.addClassToGraph(morphology_term_id,
                                      morphology_term_label)

                # Assemble the description text

                if subjective_definition != '' and not (re.match(
                        r'.+\.$', subjective_definition)):
                    # add a trailing period.
                    subjective_definition = subjective_definition + '.'
                if objective_definition != '' and not (re.match(
                        r'.+\.$', objective_definition)):
                    # add a trailing period.
                    objective_definition = objective_definition + '.'

                definition = '  '.join(
                    (objective_definition, subjective_definition))

                model.addDefinition(morphology_term_id, definition)

                # <term id> FOAF:depicted_by literal url
                # <url> type foaf:depiction

                # do we want both images?
                # morphology_term_id has depiction small_figure_url
                if small_figure_url != '':
                    model.addDepiction(morphology_term_id, small_figure_url)

                # morphology_term_id has depiction large_figure_url
                if large_figure_url != '':
                    model.addDepiction(morphology_term_id, large_figure_url)

                # morphology_term_id has comment comments
                if comments != '':
                    model.addComment(morphology_term_id, comments)

                for syn in synonyms.split(';'):
                    model.addSynonym(morphology_term_id, syn.strip(),
                                     self.globaltt['has_exact_synonym'])

                # morphology_term_id has_related_synonym replaces (; delimited)
                if replaces not in ['', synonyms]:
                    for syn in replaces.split(';'):
                        model.addSynonym(morphology_term_id, syn.strip(),
                                         self.globaltt['has_related_synonym'])

                # <morphology_term_id> <foaf:page> morphology_term_url
                if morphology_term_id is not None:
                    reference = Reference(self.graph, morphology_term_id,
                                          self.globaltt['web page'])

                    # TEC 201905:
                    # Not so sure we need explicit   <eom_uri> <webpage> <eom_url>.
                    # since <eom_uri> IS the <eom_url>.

                    reference.addPage(morphology_term_id, morphology_term_url)

                if limit is not None and reader.line_num > limit:
                    break
Exemple #4
0
    def _process_nlx_157874_1_view(self, raw, limit=None):
        """
        This table contains the Elements of Morphology data that has been
        screen-scraped into DISCO.
        Note that foaf:depiction is inverse of foaf:depicts relationship.

        Since it is bad form to have two definitions,
        we concatenate the two into one string.

        Triples:
            <eom id> a owl:Class
                rdf:label Literal(eom label)
                OIO:hasRelatedSynonym Literal(synonym list)
                IAO:definition Literal(objective_def. subjective def)
                foaf:depiction Literal(small_image_url),
                               Literal(large_image_url)
                foaf:page Literal(page_url)
                rdfs:comment Literal(long commented text)


        :param raw:
        :param limit:
        :return:
        """

        model = Model(self.graph)
        line_counter = 0
        with open(raw, 'r') as f1:
            f1.readline()  # read the header row; skip
            filereader = csv.reader(f1, delimiter='\t', quotechar='\"')
            for line in filereader:
                line_counter += 1
                (morphology_term_id, morphology_term_num,
                 morphology_term_label, morphology_term_url,
                 terminology_category_label, terminology_category_url,
                 subcategory, objective_definition, subjective_definition,
                 comments, synonyms, replaces, small_figure_url,
                 large_figure_url, e_uid, v_uid, v_uuid,
                 v_last_modified, v_status, v_lastmodified_epoch) = line

                # note:
                # e_uid v_uuid v_last_modified terminology_category_url
                # subcategory v_uid morphology_term_num
                # terminology_category_label hp_label notes
                # are currently unused.

                # Add morphology term to graph as a class
                # with label, type, and description.
                model.addClassToGraph(morphology_term_id,
                                      morphology_term_label)

                # Assemble the description text

                if subjective_definition != '' and not (
                        re.match(r'.+\.$', subjective_definition)):
                    # add a trailing period.
                    subjective_definition = subjective_definition.strip() + '.'
                if objective_definition != '' and not (
                        re.match(r'.+\.$', objective_definition)):
                    # add a trailing period.
                    objective_definition = objective_definition.strip() + '.'

                definition = \
                    '  '.join(
                        (objective_definition, subjective_definition)).strip()

                model.addDefinition(morphology_term_id, definition)

                # <term id> FOAF:depicted_by literal url
                # <url> type foaf:depiction

                # do we want both images?
                # morphology_term_id has depiction small_figure_url
                if small_figure_url != '':
                    model.addDepiction(morphology_term_id,
                                       small_figure_url)

                # morphology_term_id has depiction large_figure_url
                if large_figure_url != '':
                    model.addDepiction(morphology_term_id,
                                       large_figure_url)

                # morphology_term_id has comment comments
                if comments != '':
                    model.addComment(morphology_term_id,
                                     comments.strip())

                if synonyms != '':
                    for s in synonyms.split(';'):
                        model.addSynonym(
                            morphology_term_id, s.strip(),
                            model.annotation_properties['hasExactSynonym'])

                # morphology_term_id hasRelatedSynonym replaces (; delimited)
                if replaces != '' and replaces != synonyms:
                    for s in replaces.split(';'):
                        model.addSynonym(
                            morphology_term_id, s.strip(),
                            model.annotation_properties['hasRelatedSynonym'])

                # morphology_term_id has page morphology_term_url
                reference = Reference(self.graph)
                reference.addPage(morphology_term_id, morphology_term_url)

                if limit is not None and line_counter > limit:
                    break
        return
Exemple #5
0
    def _process_nlx_157874_1_view(self, raw, limit=None):
        """
        This table contains the Elements of Morphology data that has been
        screen-scraped into DISCO.
        Note that foaf:depiction is inverse of foaf:depicts relationship.

        Since it is bad form to have two definitions,
        we concatenate the two into one string.

        Triples:
            <eom id> a owl:Class
                rdf:label Literal(eom label)
                OIO:hasRelatedSynonym Literal(synonym list)
                IAO:definition Literal(objective_def. subjective def)
                foaf:depiction Literal(small_image_url),
                               Literal(large_image_url)
                foaf:page Literal(page_url)
                rdfs:comment Literal(long commented text)


        :param raw:
        :param limit:
        :return:
        """

        model = Model(self.graph)
        line_counter = 0
        with open(raw, 'r') as f1:
            f1.readline()  # read the header row; skip
            filereader = csv.reader(f1, delimiter='\t', quotechar='\"')
            for line in filereader:
                line_counter += 1
                (morphology_term_id, morphology_term_num,
                 morphology_term_label, morphology_term_url,
                 terminology_category_label, terminology_category_url,
                 subcategory, objective_definition, subjective_definition,
                 comments, synonyms, replaces, small_figure_url,
                 large_figure_url, e_uid, v_uid, v_uuid, v_last_modified,
                 v_status, v_lastmodified_epoch) = line

                # note:
                # e_uid v_uuid v_last_modified terminology_category_url
                # subcategory v_uid morphology_term_num
                # terminology_category_label hp_label notes
                # are currently unused.

                # Add morphology term to graph as a class
                # with label, type, and description.
                model.addClassToGraph(morphology_term_id,
                                      morphology_term_label)

                # Assemble the description text

                if subjective_definition != '' and not (re.match(
                        r'.+\.$', subjective_definition)):
                    # add a trailing period.
                    subjective_definition = subjective_definition.strip() + '.'
                if objective_definition != '' and not (re.match(
                        r'.+\.$', objective_definition)):
                    # add a trailing period.
                    objective_definition = objective_definition.strip() + '.'

                definition = \
                    '  '.join(
                        (objective_definition, subjective_definition)).strip()

                model.addDefinition(morphology_term_id, definition)

                # <term id> FOAF:depicted_by literal url
                # <url> type foaf:depiction

                # do we want both images?
                # morphology_term_id has depiction small_figure_url
                if small_figure_url != '':
                    model.addDepiction(morphology_term_id, small_figure_url)

                # morphology_term_id has depiction large_figure_url
                if large_figure_url != '':
                    model.addDepiction(morphology_term_id, large_figure_url)

                # morphology_term_id has comment comments
                if comments != '':
                    model.addComment(morphology_term_id, comments.strip())

                if synonyms != '':
                    for s in synonyms.split(';'):
                        model.addSynonym(
                            morphology_term_id, s.strip(),
                            model.annotation_properties['hasExactSynonym'])

                # morphology_term_id hasRelatedSynonym replaces (; delimited)
                if replaces != '' and replaces != synonyms:
                    for s in replaces.split(';'):
                        model.addSynonym(
                            morphology_term_id, s.strip(),
                            model.annotation_properties['hasRelatedSynonym'])

                # morphology_term_id has page morphology_term_url
                reference = Reference(self.graph)
                reference.addPage(morphology_term_id, morphology_term_url)

                if limit is not None and line_counter > limit:
                    break
        return