Example #1
0
    def _process_collection(self, collection_id, label, page):
        """
        This function will process the data supplied internally
        about the repository from Coriell.

        Triples:
            Repository a ERO:collection
            rdf:label Literal(label)
            foaf:page Literal(page)

        :param collection_id:
        :param label:
        :param page:
        :return:
        """
        # #############    BUILD THE CELL LINE REPOSITORY    #############
        for graph in [self.graph, self.testgraph]:
            # TODO: How to devise a label for each repository?
            model = Model(graph)
            reference = Reference(graph)
            repo_id = 'CoriellCollection:' + collection_id
            repo_label = label
            repo_page = page

            model.addIndividualToGraph(
                repo_id, repo_label, self.globaltt['collection'])
            reference.addPage(repo_id, repo_page)

        return
Example #2
0
    def _process_collection(self, collection_id, label, page):
        """
        This function will process the data supplied internally
        about the repository from Coriell.

        Triples:
            Repository a ERO:collection
            rdf:label Literal(label)
            foaf:page Literal(page)

        :param collection_id:
        :param label:
        :param page:
        :return:
        """
        # #############    BUILD THE CELL LINE REPOSITORY    #############
        for graph in [self.graph, self.testgraph]:
            # TODO: How to devise a label for each repository?
            model = Model(graph)
            reference = Reference(graph)
            repo_id = 'CoriellCollection:' + collection_id
            repo_label = label
            repo_page = page

            model.addIndividualToGraph(repo_id, repo_label,
                                       self.globaltt['collection'])
            reference.addPage(repo_id, repo_page)

        return
Example #3
0
    def _process_nlx_157874_1_view(self, raw, limit=None):
        """
        This table contains the Elements of Morphology data .
        Note that foaf:depiction is inverse of foaf:depicts relationship.

        Since it is bad form to have two definitions,
        we concatenate the two into one string.

        Turtle:
            <eom id> a owl:Class
                rdf:label Literal(eom label)
                oboInOwl:has_related_synonym Literal(synonym list)
                IAO:definition Literal(objective_def. subjective def)
                foaf:depiction Literal(small_image_url),
                               Literal(large_image_url)
                foaf:page Literal(page_url)
                rdfs:comment Literal(long commented text)

        TEC_note: URL are not literals.


        :param raw:
        :param limit:
        :return:
        """

        src_key = 'tables'
        model = Model(self.graph)
        col = self.resources[src_key]['columns']
        with open(raw, 'r') as rawread:
            reader = csv.reader(rawread, delimiter='\t', quotechar='\"')
            row = next(reader)
            if not self.check_fileheader(col, row):
                pass

            for row in reader:
                # head -1 dvp.pr_nlx_157874_1|tr '\t' '\n'|
                # sed "s|\(.*\)|# \1 = row[col.index('\1')]|g"

                morphology_term_id = row[col.index(
                    'morphology_term_id')].strip()
                # morphology_term_num = row[col.index('morphology_term_num')]
                morphology_term_label = row[col.index(
                    'morphology_term_label')].strip()
                morphology_term_url = row[col.index(
                    'morphology_term_url')].strip()
                # terminology_category_label = row[
                #   col.index('terminology_category_label')]
                # terminology_category_url = row[col.index('terminology_category_url')]
                # subcategory = row[col.index('subcategory')]
                objective_definition = row[col.index(
                    'objective_definition')].strip()
                subjective_definition = row[col.index(
                    'subjective_definition')].strip()
                comments = row[col.index('comments')].strip()
                synonyms = row[col.index('synonyms')].strip()
                replaces = row[col.index('replaces')].strip()
                small_figure_url = row[col.index('small_figure_url')].strip()
                large_figure_url = row[col.index('large_figure_url')].strip()
                # e_uid = row[col.index('e_uid')]
                # v_uid = row[col.index('v_uid')]
                # v_uuid = row[col.index('v_uuid')]
                # v_lastmodified = row[col.index('v_lastmodified')]
                # v_status = row[col.index('v_status')]
                # v_lastmodified_epoch = row[col.index('v_lastmodified_epoch')]

                # Add morphology term to graph as a class
                # with label, type, and description.
                model.addClassToGraph(morphology_term_id,
                                      morphology_term_label)

                # Assemble the description text

                if subjective_definition != '' and not (re.match(
                        r'.+\.$', subjective_definition)):
                    # add a trailing period.
                    subjective_definition = subjective_definition + '.'
                if objective_definition != '' and not (re.match(
                        r'.+\.$', objective_definition)):
                    # add a trailing period.
                    objective_definition = objective_definition + '.'

                definition = '  '.join(
                    (objective_definition, subjective_definition))

                model.addDefinition(morphology_term_id, definition)

                # <term id> FOAF:depicted_by literal url
                # <url> type foaf:depiction

                # do we want both images?
                # morphology_term_id has depiction small_figure_url
                if small_figure_url != '':
                    model.addDepiction(morphology_term_id, small_figure_url)

                # morphology_term_id has depiction large_figure_url
                if large_figure_url != '':
                    model.addDepiction(morphology_term_id, large_figure_url)

                # morphology_term_id has comment comments
                if comments != '':
                    model.addComment(morphology_term_id, comments)

                for syn in synonyms.split(';'):
                    model.addSynonym(morphology_term_id, syn.strip(),
                                     self.globaltt['has_exact_synonym'])

                # morphology_term_id has_related_synonym replaces (; delimited)
                if replaces not in ['', synonyms]:
                    for syn in replaces.split(';'):
                        model.addSynonym(morphology_term_id, syn.strip(),
                                         self.globaltt['has_related_synonym'])

                # <morphology_term_id> <foaf:page> morphology_term_url
                if morphology_term_id is not None:
                    reference = Reference(self.graph, morphology_term_id,
                                          self.globaltt['web page'])

                    # TEC 201905:
                    # Not so sure we need explicit   <eom_uri> <webpage> <eom_url>.
                    # since <eom_uri> IS the <eom_url>.

                    reference.addPage(morphology_term_id, morphology_term_url)

                if limit is not None and reader.line_num > limit:
                    break
Example #4
0
    def _get_process_allelic_variants(self, entry, g):
        model = Model(g)
        reference = Reference(g)
        geno = Genotype(g)
        if entry is not None:
            # to hold the entry-specific publication mentions
            # for the allelic variants
            publist = {}
            entry_num = entry['mimNumber']

            # process the ref list just to get the pmids
            ref_to_pmid = self._get_pubs(entry, g)

            if 'allelicVariantList' in entry:
                allelicVariantList = entry['allelicVariantList']
                for al in allelicVariantList:
                    al_num = al['allelicVariant']['number']
                    al_id = 'OMIM:'+str(entry_num)+'.'+str(al_num).zfill(4)
                    al_label = None
                    al_description = None
                    if al['allelicVariant']['status'] == 'live':
                        publist[al_id] = set()
                        if 'mutations' in al['allelicVariant']:
                            al_label = al['allelicVariant']['mutations']
                        if 'text' in al['allelicVariant']:
                            al_description = al['allelicVariant']['text']
                            m = re.findall(r'\{(\d+)\:', al_description)
                            publist[al_id] = set(m)
                        geno.addAllele(
                            al_id, al_label, geno.genoparts['variant_locus'],
                            al_description)
                        geno.addAlleleOfGene(
                            al_id, 'OMIM:'+str(entry_num),
                            geno.object_properties[
                                'is_sequence_variant_instance_of'])
                        for r in publist[al_id]:
                            pmid = ref_to_pmid[int(r)]
                            g.addTriple(
                                pmid, model.object_properties['is_about'],
                                al_id)
                        # look up the pubmed id in the list of references
                        if 'dbSnps' in al['allelicVariant']:
                            dbsnp_ids = \
                                re.split(r',', al['allelicVariant']['dbSnps'])
                            for dnum in dbsnp_ids:
                                did = 'dbSNP:'+dnum.strip()
                                model.addIndividualToGraph(did, None)
                                model.addSameIndividual(al_id, did)
                        if 'clinvarAccessions' in al['allelicVariant']:
                            # clinvarAccessions triple semicolon delimited
                            # each >1 like RCV000020059;;;
                            rcv_ids = \
                                re.split(
                                    r';;;',
                                    al['allelicVariant']['clinvarAccessions'])
                            rcv_ids = [
                                (re.match(r'(RCV\d+);*', r)).group(1)
                                for r in rcv_ids]
                            for rnum in rcv_ids:
                                rid = 'ClinVar:'+rnum
                                model.addXref(al_id, rid)
                        reference.addPage(
                            al_id, "http://omim.org/entry/" +
                            str(entry_num)+"#" + str(al_num).zfill(4))
                    elif re.search(
                            r'moved', al['allelicVariant']['status']):
                        # for both 'moved' and 'removed'
                        moved_ids = None
                        if 'movedTo' in al['allelicVariant']:
                            moved_id = 'OMIM:'+al['allelicVariant']['movedTo']
                            moved_ids = [moved_id]
                        model.addDeprecatedIndividual(al_id, moved_ids)
                    else:
                        logger.error('Uncaught alleleic variant status %s',
                                     al['allelicVariant']['status'])
                # end loop allelicVariantList

        return
Example #5
0
    def _process_nlx_157874_1_view(self, raw, limit=None):
        """
        This table contains the Elements of Morphology data that has been
        screen-scraped into DISCO.
        Note that foaf:depiction is inverse of foaf:depicts relationship.

        Since it is bad form to have two definitions,
        we concatenate the two into one string.

        Triples:
            <eom id> a owl:Class
                rdf:label Literal(eom label)
                OIO:hasRelatedSynonym Literal(synonym list)
                IAO:definition Literal(objective_def. subjective def)
                foaf:depiction Literal(small_image_url),
                               Literal(large_image_url)
                foaf:page Literal(page_url)
                rdfs:comment Literal(long commented text)


        :param raw:
        :param limit:
        :return:
        """

        model = Model(self.graph)
        line_counter = 0
        with open(raw, 'r') as f1:
            f1.readline()  # read the header row; skip
            filereader = csv.reader(f1, delimiter='\t', quotechar='\"')
            for line in filereader:
                line_counter += 1
                (morphology_term_id, morphology_term_num,
                 morphology_term_label, morphology_term_url,
                 terminology_category_label, terminology_category_url,
                 subcategory, objective_definition, subjective_definition,
                 comments, synonyms, replaces, small_figure_url,
                 large_figure_url, e_uid, v_uid, v_uuid,
                 v_last_modified, v_status, v_lastmodified_epoch) = line

                # note:
                # e_uid v_uuid v_last_modified terminology_category_url
                # subcategory v_uid morphology_term_num
                # terminology_category_label hp_label notes
                # are currently unused.

                # Add morphology term to graph as a class
                # with label, type, and description.
                model.addClassToGraph(morphology_term_id,
                                      morphology_term_label)

                # Assemble the description text

                if subjective_definition != '' and not (
                        re.match(r'.+\.$', subjective_definition)):
                    # add a trailing period.
                    subjective_definition = subjective_definition.strip() + '.'
                if objective_definition != '' and not (
                        re.match(r'.+\.$', objective_definition)):
                    # add a trailing period.
                    objective_definition = objective_definition.strip() + '.'

                definition = \
                    '  '.join(
                        (objective_definition, subjective_definition)).strip()

                model.addDefinition(morphology_term_id, definition)

                # <term id> FOAF:depicted_by literal url
                # <url> type foaf:depiction

                # do we want both images?
                # morphology_term_id has depiction small_figure_url
                if small_figure_url != '':
                    model.addDepiction(morphology_term_id,
                                       small_figure_url)

                # morphology_term_id has depiction large_figure_url
                if large_figure_url != '':
                    model.addDepiction(morphology_term_id,
                                       large_figure_url)

                # morphology_term_id has comment comments
                if comments != '':
                    model.addComment(morphology_term_id,
                                     comments.strip())

                if synonyms != '':
                    for s in synonyms.split(';'):
                        model.addSynonym(
                            morphology_term_id, s.strip(),
                            model.annotation_properties['hasExactSynonym'])

                # morphology_term_id hasRelatedSynonym replaces (; delimited)
                if replaces != '' and replaces != synonyms:
                    for s in replaces.split(';'):
                        model.addSynonym(
                            morphology_term_id, s.strip(),
                            model.annotation_properties['hasRelatedSynonym'])

                # morphology_term_id has page morphology_term_url
                reference = Reference(self.graph)
                reference.addPage(morphology_term_id, morphology_term_url)

                if limit is not None and line_counter > limit:
                    break
        return
Example #6
0
    def _get_process_allelic_variants(self, entry, graph):
        model = Model(graph)
        reference = Reference(graph)
        geno = Genotype(graph)
        if entry is not None:
            # to hold the entry-specific publication mentions
            # for the allelic variants
            publist = {}
            entry_num = entry['mimNumber']

            # process the ref list just to get the pmids
            ref_to_pmid = self._get_pubs(entry, graph)

            if 'allelicVariantList' in entry:
                for alv in entry['allelicVariantList']:
                    al_num = alv['allelicVariant']['number']
                    al_id = 'OMIM:' + str(entry_num) + '.' + str(al_num).zfill(
                        4)
                    al_label = None
                    al_description = None
                    if alv['allelicVariant']['status'] == 'live':
                        publist[al_id] = set()
                        if 'mutations' in alv['allelicVariant']:
                            al_label = alv['allelicVariant']['mutations']
                        if 'text' in alv['allelicVariant']:
                            al_description = alv['allelicVariant']['text']
                            mch = re.findall(r'\{(\d+)\:', al_description)
                            publist[al_id] = set(mch)
                        geno.addAllele(al_id, al_label,
                                       self.globaltt['variant_locus'],
                                       al_description)
                        geno.addAlleleOfGene(al_id, 'OMIM:' + str(entry_num),
                                             self.globaltt['is_allele_of'])
                        for ref in publist[al_id]:
                            pmid = ref_to_pmid[int(ref)]
                            graph.addTriple(pmid, self.globaltt['is_about'],
                                            al_id)
                        # look up the pubmed id in the list of references
                        if 'dbSnps' in alv['allelicVariant']:
                            dbsnp_ids = re.split(
                                r',', alv['allelicVariant']['dbSnps'])
                            for dnum in dbsnp_ids:
                                did = 'dbSNP:' + dnum.strip()
                                model.addIndividualToGraph(did, None)
                                model.addSameIndividual(al_id, did)

                        # Note that RCVs are variant to disease associations
                        # in ClinVar, rather than variant entries
                        # so we make these xrefs instead of equivalents
                        if 'clinvarAccessions' in alv['allelicVariant']:
                            # clinvarAccessions triple semicolon delimited
                            # each >1 like RCV000020059;;;
                            rcv_ids = \
                                alv['allelicVariant']['clinvarAccessions'].split(';;;')
                            rcv_ids = [rcv[:12]
                                       for rcv in rcv_ids]  # incase more cruft

                            for rnum in rcv_ids:
                                rid = 'ClinVar:' + rnum
                                model.addXref(al_id, rid)
                        reference.addPage(
                            al_id, "http://omim.org/entry/" + '#'.join(
                                (str(entry_num), str(al_num).zfill(4))))
                    elif re.search(r'moved', alv['allelicVariant']['status']):
                        # for both 'moved' and 'removed'
                        moved_ids = None
                        if 'movedTo' in alv['allelicVariant']:
                            moved_id = 'OMIM:' + alv['allelicVariant'][
                                'movedTo']
                            moved_ids = [moved_id]
                        model.addDeprecatedIndividual(al_id, moved_ids)
                    else:
                        LOG.error('Uncaught alleleic variant status %s',
                                  alv['allelicVariant']['status'])
Example #7
0
    def _process_nlx_157874_1_view(self, raw, limit=None):
        """
        This table contains the Elements of Morphology data that has been
        screen-scraped into DISCO.
        Note that foaf:depiction is inverse of foaf:depicts relationship.

        Since it is bad form to have two definitions,
        we concatenate the two into one string.

        Triples:
            <eom id> a owl:Class
                rdf:label Literal(eom label)
                OIO:hasRelatedSynonym Literal(synonym list)
                IAO:definition Literal(objective_def. subjective def)
                foaf:depiction Literal(small_image_url),
                               Literal(large_image_url)
                foaf:page Literal(page_url)
                rdfs:comment Literal(long commented text)


        :param raw:
        :param limit:
        :return:
        """

        model = Model(self.graph)
        line_counter = 0
        with open(raw, 'r') as f1:
            f1.readline()  # read the header row; skip
            filereader = csv.reader(f1, delimiter='\t', quotechar='\"')
            for line in filereader:
                line_counter += 1
                (morphology_term_id, morphology_term_num,
                 morphology_term_label, morphology_term_url,
                 terminology_category_label, terminology_category_url,
                 subcategory, objective_definition, subjective_definition,
                 comments, synonyms, replaces, small_figure_url,
                 large_figure_url, e_uid, v_uid, v_uuid, v_last_modified,
                 v_status, v_lastmodified_epoch) = line

                # note:
                # e_uid v_uuid v_last_modified terminology_category_url
                # subcategory v_uid morphology_term_num
                # terminology_category_label hp_label notes
                # are currently unused.

                # Add morphology term to graph as a class
                # with label, type, and description.
                model.addClassToGraph(morphology_term_id,
                                      morphology_term_label)

                # Assemble the description text

                if subjective_definition != '' and not (re.match(
                        r'.+\.$', subjective_definition)):
                    # add a trailing period.
                    subjective_definition = subjective_definition.strip() + '.'
                if objective_definition != '' and not (re.match(
                        r'.+\.$', objective_definition)):
                    # add a trailing period.
                    objective_definition = objective_definition.strip() + '.'

                definition = \
                    '  '.join(
                        (objective_definition, subjective_definition)).strip()

                model.addDefinition(morphology_term_id, definition)

                # <term id> FOAF:depicted_by literal url
                # <url> type foaf:depiction

                # do we want both images?
                # morphology_term_id has depiction small_figure_url
                if small_figure_url != '':
                    model.addDepiction(morphology_term_id, small_figure_url)

                # morphology_term_id has depiction large_figure_url
                if large_figure_url != '':
                    model.addDepiction(morphology_term_id, large_figure_url)

                # morphology_term_id has comment comments
                if comments != '':
                    model.addComment(morphology_term_id, comments.strip())

                if synonyms != '':
                    for s in synonyms.split(';'):
                        model.addSynonym(
                            morphology_term_id, s.strip(),
                            model.annotation_properties['hasExactSynonym'])

                # morphology_term_id hasRelatedSynonym replaces (; delimited)
                if replaces != '' and replaces != synonyms:
                    for s in replaces.split(';'):
                        model.addSynonym(
                            morphology_term_id, s.strip(),
                            model.annotation_properties['hasRelatedSynonym'])

                # morphology_term_id has page morphology_term_url
                reference = Reference(self.graph)
                reference.addPage(morphology_term_id, morphology_term_url)

                if limit is not None and line_counter > limit:
                    break
        return