Beispiel #1
0
    def create_gp_statements(self):
        """
        Build strand orientation, genomic start and genomic end statements.

        The position is taken from the first entry of
        ``self.record['genomic_pos']['@value']``. All three statements share
        one reference (built with ``make_ref_source``) and one chromosome
        qualifier, looked up in ``self.refseq_qid_chrom`` by the record's
        ``chr`` value.

        :return: list with the orientation, start and end statements, in that order
        """
        gp_record = self.record['genomic_pos']
        position = gp_record['@value'][0]
        source = gp_record['@source']
        id_prop = source_ref_id[source['id']]
        # the external ID backing the reference must be a single string
        assert isinstance(self.external_ids[id_prop], str)
        reference = make_ref_source(source, PROPS[id_prop], self.external_ids[id_prop], login=self.login)

        # chromosome qualifier; the record identifies the chromosome by its refseq ID
        chromosome_qid = self.refseq_qid_chrom[position['chr']]
        chromosome_qualifier = [
            wdi_core.WDItemID(value=chromosome_qid, prop_nr=PROPS['chromosome'], is_qualifier=True)
        ]

        # strand == 1 selects Q22809680, anything else Q22809711
        if position['strand'] == 1:
            orientation_qid = 'Q22809680'
        else:
            orientation_qid = 'Q22809711'

        return [
            wdi_core.WDItemID(orientation_qid, PROPS['strand orientation'],
                              references=[reference], qualifiers=chromosome_qualifier),
            # coordinates are normalised through int() and written as strings
            wdi_core.WDString(str(int(position['start'])), PROPS['genomic start'],
                              references=[reference], qualifiers=chromosome_qualifier),
            wdi_core.WDString(str(int(position['end'])), PROPS['genomic end'],
                              references=[reference], qualifiers=chromosome_qualifier),
        ]
    def create_main_statements(self):
        """
        Fill ``self.s_main`` with the main statements for this item.

        Creates the reference first if ``self.reference`` is not set. For each
        entry in ``self.relationships`` whose predicate (index 0) has a
        property in ``self.do_graph.edge_prop`` and whose object (index 1) has
        an item in ``self.do_graph.purl_wdid``, an item statement is added.
        Unknown predicates are skipped silently; unknown objects are logged
        as warnings. Two "exact match" statements are appended afterwards and,
        unless the DOID is "DOID:4", an "instance of" Q12136 statement.
        """
        if not self.reference:
            self.create_reference()

        self.s_main = statements = []

        for rel in self.relationships:
            predicate = rel[0]
            if predicate in self.do_graph.edge_prop:
                target = rel[1]
                if target in self.do_graph.purl_wdid:
                    statements.append(
                        wdi_core.WDItemID(self.do_graph.purl_wdid[target],
                                          self.do_graph.edge_prop[predicate],
                                          references=[self.reference]))
                else:
                    # the object of the relationship has no known item: log and move on
                    note = "unknown obj: {}".format(target)
                    wdi_core.WDItemEngine.log(
                        "WARNING",
                        wdi_helpers.format_msg(self.doid, 'P699', None, note, msg_type="unknown obj"))
            # relationships with an unmapped predicate are dropped without logging

        # exact match on self.id (the http://purl.obolibrary.org/obo/ identifier)
        statements.append(wdi_core.WDString(self.id, PROPS['exact match'], references=[self.reference]))

        # instance of disease (Q12136) for everything except DOID:4
        if self.doid != "DOID:4":
            statements.append(wdi_core.WDItemID('Q12136', PROPS['instance of'], references=[self.reference]))

        # the identifiers.org exact match carries its own MIRIAM-based reference
        miriam_stated_in = wdi_core.WDItemID(value="Q16335166", prop_nr='P248', is_reference=True)
        miriam_url = wdi_core.WDUrl("http://www.ebi.ac.uk/miriam/main/collections/MIR:00000233", 'P854',
                                    is_reference=True)
        miriam_ref = [miriam_stated_in, miriam_url]
        identifiers_org_uri = "http://identifiers.org/doid/{}".format(self.doid)
        statements.append(wdi_core.WDString(identifiers_org_uri, PROPS['exact match'], references=[miriam_ref]))
Beispiel #3
0
    def to_wikidata(self):
        """
        Build the Wikidata statements for this compound.

        A single reference block (stated in Q278487, source element P662 with
        ``self.cid``, retrieved today) is attached to every statement. The
        only property written is P662, whose value is ``self.cid`` without
        its first three characters.

        :return: list of wdi_core.WDString statements
        """
        stated_in = wdi_core.WDItemID(value='Q278487', prop_nr='P248', is_reference=True)
        source_element = wdi_core.WDExternalID(value=self.cid, prop_nr='P662', is_reference=True)
        retrieved = wdi_core.WDTime(time=time.strftime('+%Y-%m-%dT00:00:00Z'), prop_nr='P813',
                                    is_reference=True)
        refs = [[stated_in, source_element, retrieved]]

        elements = {'P662': self.cid[3:]}

        data = []
        for prop_nr, raw in elements.items():
            # empty values produce no statement
            if not raw:
                continue

            print('{}:'.format(prop_nr), raw)
            # a list or set yields one statement per member, anything else exactly one
            values = raw if isinstance(raw, (list, set)) else [raw]
            for value in values:
                data.append(wdi_core.WDString(prop_nr=prop_nr, value=value, references=refs))

        return data
Beispiel #4
0
    def create_statements(self):
        """
        Create the statements common to all genes.

        Reference sources are created first if ``self.entrez_ref`` is not set.
        The Entrez Gene ID statement is always produced; the other identifier
        statements are added only when the identifier is present in
        ``self.external_ids`` (Ensembl ones additionally require
        ``self.ensembl_ref``). The type of gene is read from the record,
        stored on ``self.type_of_gene`` and must be a key of
        ``type_of_gene_map``.

        :return: list of wdi_core statements
        """
        if not self.entrez_ref:
            self.create_ref_sources()

        ids = self.external_ids

        # --- required ID statement ---
        statements = [
            wdi_core.WDString(ids['Entrez Gene ID'], PROPS['Entrez Gene ID'], references=[self.entrez_ref])
        ]

        # --- optional ID statements ---
        if self.ensembl_ref:
            for gene_id in ids['Ensembl Gene ID']:
                statements.append(
                    wdi_core.WDString(gene_id, PROPS['Ensembl Gene ID'], references=[self.ensembl_ref]))

            if 'Ensembl Transcript ID' in ids:
                for transcript_id in ids['Ensembl Transcript ID']:
                    statements.append(
                        wdi_core.WDString(transcript_id, PROPS['Ensembl Transcript ID'],
                                          references=[self.ensembl_ref]))

        if 'RefSeq RNA ID' in ids:
            for rna_id in ids['RefSeq RNA ID']:
                statements.append(wdi_core.WDString(rna_id, PROPS['RefSeq RNA ID'], references=[self.entrez_ref]))

        # identifiers that hold a single value rather than a collection
        single_valued = (
            'NCBI Locus tag', 'Saccharomyces Genome Database ID', 'Mouse Genome Informatics ID',
            'MGI Gene Symbol', 'HomoloGene ID', 'Rat Genome Database ID', 'FlyBase Gene ID',
            'Wormbase Gene ID', 'ZFIN Gene ID', 'cytogenetic location',
        )
        for name in single_valued:
            if name in ids:
                statements.append(wdi_core.WDString(ids[name], PROPS[name], references=[self.entrez_ref]))

        # --- gene statements ---
        # referenced to ensembl when an ensembl reference exists, otherwise to entrez
        gene_ref = self.entrez_ref if self.ensembl_ref is None else self.ensembl_ref

        # instance of gene, ncRNA, ... according to the record's type of gene
        type_of_gene = self.record['type_of_gene']['@value']
        assert type_of_gene in type_of_gene_map, "unknown type of gene: {}".format(type_of_gene)
        self.type_of_gene = type_of_gene
        statements.append(
            wdi_core.WDItemID(type_of_gene_map[type_of_gene], PROPS['instance of'], references=[gene_ref]))

        # every type outside this set additionally gets instance of "gene" (Q7187)
        if type_of_gene not in {'protein-coding', 'pseudo', 'other', 'unknown'}:
            statements.append(wdi_core.WDItemID("Q7187", PROPS['instance of'], references=[gene_ref]))

        # found in taxon
        statements.append(
            wdi_core.WDItemID(self.organism_info['wdid'], PROPS['found in taxon'], references=[gene_ref]))

        return statements
Beispiel #5
0
def make_go_ref(curator, pmid_map, external_id, uniprot_id, evidence_wdid, retrieved, pmid=None):
    """
    Assemble the reference for one GO annotation.

    The reference contains, in order: retrieved date, the article item for
    ``pmid`` (only when a pmid is given), stated in Q28018111 (uniprot-GOA),
    the curator item, an optional curator-specific external ID, the QuickGO
    reference URL and the determination method.

    :param curator: key into ``curators_wdids``
    :param pmid_map: mapping from pmid to the article item ID
    :param external_id: mapping from external ID name to value
    :param uniprot_id: identifier inserted into the QuickGO URL
    :param evidence_wdid: item ID used as the determination method (P459)
    :param retrieved: object providing ``strftime`` for the retrieved date
    :param pmid: optional article pmid
    :return: list of reference snaks
    :raises ValueError: if ``pmid`` is given but missing from ``pmid_map``,
        or if ``curator`` is not in ``curators_wdids``
    """
    # every reference starts with the retrieved date
    reference = [
        wdi_core.WDTime(retrieved.strftime('+%Y-%m-%dT00:00:00Z'), prop_nr='P813', is_reference=True)
    ]

    # stated in the article, when one was given
    if pmid:
        if pmid not in pmid_map:
            raise ValueError(
                "article item for pmid {} not found. skipping item".format(
                    pmid))
        reference.append(wdi_core.WDItemID(pmid_map[pmid], 'P248', is_reference=True))

    # stated in uniprot-GOA Q28018111
    reference.append(wdi_core.WDItemID('Q28018111', 'P248', is_reference=True))

    # curator
    if curator not in curators_wdids:
        raise ValueError("curator not found: {}".format(curator))
    reference.append(wdi_core.WDItemID(curators_wdids[curator], 'P1640', is_reference=True))

    # some curators contribute their own external ID to the reference
    if curator in curator_ref and curator_ref[curator] in external_id:
        id_name = curator_ref[curator]
        reference.append(wdi_core.WDString(external_id[id_name], PROPS[id_name], is_reference=True))

    # reference URL pointing at the QuickGO annotations for this protein
    ref_url = "http://www.ebi.ac.uk/QuickGO/annotations?protein={}&geneProductId=UniProtKB:{}".format(
        uniprot_id, uniprot_id)
    reference.append(wdi_core.WDString(ref_url, 'P854', is_reference=True))

    # determination method
    reference.append(wdi_core.WDItemID(evidence_wdid, 'P459', is_reference=True))

    return reference
Beispiel #6
0
    def test_new_item_creation(self):
        """Build a new item from two string statements and require a non-empty JSON representation."""
        statements = [
            wdi_core.WDString(value=value, prop_nr=prop_nr)
            for value, prop_nr in (('test', 'P716'), ('test1', 'P76'))
        ]

        new_item = wdi_core.WDItemEngine(item_name='dae', domain=None, data=statements)

        pprint.pprint(new_item.get_wd_json_representation())

        # an empty representation means the item could not be assembled
        if not new_item.get_wd_json_representation():
            raise ValueError
Beispiel #7
0
    def create_statements(self):
        """
        Create the statements common to all proteins.

        ID statements: UniProt ID (always), Saccharomyces Genome Database ID,
        Ensembl Protein ID(s) and RefSeq Protein ID(s) when present in
        ``self.external_ids``. Protein statements: instance of Q8054, found
        in taxon and encoded by, all referenced to UniProt.

        :return: list of wdi_core statements
        """
        ids = self.external_ids
        entrez_gene = ids['Entrez Gene ID']

        uniprot_ref = make_ref_source(self.record['uniprot']['@source'], PROPS['UniProt ID'],
                                      ids['UniProt ID'], login=self.login)
        entrez_ref = make_ref_source(self.record['entrezgene']['@source'], PROPS['Entrez Gene ID'],
                                     ids['Entrez Gene ID'], login=self.login)

        # --- ID statements ---
        statements = [wdi_core.WDString(ids['UniProt ID'], PROPS['UniProt ID'], references=[uniprot_ref])]

        sgd = 'Saccharomyces Genome Database ID'
        if sgd in ids:
            statements.append(wdi_core.WDString(ids[sgd], PROPS[sgd], references=[entrez_ref]))

        ensembl = 'Ensembl Protein ID'
        if ensembl in ids:
            for protein_id in ids[ensembl]:
                # each Ensembl protein ID is referenced to itself
                own_ref = make_ref_source(self.record['ensembl']['@source'], PROPS[ensembl], protein_id,
                                          login=self.login)
                statements.append(wdi_core.WDString(protein_id, PROPS[ensembl], references=[own_ref]))

        refseq = 'RefSeq Protein ID'
        if refseq in ids:
            for protein_id in ids[refseq]:
                # RefSeq protein IDs are referenced through the Entrez gene ID
                gene_based_ref = make_ref_source(self.record['refseq']['@source'], PROPS['Entrez Gene ID'],
                                                 entrez_gene, login=self.login)
                statements.append(wdi_core.WDString(protein_id, PROPS[refseq], references=[gene_based_ref]))

        # --- protein statements ---
        # instance of protein
        statements.append(wdi_core.WDItemID("Q8054", PROPS['instance of'], references=[uniprot_ref]))
        # found in taxon
        statements.append(
            wdi_core.WDItemID(self.organism_info['wdid'], PROPS['found in taxon'], references=[uniprot_ref]))
        # encoded by
        statements.append(wdi_core.WDItemID(self.gene_wdid, PROPS['encoded by'], references=[uniprot_ref]))

        return statements
Beispiel #8
0
    def create_gp_statements_chr(self):
        """
        Build genomic position statements for a gene located on a chromosome.

        Reads ``self.record['genomic_pos']`` and returns, in order: strand
        orientation, genomic start, genomic end and chromosome statements.
        The chromosome item is looked up in ``self.chr_num_wdid`` by the
        record's ``chr`` value; start and end carry it as a qualifier.

        :return: list of four wdi_core statements
        """
        gp_record = self.record['genomic_pos']
        position = gp_record['@value']
        source = gp_record['@source']
        id_prop = source_ref_id[source['id']]
        reference = make_ref_source(source, PROPS[id_prop], self.external_ids[id_prop], login=self.login)

        chrom_wdid = self.chr_num_wdid[position['chr']]
        chromosome_qualifier = [wdi_core.WDItemID(chrom_wdid, PROPS['chromosome'], is_qualifier=True)]

        # strand == 1 selects Q22809680, anything else Q22809711
        if position['strand'] == 1:
            orientation_qid = 'Q22809680'
        else:
            orientation_qid = 'Q22809711'

        return [
            # NOTE(review): the orientation statement is written without the chromosome
            # qualifier that start and end receive - confirm this is intended
            wdi_core.WDItemID(orientation_qid, PROPS['strand orientation'], references=[reference]),
            # coordinates are normalised through int() and written as strings
            wdi_core.WDString(str(int(position['start'])), PROPS['genomic start'],
                              references=[reference], qualifiers=chromosome_qualifier),
            wdi_core.WDString(str(int(position['end'])), PROPS['genomic end'],
                              references=[reference], qualifiers=chromosome_qualifier),
            # the chromosome itself as a plain statement
            wdi_core.WDItemID(chrom_wdid, PROPS['chromosome'], references=[reference]),
        ]
Beispiel #9
0
def createNCBITaxReference(ncbiTaxId, retrieved):
    """
    Build a reference for data taken from the NCBI taxonomy.

    :param ncbiTaxId: NCBI taxonomy ID written to P685
    :param retrieved: object providing ``strftime`` for the retrieved date (P813)
    :return: list of three reference snaks: stated in Q13711410, retrieved, NCBI tax ID
    """
    return [
        wdi_core.WDItemID(value="Q13711410", prop_nr="P248", is_reference=True),
        wdi_core.WDTime(retrieved.strftime("+%Y-%m-%dT00:00:00Z"), prop_nr="P813", is_reference=True),
        wdi_core.WDString(value=ncbiTaxId, prop_nr="P685", is_reference=True),
    ]
Beispiel #10
0
def gene_expressed_in_organ_statements(bgee_gene_id: object,
                                       wikidata_gene_ids: list,
                                       wikidata_organ_ids: list) -> dict:
    """Map each Wikidata gene id to its "expressed in" statements.

    One statement is created per entry of ``wikidata_organ_ids``; its
    "series ordinal" qualifier is the 1-based position of the organ in that
    list, so the list is expected to be ordered already. Every gene id maps
    to the same list object.

    :param bgee_gene_id: the gene id used in Bgee such as an Ensembl identifier
    :param wikidata_gene_ids: the Wikidata gene identifiers that correspond to bgee_gene_id
    :param wikidata_organ_ids: the ordered Wikidata anatomic entity items in which bgee_gene_id is expressed
    :return: a dictionary where key = Wikidata gene id, value = list of statements;
        empty when no gene ids are given
    """
    reference = create_reference(bgee_gene_id)

    statements = []
    for ordinal, organ_id in enumerate(wikidata_organ_ids, start=1):
        rank = wdi_core.WDString(str(ordinal), PROPS['series ordinal'], is_qualifier=True)
        statements.append(
            wdi_core.WDItemID(organ_id,
                              PROPS['expressed in'],
                              references=[reference],
                              qualifiers=[rank]))

    return {gene_id: statements for gene_id in wikidata_gene_ids}
def append_taxon_and_gender(cell_line_object, data_to_add_to_wikidata,
                            list_of_taxons_of_origin,
                            list_of_biological_sexes_of_source):
    """
    Append one P703 statement per taxon of origin to the statement list.

    Each statement uses ``list_of_biological_sexes_of_source`` as qualifiers
    and the cell line's ``references_in_wdi_format`` as references. The
    marker "Unknow value" produces a 'somevalue' snak instead of an item
    statement. Nothing is appended when the taxon list is empty or None.

    :return: the same ``data_to_add_to_wikidata`` list that was passed in
    """
    cell_line_references = cell_line_object.references_in_wdi_format

    for taxon in list_of_taxons_of_origin or ():
        if taxon == "Unknow value":
            statement = wdi_core.WDString(
                value="Unknow value",
                prop_nr="P703",
                qualifiers=list_of_biological_sexes_of_source,
                references=cell_line_references,
                snak_type='somevalue')
        else:
            statement = wdi_core.WDItemID(
                value=taxon,
                prop_nr="P703",
                qualifiers=list_of_biological_sexes_of_source,
                references=cell_line_references)
        data_to_add_to_wikidata.append(statement)

    return data_to_add_to_wikidata
Beispiel #12
0
def set_taxon(taxid, timeout=30):
    """
    Build a WDItemEngine for a taxon from its NCBI taxonomy summary.

    The summary is fetched from NCBI E-utilities (esummary, db=taxonomy).
    Three statements are created, each with its own deep copy of the NCBI
    taxonomy reference: instance of Q16521, NCBI tax id (P685) and the
    scientific name (P225). Label, alias and English description are filled
    in only where the item does not have them yet.

    Relies on a module-level ``retrieved`` value for the reference date.

    :param taxid: NCBI taxonomy ID
    :param timeout: seconds to wait for the NCBI response; without a timeout
        ``requests`` may block forever on an unresponsive server
    :return: the prepared (not yet written) wdi_core.WDItemEngine
    :raises requests.HTTPError: if NCBI answers with an error status
    :raises KeyError: if the summary holds no entry for ``taxid``
    """
    response = requests.get(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=taxonomy&id={}&format=json"
        .format(taxid),
        timeout=timeout)
    # surface HTTP errors here instead of failing later on an unexpected payload
    response.raise_for_status()
    ncbiTaxon = json.loads(response.text)

    taxonitemStatements = []
    ncbiTaxref = createNCBITaxReference(taxid, retrieved)
    ## instance of
    taxonitemStatements.append(
        wdi_core.WDItemID(value="Q16521",
                          prop_nr="P31",
                          references=[copy.deepcopy(ncbiTaxref)]))
    ## NCBI tax id
    taxonitemStatements.append(
        wdi_core.WDExternalID(value=taxid,
                              prop_nr="P685",
                              references=[copy.deepcopy(ncbiTaxref)]))
    ## scientificname
    # keys of a JSON object are always strings, so look the entry up by str(taxid)
    scientificName = ncbiTaxon["result"][str(taxid)]['scientificname']
    taxonitemStatements.append(
        wdi_core.WDString(scientificName,
                          prop_nr="P225",
                          references=[copy.deepcopy(ncbiTaxref)]))
    item = wdi_core.WDItemEngine(data=taxonitemStatements)
    if item.get_label() == "":
        item.set_label(label=scientificName, lang="en")
    # an existing, different label is kept; the scientific name becomes an alias
    if item.get_label() != scientificName:
        item.set_aliases(aliases=[scientificName])
    if item.get_description(lang="en") == "":
        item.set_description(description="strain of virus", lang="en")
    return item
Beispiel #13
0
    def test_fastrun_label(self):
        """Check label and alias access on item Q2 when the engine runs in fast-run mode."""
        # statements handed to the engine: two item-valued, one string-valued
        data = [
            wdi_core.WDItemID('Q544', 'P361'),
            wdi_core.WDItemID('Q7547', 'P398'),
            wdi_core.WDString('Mars', 'P2572')
        ]
        # base filter passed to the engine for fast run: P361 -> Q544
        fast_run_base_filter = {'P361': 'Q544'}
        item = wdi_core.WDItemEngine(wd_item_id="Q2",
                                     data=data,
                                     fast_run=True,
                                     fast_run_base_filter=fast_run_base_filter)

        # first container in the class-level fast-run store
        # NOTE(review): presumably the one created by the engine above - confirm
        fast_run_container = wdi_core.WDItemEngine.fast_run_store[0]

        # dump the container's internal tables for inspection
        print(fast_run_container.prop_data)
        print(fast_run_container.statements)
        print(fast_run_container.prop_dt_map)
        print(fast_run_container.rev_lookup)

        # the English label must be "Earth" through both the item and its container
        assert item.get_label('en') == "Earth"
        assert item.fast_run_container.get_language_data("Q2", 'en',
                                                         'label')[0] == "Earth"
        # checking against a different label must give a truthy result
        assert item.fast_run_container.check_language_data(
            "Q2", ['not the Earth'], 'en', 'label')
        assert "Terra" in item.get_aliases()
        """
Beispiel #14
0
def get_item_statements(i_dict, type):
    """
    Turn an item dictionary into a list of statements.

    When ``type`` is not None the list starts with a P1 statement pointing at
    ``cfg.object_ids.get(type)``. The keys "Q", "wiki" and "label" are
    ignored. Every other key is treated as a property name whose values
    become one statement each: an item statement when the property is listed
    as object-valued in ``cfg``, otherwise a string statement. Object values
    that cannot be resolved by ``get_local_q`` are recorded in
    ``props_missed`` and skipped.

    :param i_dict: mapping from property name to a list of values
    :param type: key into ``cfg.object_ids`` or None
    :return: list of wdi_core statements
    """
    statements = []
    if type is not None:
        statements.append(wdi_core.WDItemID(cfg.object_ids.get(type), prop_nr="P1"))

    # the wikidata q value and label are not statements;
    # wikidata property statements will be imported later
    ignored_keys = ("Q", "wiki", "label")

    for prop in i_dict:
        if prop in ignored_keys:
            continue
        if prop == "is_related_to" and i_dict.get("label")[0] == "Koppel Ted":
            # this weird case causes an internal server error, so it is left out
            continue

        # look up how this property is represented
        pid = cfg.property_ids.get(prop)
        is_item_valued = cfg.property_keys.index(prop) in cfg.object_prop

        for value in i_dict.get(prop):
            if not is_item_valued:
                # strings longer than 400 characters are cut to 395 plus an ellipsis
                text = value[:395] + "..." if len(value) > 400 else value
                statements.append(wdi_core.WDString(text, prop_nr=pid))
                continue

            # resolve the q identifier of the referenced object
            qid = get_local_q(value)
            if qid is None:
                props_missed.append([i_dict.get("label")[0], pid, value])
                continue
            statements.append(wdi_core.WDItemID(qid, prop_nr=pid))

    return statements
Beispiel #15
0
def remove_deprecated_statements(qid, frc, release_wdid, props, login):
    """
    Remove statements on ``qid`` that are referenced to a superseded release.

    A statement counts as deprecated when any of its references contains a
    P248 snak whose value is one of the known release items (all values of
    ``INTERPRO_RELEASES`` plus Q3047275) other than ``release_wdid``. Such
    statements are flagged for removal and written back in a single edit.
    Nothing is written when no statement qualifies.

    :param qid: item whose statements are inspected
    :param frc: fast-run container used to reconstruct the item's statements
    :param release_wdid: item ID of the current release, which is kept
    :param props: properties that must be loaded into ``frc``
    :param login: login passed on to ``wdi_helpers.try_write``
    """
    def as_number(wdid):
        # release IDs are compared as integers, i.e. without the "Q" prefix
        return int(wdid.replace("Q", ""))

    old_releases = {as_number(wdid) for wdid in [*INTERPRO_RELEASES.values(), 'Q3047275']}
    # don't count this release
    old_releases.discard(as_number(release_wdid))

    # make sure we have these props in frc
    for prop in props:
        frc.write_required([wdi_core.WDString("fake value", prop)])
    orig_statements = frc.reconstruct_statements(qid)

    def cites_old_release(statement):
        for ref_block in statement.get_references():
            for snak in ref_block:
                if snak.get_prop_nr() == 'P248' and snak.get_value() in old_releases:
                    return True
        return False

    deprecated = []
    for statement in orig_statements:
        if cites_old_release(statement):
            # flag the statement so that the write removes it
            setattr(statement, 'remove', '')
            deprecated.append(statement)

    if not deprecated:
        return

    print("-----")
    print(qid)
    print(orig_statements)
    print(deprecated)
    print([(x.get_prop_nr(), x.value) for x in deprecated])
    print([(x.get_references()[0]) for x in deprecated])
    wd_item = wdi_core.WDItemEngine(wd_item_id=qid, domain='none', data=deprecated, fast_run=False)
    wdi_helpers.try_write(wd_item, '', '', login, edit_summary="remove deprecated statements")
Beispiel #16
0
    def create_item(self, label, description, ext_id, synonyms=None, type_of=None, force=False):
        """
        Create an item for ``ext_id`` and remember its ID in ``self.dbxref_qid``.

        Unless ``force`` is set, nothing happens (apart from a printed notice)
        when ``ext_id`` is already a key of ``self.dbxref_qid``. The item is
        written only when ``self.write`` is truthy; the mapping is updated
        with ``item.wd_item_id`` in either case.

        :param label: item label, also passed as ``item_name``
        :param description: optional item description
        :param ext_id: external identifier stored under ``self.dbxref_pid``
        :param synonyms: optional aliases
        :param type_of: optional external ID whose item becomes the rdf type value
        :param force: create the item even if ``ext_id`` is already known
        :return: None
        """
        if not force and ext_id in self.dbxref_qid:
            print("item already exists: {} {}".format(self.dbxref_qid[ext_id],
                                                      ext_id))
            return None

        statements = [wdi_core.WDString(ext_id, self.dbxref_pid)]
        if type_of:
            rdf_type_pid = self.uri_pid['http://www.w3.org/1999/02/22-rdf-syntax-ns#rdf_type']
            statements.append(wdi_core.WDItemID(self.dbxref_qid[type_of], rdf_type_pid))

        item = self.item_engine(item_name=label, domain="foo", data=statements, core_props=[self.dbxref_pid])
        item.set_label(label)
        if description:
            item.set_description(description)
        if synonyms:
            item.set_aliases(synonyms)
        if self.write:
            item.write(self.login)

        self.dbxref_qid[ext_id] = item.wd_item_id
Beispiel #17
0
def createUniprotReference(uniprotId, retrieved):
    """
    Build a reference for data taken from UniProt.

    :param uniprotId: UniProt ID written to P352
    :param retrieved: object providing ``strftime`` for the retrieved date (P813)
    :return: list of three reference snaks: stated in Q905695, retrieved, UniProt ID
    """
    return [
        wdi_core.WDItemID(value="Q905695", prop_nr="P248", is_reference=True),
        wdi_core.WDTime(retrieved.strftime("+%Y-%m-%dT00:00:00Z"), prop_nr="P813", is_reference=True),
        wdi_core.WDString(value=uniprotId, prop_nr="P352", is_reference=True),
    ]
Beispiel #18
0
 def create_reference(self):
     """ Create the wikidata reference for interpro and store it on ``self.reference``

     The reference consists of "stated in" the release item (``self.release_wdid``)
     followed by the interpro ID (``self.id``).
     """
     self.reference = [
         wdi_core.WDItemID(self.release_wdid, 'P248', is_reference=True),  # stated in Interpro version XX.X
         wdi_core.WDString(self.id, INTERPRO, is_reference=True),  # interpro ID
     ]
Beispiel #19
0
def getGeneQid(ncbiId, ncbi_reference):
    """
    Create an item engine for the gene identified by an NCBI gene ID.

    :param ncbiId: value for the P351 statement
    :param ncbi_reference: reference to attach; it is deep-copied, not shared
    :return: wdi_core.WDItemEngine built from the single P351 statement
    """
    id_statement = wdi_core.WDString(value=ncbiId,
                                     prop_nr="P351",
                                     references=[copy.deepcopy(ncbi_reference)])
    return wdi_core.WDItemEngine(data=[id_statement])
Beispiel #20
0
def createNCBIGeneReference(ncbiGeneId, retrieved):
    """
    Build a reference for data taken from NCBI Gene.

    :param ncbiGeneId: NCBI gene ID written to P351
    :param retrieved: object providing ``strftime`` for the retrieved date (P813)
    :return: list of three reference snaks: stated in Q20641742, retrieved, NCBI gene ID
    """
    return [
        wdi_core.WDItemID(value="Q20641742", prop_nr="P248", is_reference=True),
        wdi_core.WDTime(retrieved.strftime("+%Y-%m-%dT00:00:00Z"), prop_nr="P813", is_reference=True),
        wdi_core.WDString(value=ncbiGeneId, prop_nr="P351", is_reference=True),
    ]
Beispiel #21
0
    def add_entity(self, property_list, result):
        """
        Fill ``property_list`` with the statements for a Reactome entity.

        Only the entity types COMP, DS, CS and OS are handled; for any other
        type the method returns without touching ``property_list``.

        :param property_list: dict of property ID -> list of statements, updated in place
        :param result: the data from Reactome; 'entitytype' and 'cportal' are read here
        :return: None
        """
        # entity type -> item used as the "instance of" value
        instance_of_by_type = {
            'COMP': 'Q420927',
            'DS': 'Q47461827',
            'CS': 'Q47461807',
            'OS': 'Q49980450',
        }
        wditem_value = instance_of_by_type.get(result['entitytype'])
        if wditem_value is None:
            return

        # P31 = instance of; a complex portal reference is added next to the
        # standard one when the result names a complex portal entry
        cpref = []
        if result['cportal'] != '':
            cpref = self.create_complex_portal_reference(result['cportal'])
        instance_refs = [copy.deepcopy(self.reference)]
        if cpref:
            instance_refs.append(cpref)
        property_list["P31"] = [
            wdi_core.WDItemID(value=wditem_value, prop_nr="P31", references=instance_refs)
        ]

        # P2888 = exact match
        exact_match = wdi_core.WDUrl(self.match_url, prop_nr='P2888',
                                     references=[copy.deepcopy(self.reference)])
        property_list["P2888"] = [exact_match]

        # P703 = found in taxon
        found_in_taxon = wdi_core.WDItemID(value=self.species, prop_nr='P703',
                                           references=[copy.deepcopy(self.reference)])
        property_list["P703"] = [found_in_taxon]

        # P3937 = Reactome ID (written without a reference)
        property_list["P3937"] = [wdi_core.WDString(value=self.reactome_id, prop_nr='P3937')]

        self.add_entity_parts(property_list, result)
def create_reference(iNaturalist_id, retrieved):
    """
    Build a reference for data taken from the iNaturalist taxon mapping.

    :param iNaturalist_id: value for the iNaturalist Taxon ID snak
    :param retrieved: object providing ``strftime`` for the retrieved date
    :return: list of four reference snaks: stated in, reference URL,
        retrieved, iNaturalist Taxon ID
    """
    mapping_url = "https://www.dropbox.com/s/gdcjmre6v3h9k20/inaturalist_taxon_mapping.tgz?dl=0"
    return [
        wdi_core.WDItemID(value=ITEMS['iNaturalist'], prop_nr=PROPS['stated in'], is_reference=True),
        wdi_core.WDString(value=mapping_url, prop_nr=PROPS['reference URL'], is_reference=True),
        wdi_core.WDTime(retrieved.strftime("+%Y-%m-%dT00:00:00Z"),
                        prop_nr=PROPS['retrieved'],
                        is_reference=True),
        wdi_core.WDString(value=iNaturalist_id,
                          prop_nr=PROPS['iNaturalist Taxon ID'],
                          is_reference=True),
    ]
Beispiel #23
0
def create_reference(spl_url,source_type):
    """
    Build a reference pointing at ``spl_url``.

    :param spl_url: URL written to P854
    :param source_type: string written to P958
    :return: list of five reference snaks: stated in Q73670648, retrieved
        (now), P2960 with the fixed date 9/29/2015, the URL and the source type
    """
    wikidata_time = "+%Y-%m-%dT00:00:00Z"
    retrieved_now = datetime.now().strftime(wikidata_time)
    # fixed date written to P2960
    archived_date = datetime.strptime('9/29/2015','%m/%d/%Y').strftime(wikidata_time)

    return [
        wdi_core.WDItemID(value="Q73670648", prop_nr="P248", is_reference=True),
        wdi_core.WDTime(retrieved_now, prop_nr="P813", is_reference=True),
        wdi_core.WDTime(archived_date, prop_nr="P2960", is_reference=True),
        wdi_core.WDUrl(value=spl_url, prop_nr="P854", is_reference=True),
        wdi_core.WDString(value=source_type, prop_nr="P958", is_reference=True),
    ]
def append_identifiers(wikidata_id,
                       doi=None,
                       pmid=None,
                       pmcid=None,
                       nioshtic=None):
    """
    Add identifiers (DOI, PMID, PMCID, NIOSHTIC) to an existing Wikidata item.

    Only identifiers that are not None become statements. The item is written
    with ``WIKI_SESSION`` and a pipe-separated summary line is printed, with
    an empty field for every identifier that was not supplied.

    @param wikidata_id: the Q-number of the Wikidata item to edit
    @param doi: string; defaults to None
    @param pmid: string; defaults to None
    @param pmcid: string; defaults to None
    @param nioshtic: string; defaults to None
    """
    # (property, value) pairs; the order fixes both the statement order and
    # the column order of the printed summary.
    identifier_table = (
        ('P356', doi),
        ('P698', pmid),
        ('P932', pmcid),
        ('P2880', nioshtic),
    )

    data = [
        wdi_core.WDString(value=identifier, prop_nr=prop)
        for prop, identifier in identifier_table
        if identifier is not None
    ]

    # All four properties are always listed as append-value properties,
    # whether or not a value was supplied for them.
    append_value = [prop for prop, _ in identifier_table]
    wikidata_item = wdi_core.WDItemEngine(wd_item_id=wikidata_id,
                                          data=data,
                                          append_value=append_value)
    wikidata_item.write(WIKI_SESSION)

    summary_fields = [wikidata_id]
    for _, identifier in identifier_table:
        summary_fields.append('' if identifier is None else identifier)
    print('|'.join(summary_fields))
Beispiel #25
0
    def to_wikidata(self):
        """
        Turn this record's identifiers into a list of WDString statements.

        Every statement carries the same reference block (stated in, source
        element built from ``self.unii``, retrieved today). Empty identifiers
        are skipped, list/set identifiers yield one statement per member, and
        a SMILES string longer than 400 characters is left out entirely.

        :return: list of wdi_core.WDString statements
        """
        stated_in = wdi_core.WDItemID(value='Q6593799',
                                      prop_nr='P248',
                                      is_reference=True)  # stated in
        source_element = wdi_core.WDExternalID(value=self.unii,
                                               prop_nr='P652',
                                               is_reference=True)  # source element
        retrieved = wdi_core.WDTime(time=time.strftime('+%Y-%m-%dT00:00:00Z'),
                                    prop_nr='P813',
                                    is_reference=True)  # retrieved
        references = [[stated_in, source_element, retrieved]]
        print('UNII Main label is', self.label)

        elements = {
            'P652': self.unii,
            'P2017': self.smiles,
            'P235': self.stdinchikey,
            'P231': self.cas,
            'P232': self.einecs,
            'P1748': self.nci,
            'P3345': self.rxnorm
        }

        # Over-long SMILES strings are dropped rather than written.
        if self.smiles and len(self.smiles) > 400:
            elements.pop('P2017')

        statements = []
        for prop, raw in elements.items():
            if raw:
                print('{}:'.format(prop), raw)
                # Normalise scalars to a one-element list so both shapes share
                # the same statement-building code.
                members = raw if isinstance(raw, (list, set)) else [raw]
                for member in members:
                    statements.append(
                        wdi_core.WDString(prop_nr=prop,
                                          value=member,
                                          references=references))

        return statements
Beispiel #26
0
    def create_gp_statements(self):
        """
        Build strand orientation, genomic start and genomic end statements.

        Each statement is referenced to the genomic_pos source and qualified
        with the record's 'chr' value as a P2249 string (the chromosome's
        RefSeq ID rather than a chromosome item).

        :return: list of three statements: orientation, start, end
        """
        position = self.record['genomic_pos']['@value']
        source = self.record['genomic_pos']['@source']
        id_prop = source_ref_id[source['id']]
        reference = make_ref_source(source,
                                    PROPS[id_prop],
                                    self.external_ids[id_prop],
                                    login=self.login)

        # qualifier for chromosome REFSEQ ID (not chrom item)
        chrom_qualifier = wdi_core.WDString(value=position['chr'],
                                            prop_nr='P2249',
                                            is_qualifier=True)

        def annotated(statement_cls, value, prop):
            # Fresh reference/qualifier lists for every statement, so no two
            # statements share a list object.
            return statement_cls(value,
                                 prop,
                                 references=[reference],
                                 qualifiers=[chrom_qualifier])

        # strand == 1 maps to Q22809680, anything else to Q22809711
        if position['strand'] == 1:
            orientation = 'Q22809680'
        else:
            orientation = 'Q22809711'

        return [
            annotated(wdi_core.WDItemID, orientation,
                      PROPS['strand orientation']),
            # coordinates are normalised through int() and stored as strings
            annotated(wdi_core.WDString, str(int(position['start'])),
                      PROPS['genomic start']),
            annotated(wdi_core.WDString, str(int(position['end'])),
                      PROPS['genomic end']),
        ]
Beispiel #27
0
    def test_new_item_creation(self):
        """
        Check that a new item's JSON can be built for every statement datatype.

        Each datatype is tried on its own and then all together, once with
        P0..P19 declared as core props and once with no core props.
        """
        data = [
            wdi_core.WDString(value='test', prop_nr='P1'),
            wdi_core.WDString(value='test1', prop_nr='P2'),
            wdi_core.WDMath("xxx", prop_nr="P3"),
            wdi_core.WDExternalID("xxx", prop_nr="P4"),
            wdi_core.WDItemID("Q123", prop_nr="P5"),
            wdi_core.WDTime('+%Y-%m-%dT%H:%M:%SZ', "P6"),
            wdi_core.WDUrl("http://www.google.com", "P7"),
            wdi_core.WDMonolingualText("xxx", prop_nr="P8"),
            wdi_core.WDQuantity(5, prop_nr="P9"),
            wdi_core.WDQuantity(5, upper_bound=9, lower_bound=2,
                                prop_nr="P10"),
            wdi_core.WDCommonsMedia("xxx", prop_nr="P11"),
            wdi_core.WDGlobeCoordinate(1.2345, 1.2345, 12, prop_nr="P12"),
            wdi_core.WDGeoShape("xxx", prop_nr="P13"),
            wdi_core.WDProperty("P123", "P14")
        ]
        core_props = {"P{}".format(x) for x in range(20)}

        def assert_builds(statements, props):
            # A new (unsaved) item must yield a non-empty JSON representation.
            engine = wdi_core.WDItemEngine(item_name='dae',
                                           domain="szadf",
                                           data=statements,
                                           core_props=props)
            assert engine.get_wd_json_representation()

        # one datatype at a time
        for statement in data:
            assert_builds([statement], core_props)
            assert_builds([statement], set())

        # all datatypes on a single item
        assert_builds(data, core_props)
        assert_builds(data, set())
Beispiel #28
0
    def create_gp_statements_chr(self):
        """
        Create genomic_pos start stop orientation on a chromosome

        For every genomic position a genomic start and a genomic end statement
        is produced, each qualified with its chromosome item. One chromosome
        statement is then added per distinct chromosome, and a single strand
        orientation statement is added only when all positions agree on it.

        :return: list of statements, or None when there is no reference for
                 the genomic_pos source
        :raises ValueError: if the genomic_pos source is neither "entrez" nor
                            "ensembl"
        :raises KeyError: if a position's 'chr' is missing from chr_num_wdid
        """
        if not self.entrez_ref:
            self.create_ref_sources()

        genomic_pos_values = self.record['genomic_pos']['@value']
        genomic_pos_source = self.record['genomic_pos']['@source']
        source_id = genomic_pos_source['id']
        if source_id == "entrez":
            genomic_pos_ref = self.entrez_ref
        elif source_id == "ensembl":
            genomic_pos_ref = self.ensembl_ref
        else:
            # Name the offending source so the failure can be diagnosed.
            raise ValueError(
                "unsupported genomic_pos source: {!r}".format(source_id))
        if not genomic_pos_ref:
            return None
        all_chr = {self.chr_num_wdid[x['chr']] for x in genomic_pos_values}
        all_strand = {'Q22809680' if x['strand'] == 1 else 'Q22809711'
                      for x in genomic_pos_values}

        s = []
        for genomic_pos_value in genomic_pos_values:
            # create qualifier for start/stop/orientation
            chrom_wdid = self.chr_num_wdid[genomic_pos_value['chr']]
            qualifiers = [wdi_core.WDItemID(chrom_wdid, PROPS['chromosome'], is_qualifier=True)]

            # genomic start and end
            s.append(wdi_core.WDString(str(int(genomic_pos_value['start'])), PROPS['genomic start'],
                                       references=[genomic_pos_ref], qualifiers=qualifiers))
            s.append(wdi_core.WDString(str(int(genomic_pos_value['end'])), PROPS['genomic end'],
                                       references=[genomic_pos_ref], qualifiers=qualifiers))

        # `chr` is a builtin, so the loop variable gets a non-shadowing name.
        for chr_wdid in all_chr:
            s.append(wdi_core.WDItemID(chr_wdid, PROPS['chromosome'], references=[genomic_pos_ref]))

        if len(all_strand) == 1:
            # todo: not sure what to do if you have both orientations on the same chr
            strand_orientation = next(iter(all_strand))
            s.append(wdi_core.WDItemID(strand_orientation, PROPS['strand orientation'], references=[genomic_pos_ref]))

        return s
Beispiel #29
0
def get_list_of_biological_sexes(cell_line_object, list_of_taxons_of_origin):
    """
    Build the P21 qualifiers describing the sex of a cell line's source.

    The sex labels are read from ``cell_line_object.cell_line_dump["SX"]``.
    "Sex unspecified" becomes a ``somevalue`` snak. Every other label is
    mapped to item IDs: the human set when the Homo sapiens item
    (Q15978631) is among ``list_of_taxons_of_origin``, otherwise the
    non-human set. "Mixed sex" yields two qualifiers, male then female; it is
    supported for human cell lines as well, where it used to raise KeyError.

    :param cell_line_object: object exposing a ``cell_line_dump`` mapping
                             with an "SX" list of sex labels
    :param list_of_taxons_of_origin: container of taxon item IDs
    :return: list of qualifier statements, in label order
    :raises KeyError: if a label is not one of the recognised values
    """
    # Built once per call rather than once per label. Each label maps to the
    # item IDs to emit, in emission order.
    human_sex_ids = {
        "Female": ("Q6581072",),
        "Male": ("Q6581097",),
        "Sex ambiguous": ("Q1097630",),
        "Mixed sex": ("Q6581097", "Q6581072"),
    }
    non_human_sex_ids = {
        "Female": ("Q43445",),
        "Male": ("Q44148",),
        "Sex ambiguous": ("Q28873047",),
        "Mixed sex": ("Q44148", "Q43445"),
    }
    id_for_homo_sapiens = "Q15978631"

    list_of_biological_sexes_of_source = []

    for biological_sex_of_source in cell_line_object.cell_line_dump["SX"]:
        if biological_sex_of_source == "Sex unspecified":
            # The value string is reproduced verbatim from the original code.
            list_of_biological_sexes_of_source.append(
                wdi_core.WDString(
                    value="Unknow value",
                    prop_nr="P21",
                    is_qualifier=True,
                    snak_type="somevalue",
                ))
            continue

        if id_for_homo_sapiens in list_of_taxons_of_origin:
            sex_ids_by_label = human_sex_ids
        else:
            sex_ids_by_label = non_human_sex_ids

        # An unrecognised label raises KeyError here, before any statement is
        # built for it.
        for biological_sex_id in sex_ids_by_label[biological_sex_of_source]:
            list_of_biological_sexes_of_source.append(
                wdi_core.WDItemID(value=biological_sex_id,
                                  prop_nr="P21",
                                  is_qualifier=True))

    return list_of_biological_sexes_of_source
def create_property(label, description, property_datatype, equiv_props, login):
    """
    Create a property entity carrying one statement per equivalent property.

    :param label: used as the item name and as the label
    :param description: description set on the new entity
    :param property_datatype: datatype passed through to ``write``
    :param equiv_props: iterable of values, each stored as a WDString under
                        the property returned by ``get_quiv_prop_pid()``
    :param login: login object passed to ``write``
    :return: the written item engine instance
    """
    statements = []
    for equiv_prop in equiv_props:
        # The target property is looked up anew for every value.
        statements.append(wdi_core.WDString(equiv_prop, get_quiv_prop_pid()))

    new_property = localItemEngine(item_name=label,
                                   domain="foo",
                                   data=statements)
    new_property.set_label(label)
    new_property.set_description(description)
    new_property.write(login,
                       entity_type="property",
                       property_datatype=property_datatype)
    return new_property