Esempio n. 1
0
    def test_fastrun_label(self):
        """Check label and alias lookups for Q2 when the engine runs in fast-run mode."""
        statements = [
            wdi_core.WDItemID('Q544', 'P361'),
            wdi_core.WDItemID('Q7547', 'P398'),
            wdi_core.WDString('Mars', 'P2572'),
        ]
        base_filter = {'P361': 'Q544'}
        item = wdi_core.WDItemEngine(
            wd_item_id="Q2",
            data=statements,
            fast_run=True,
            fast_run_base_filter=base_filter,
        )

        # Dump the first stored fast-run container to ease debugging on failure.
        container = wdi_core.WDItemEngine.fast_run_store[0]
        print(container.prop_data)
        print(container.statements)
        print(container.prop_dt_map)
        print(container.rev_lookup)

        # Label as seen through the item itself ...
        assert item.get_label('en') == "Earth"

        # ... and through the container attached to the item.
        frc = item.fast_run_container
        assert frc.get_language_data("Q2", 'en', 'label')[0] == "Earth"
        assert frc.check_language_data("Q2", ['not the Earth'], 'en', 'label')

        assert "Terra" in item.get_aliases()
        """
Esempio n. 2
0
    def create_references(self, gdr):
        """
        Assemble the reference snaks for statements derived from ``gdr``.

        :param gdr: record providing ``phenocarta_url``, ``link`` and ``pmid``;
                    ``gdr.pmid`` is used as a key into ``self.pmid_qid_map``
        :return: list of five reference statements: two reference URLs, two
                 "stated in" items and a "retrieved" date (current UTC day)
        """
        return [
            # Reference URL for phenocarta
            wdi_core.WDUrl(value=gdr.phenocarta_url,
                           prop_nr=PROPS['reference URL'],
                           is_reference=True),
            # Reference URL for genome.gov
            wdi_core.WDUrl(value=gdr.link,
                           prop_nr=PROPS['reference URL'],
                           is_reference=True),
            # Stated in Phenocarta
            wdi_core.WDItemID(value='Q22330995',
                              prop_nr=PROPS['stated in'],
                              is_reference=True),
            # Stated in PubMed (the item mapped to this record's PMID)
            wdi_core.WDItemID(value=self.pmid_qid_map[gdr.pmid],
                              prop_nr=PROPS['stated in'],
                              is_reference=True),
            # Date retrieved, truncated to midnight
            wdi_core.WDTime(strftime("+%Y-%m-%dT00:00:00Z", gmtime()),
                            prop_nr=PROPS['retrieved'],
                            is_reference=True),
        ]
    def create_main_statements(self):
        """
        Populate ``self.s_main`` with the main statements for this term.

        One statement is built for every entry of ``self.relationships`` whose
        predicate is in ``self.do_graph.edge_prop`` and whose object is in
        ``self.do_graph.purl_wdid``.  Two "exact match" statements are then
        added, plus "instance of" disease for everything except DOID:4.
        ``self.create_reference()`` is called first if ``self.reference`` is falsy.
        """
        if not self.reference:
            self.create_reference()

        self.s_main = []
        for relationship in self.relationships:
            predicate = relationship[0]
            if predicate not in self.do_graph.edge_prop:
                # Unmapped relationship types are skipped without logging.
                continue

            target = relationship[1]
            if target not in self.do_graph.purl_wdid:
                warning = wdi_helpers.format_msg(self.doid, 'P699', None,
                                                 "unknown obj: {}".format(target),
                                                 msg_type="unknown obj")
                wdi_core.WDItemEngine.log("WARNING", warning)
                continue

            self.s_main.append(
                wdi_core.WDItemID(self.do_graph.purl_wdid[target],
                                  self.do_graph.edge_prop[predicate],
                                  references=[self.reference]))

        # exact match on self.id (an http://purl.obolibrary.org/obo/ URI per the original note)
        self.s_main.append(
            wdi_core.WDString(self.id, PROPS['exact match'], references=[self.reference]))

        # instance of disease, for every term but DOID:4
        if self.doid != "DOID:4":
            self.s_main.append(
                wdi_core.WDItemID('Q12136', PROPS['instance of'], references=[self.reference]))

        # exact match on the identifiers.org URI, carrying its own two-snak reference
        miriam_ref = [
            wdi_core.WDItemID(value="Q16335166", prop_nr='P248', is_reference=True),
            wdi_core.WDUrl("http://www.ebi.ac.uk/miriam/main/collections/MIR:00000233", 'P854',
                           is_reference=True),
        ]
        identifiers_url = "http://identifiers.org/doid/{}".format(self.doid)
        self.s_main.append(
            wdi_core.WDString(identifiers_url, PROPS['exact match'], references=[miriam_ref]))
Esempio n. 4
0
def test_ref_equals():
    """Exercise ``equals`` with and without reference comparison."""
    base_ref = [
        wdi_core.WDExternalID(value='P58742', prop_nr='P352'),
        wdi_core.WDItemID(value='Q24784025', prop_nr='P527'),
        wdi_core.WDTime('+2001-12-31T12:01:13Z', prop_nr='P813'),
    ]
    original = wdi_core.WDItemID("Q123", "P123", references=[base_ref])

    # Case 1: an untouched deep copy matches either way.
    clone = copy.deepcopy(original)
    assert original.equals(clone, include_ref=False)
    assert original.equals(clone, include_ref=True)

    # Case 2: only the date inside the reference differs, so the
    # statements match unless references are compared.
    clone = copy.deepcopy(original)
    clone.references[0][2] = wdi_core.WDTime('+2002-1-31T12:01:13Z',
                                             prop_nr='P813')
    assert original.equals(clone, include_ref=False)
    assert not original.equals(clone, include_ref=True)

    # Case 3: the copy gains a second reference block.
    clone = copy.deepcopy(original)
    clone.references.append(
        [wdi_core.WDExternalID(value='99999', prop_nr='P352')])
    assert original.equals(clone, include_ref=False)
    assert not original.equals(clone, include_ref=True)

    # Once the original gets an equivalent second block, they match again.
    original.references.append(
        [wdi_core.WDExternalID(value='99999', prop_nr='P352')])
    assert original.equals(clone, include_ref=True)
Esempio n. 5
0
    def make_statement_from_edge(self, edge):
        """
        Build the statement for ``edge``, with custom handling of RO_0002212 / RO_0002213.

        Edges with either of those predicates become an RO_0002211 statement
        carrying a "subject has role" qualifier looked up through
        ``self.regulates``.  All other edges are delegated to the parent class.

        :param edge: mapping with ``'sub'``, ``'pred'`` and ``'obj'`` keys
        :return: the statement, or None if any required id could not be resolved
        """
        h = self.helper
        special_preds = {
            'http://purl.obolibrary.org/obo/RO_0002212',
            'http://purl.obolibrary.org/obo/RO_0002213',
        }
        if edge['pred'] not in special_preds:
            return super(GOGraph, self).make_statement_from_edge(edge)

        subj_node = self.uri_node_map[edge['sub']]
        obj_qid = self.get_object_qid(edge['obj'])
        qual_qid = self.uri_node_map[self.regulates[edge['pred']]].qid
        pred_pid = self.PRED_PID_MAP[
            'http://purl.obolibrary.org/obo/RO_0002211']

        # Bail out (with a logged warning) as soon as one piece is missing.
        if not obj_qid or not qual_qid or not pred_pid:
            m = wdi_helpers.format_msg(edge['sub'], None, None,
                                       "failed on edge: {}".format(edge))
            print(m)
            wdi_core.WDItemEngine.log("WARNING", m)
            return None

        role_qualifier = wdi_core.WDItemID(
            qual_qid,
            h.get_pid(PROPS['subject has role']),
            is_qualifier=True)
        return wdi_core.WDItemID(
            obj_qid,
            pred_pid,
            qualifiers=[role_qualifier],
            references=[subj_node.create_ref_statement()])
Esempio n. 6
0
def do_item(entrezgene, orthologs, reference, entrez_homo, entrez_taxon,
            taxon_wdid, entrez_wdid, login, write):
    """
    Write the ortholog statements for one gene item.

    :param entrezgene: Entrez gene id of the item being edited (coerced to str)
    :param orthologs: iterable of Entrez gene ids; the gene itself is skipped
    :param reference: callable that builds a reference from ``entrez_homo[entrezgene]``
    :param entrez_homo: mapping indexed by Entrez id (str); value is passed to ``reference``
    :param entrez_taxon: mapping Entrez id (str) -> taxon key
    :param taxon_wdid: mapping taxon key -> taxon QID
    :param entrez_wdid: mapping Entrez id (str) -> gene QID
    :param login: passed through to ``wdi_helpers.try_write``
    :param write: passed through to ``wdi_helpers.try_write``
    :raises ValueError: if an ortholog has no entry in ``entrez_taxon``
    """
    entrezgene = str(entrezgene)
    statements = []
    shared_ref = reference(entrez_homo[entrezgene])

    for ortholog_id in map(str, orthologs):
        if ortholog_id == entrezgene:
            # a gene is not its own ortholog
            continue
        if ortholog_id not in entrez_taxon:
            raise ValueError("missing taxid for: " + ortholog_id)

        taxon_qualifier = wdi_core.WDItemID(
            taxon_wdid[entrez_taxon[ortholog_id]],
            PROPS['found in taxon'],
            is_qualifier=True)
        statements.append(
            wdi_core.WDItemID(entrez_wdid[ortholog_id],
                              PROPS['ortholog'],
                              references=[shared_ref],
                              qualifiers=[taxon_qualifier]))

    # fast_run and core_props are not parameters; they come from the enclosing module scope.
    base_filter = {
        PROPS['Entrez Gene ID']: '',
        PROPS['found in taxon']: taxon_wdid[entrez_taxon[entrezgene]],
    }
    item = wdi_core.WDItemEngine(wd_item_id=entrez_wdid[entrezgene],
                                 data=statements,
                                 fast_run=fast_run,
                                 fast_run_base_filter=base_filter,
                                 core_props=core_props)
    wdi_helpers.try_write(item,
                          entrezgene,
                          PROPS['Entrez Gene ID'],
                          edit_summary="edit orthologs",
                          login=login,
                          write=write)
Esempio n. 7
0
    def task_processor(self, task, n):
        """
        Write the P2860 statements for one task and print the write result.

        :param task: indexable; ``task[0]`` is the numeric id of the item being
                     edited, ``task[1]`` the value appended (URL-quoted) to
                     ``self.url_pattern``, ``task[2]`` the P813 time value and
                     ``task[3]`` an iterable of numeric ids of the cited items
        :param n: index into ``self.integrator`` selecting the ``'core'``
                  module and ``'login'`` to use
        """
        # A single reference block shared by every statement of this task.
        ref = [[
            wdi_core.WDItemID(value=self.source,
                              prop_nr='P248',
                              is_reference=True),
            wdi_core.WDUrl(value=self.url_pattern +
                           urllib.parse.quote_plus(str(task[1])),
                           prop_nr='P854',
                           is_reference=True),
            wdi_core.WDTime(task[2], prop_nr='P813', is_reference=True),
        ]]

        statements = [
            wdi_core.WDItemID(value='Q' + str(cited),
                              prop_nr='P2860',
                              references=ref)
            for cited in task[3]
        ]

        worker = self.integrator[n]
        engine = worker['core'].WDItemEngine(
            wd_item_id='Q' + str(task[0]),
            data=statements,
            append_value=self.append_value,
            good_refs=self.good_refs,
            keep_good_ref_statements=True)
        result = engine.write(self.integrator[n]['login'],
                              edit_summary=self.edit_summary)
        print(result)
Esempio n. 8
0
    def create(self, login):
        """
        Create an item for the combination of ``self.component_qids``.

        The item gets one "has part" statement per component and an
        "instance of" Q1304270 statement.  On success both lookup tables
        (``self.combo_qid`` and ``self.qid_combo``) are updated.

        :param login: passed through to ``try_write``
        :return: QID of the written item
        :raises ValueError: if ``try_write`` reports failure
        """
        # the label is built from the names of the components
        component_labels = getConceptLabels(self.component_qids)
        name = "{} combination therapy".format(" / ".join(component_labels.values()))
        description = "combination therapy"

        statements = [
            wdi_core.WDItemID(component, PROPS['has part'])
            for component in self.component_qids
        ]
        # instance of combination therapy
        statements.append(wdi_core.WDItemID("Q1304270", PROPS['instance of']))

        item = wdi_core.WDItemEngine(item_name=name, data=statements, domain="asdf")
        item.set_label(name)
        item.set_description(description)

        written = try_write(item,
                            record_id=";".join(self.component_qids),
                            record_prop='',
                            login=login)
        if not written:
            raise ValueError("unsuccessful item creation")

        self.combo_qid[self.component_qids] = item.wd_item_id
        self.qid_combo[item.wd_item_id] = self.component_qids
        return item.wd_item_id
Esempio n. 9
0
    def add_active_ingredient(self, ingredient_qid):
        """
        Link this product and ``ingredient_qid`` in both directions.

        First writes a P3781 statement (value ``ingredient_qid``) on
        ``self.qid``, then the inverse P3780 statement (value ``self.qid``) on
        ``ingredient_qid``.  Both statements are referenced via
        ``make_ref(self.rxcui)`` and written with ``self.login``.

        :param ingredient_qid: QID of the ingredient item
        """
        assert self.qid

        # Forward direction. No append_value is passed here; the original
        # note on this write reads "purposely overwriting this".
        forward_statements = [
            wdi_core.WDItemID(ingredient_qid,
                              'P3781',
                              references=make_ref(self.rxcui))
        ]
        product_item = wdi_core.WDItemEngine(
            wd_item_id=self.qid,
            data=forward_statements,
            fast_run=True,
            fast_run_use_refs=True,
            fast_run_base_filter={"P3345": ""},
            ref_handler=ref_handlers.update_retrieved_if_new)
        product_item.write(self.login)

        # Inverse direction. P3780 is passed as append_value; the original
        # note on this write reads "do not overwrite".
        inverse_statements = [
            wdi_core.WDItemID(self.qid,
                              'P3780',
                              references=make_ref(self.rxcui))
        ]
        ingredient_item = wdi_core.WDItemEngine(
            wd_item_id=ingredient_qid,
            data=inverse_statements,
            fast_run=True,
            fast_run_use_refs=True,
            fast_run_base_filter={"P3345": ""},
            ref_handler=ref_handlers.update_retrieved_if_new,
            append_value=['P3780'])
        ingredient_item.write(self.login)
Esempio n. 10
0
    def create_gp_statements(self):
        """
        Create the genomic position statements: strand orientation, genomic
        start and genomic end, each carrying a chromosome qualifier.

        Only the first entry of ``self.record['genomic_pos']['@value']`` is used.

        :return: list of three statements
        """
        genomic_pos = self.record['genomic_pos']
        position = genomic_pos['@value'][0]
        source = genomic_pos['@source']

        # The external id named by the source is what the reference points at.
        id_prop = source_ref_id[source['id']]
        assert isinstance(self.external_ids[id_prop], str)
        external_id = self.external_ids[id_prop]
        position_ref = make_ref_source(source, PROPS[id_prop], external_id, login=self.login)

        # chromosome qualifier, resolved from the position's 'chr' value (a RefSeq ID per the original note)
        chromosome_qid = self.refseq_qid_chrom[position['chr']]
        qualifiers = [wdi_core.WDItemID(value=chromosome_qid, prop_nr=PROPS['chromosome'], is_qualifier=True)]

        # strand orientation: Q22809680 when strand == 1, Q22809711 otherwise
        if position['strand'] == 1:
            strand_qid = 'Q22809680'
        else:
            strand_qid = 'Q22809711'

        statements = [
            wdi_core.WDItemID(strand_qid, PROPS['strand orientation'],
                              references=[position_ref], qualifiers=qualifiers)
        ]
        # genomic start and end, stored as integer strings
        for coord_key, prop_name in (('start', 'genomic start'), ('end', 'genomic end')):
            statements.append(
                wdi_core.WDString(str(int(position[coord_key])), PROPS[prop_name],
                                  references=[position_ref], qualifiers=qualifiers))

        return statements
Esempio n. 11
0
 def create_reference(source_str, evidence_level, login):
     """
     Build a reference made of:
     curator: Cancer Biomarkers database
     retrieved: current UTC date
     stated in: one snak per resolvable source
     (no reference URL)

     :param source_str: ";"-separated sources; entries starting with "PMID:"
                        are resolved through ``PublicationHelper``, others
                        are looked up in ``source_map``
     :param evidence_level: not used by this function
     :param login: passed to ``get_or_create`` for PMID sources
     :return: list of reference snaks
     """
     retrieved = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
     reference = [
         wdi_core.WDItemID(ITEMS['Cancer Biomarkers database'],
                           PROPS['curator'],
                           is_reference=True),
         wdi_core.WDTime(retrieved, prop_nr=PROPS['retrieved'], is_reference=True),
     ]

     for source in source_str.split(";"):
         if source.startswith("PMID:"):
             helper = wdi_helpers.PublicationHelper(
                 source.replace("PMID:", ""),
                 id_type="pmid",
                 source="europepmc")
             qid, _, success = helper.get_or_create(login)
             if not success:
                 # publications that cannot be resolved are left out
                 continue
             stated_in = qid
         elif source in source_map:
             stated_in = source_map[source]
         else:
             print("unknown source: {}".format(source))
             continue

         reference.append(
             wdi_core.WDItemID(stated_in,
                               PROPS['stated in'],
                               is_reference=True))
     return reference
def do_pharm_prod(drug_qid, brand_rxnorm, emea, url, brand_name):
    """
    Write the statements for a pharmaceutical product item.

    :param drug_qid: QID used as the value of P3781
    :param brand_rxnorm: value for P3345; skipped when ``pd.isnull`` is true,
                         otherwise converted via ``str(int(...))``
    :param emea: value for P3637, also passed to ``create_ref_statement``
    :param url: passed to ``create_ref_statement``
    :param brand_name: item name and label
    :return: QID of the item after the write attempt
    """
    ref = create_ref_statement(emea, url)

    statements = [
        # has active substance
        wdi_core.WDItemID(drug_qid, 'P3781', references=[ref]),
        # instance of: pharmaceutical product
        wdi_core.WDItemID('Q28885102', 'P31', references=[ref]),
        # instance of: chemical mixture
        wdi_core.WDItemID('Q169336', 'P31', references=[ref]),
        # emea
        wdi_core.WDExternalID(emea, 'P3637', references=[ref]),
    ]
    if not pd.isnull(brand_rxnorm):
        # this statement is added without a reference
        statements.append(wdi_core.WDExternalID(str(int(brand_rxnorm)), "P3345"))

    item = wdi_core.WDItemEngine(item_name=brand_name,
                                 data=statements,
                                 domain="drugs",
                                 append_value=['P3781'])
    item.set_label(brand_name)
    # only fill in a description when there is none yet
    if item.get_description() == '':
        item.set_description("pharmaceutical product")

    # NOTE(review): `login` is not a parameter of this function; it has to
    # resolve from the enclosing module scope.
    wdi_helpers.try_write(item,
                          emea,
                          'P3637',
                          login,
                          edit_summary="add 'active ingredient'")

    return item.wd_item_id
Esempio n. 13
0
    def process_relationship(self, gene_wdid, doid_wdid, gdr):
        """
        Build the two "genetic association" statements for one record.

        One statement has the disease as value and one has the gene as value.
        Each gets its own freshly created references and qualifiers.

        :param gene_wdid: QID of the gene
        :param doid_wdid: QID of the disease
        :param gdr: record passed to ``create_references`` / ``create_qualifiers``
        :return: dict with keys ``'disease_item'`` and ``'gene_item'``
        """

        def build_statement(target_wdid):
            # References and qualifiers are rebuilt per statement, not shared.
            refs = self.create_references(gdr)
            quals = self.create_qualifiers(gdr)
            return wdi_core.WDItemID(value=target_wdid,
                                     prop_nr=PROPS["genetic association"],
                                     references=[refs],
                                     qualifiers=quals,
                                     check_qualifier_equality=False)

        # statement whose value is the disease
        disease_item = build_statement(doid_wdid)
        # statement whose value is the gene
        gene_item = build_statement(gene_wdid)

        return {'disease_item': disease_item, 'gene_item': gene_item}
Esempio n. 14
0
def get_item_statements(i_dict, type):
    """
    Turn an item dictionary into a list of statements.

    :param i_dict: mapping of property name -> list of values; the keys
                   "Q", "wiki" and "label" are skipped
    :param type: key into ``cfg.object_ids``; when not None, a P1 statement
                 with the mapped value is placed first
    :return: list of statements
    """
    statements = []
    if type is not None:
        statements.append(wdi_core.WDItemID(cfg.object_ids.get(type), prop_nr="P1"))

    for prop, values in i_dict.items():
        # The wikidata Q value and label are ignored here;
        # wikidata property statements are imported later.
        if prop in ("Q", "wiki", "label"):
            continue
        # this weird case causes an internal server error
        if prop == "is_related_to" and i_dict.get("label")[0] == "Koppel Ted":
            continue

        # look up the property id and whether its values are items
        pid = cfg.property_ids.get(prop)
        is_object_prop = cfg.property_keys.index(prop) in cfg.object_prop

        for value in values:
            if is_object_prop:
                # resolve the local Q identifier; record and skip misses
                qid = get_local_q(value)
                if qid is None:
                    props_missed.append([i_dict.get("label")[0], pid, value])
                    continue
                statement = wdi_core.WDItemID(qid, prop_nr=pid)
            else:
                # values longer than 400 characters are cut to 395 plus "..."
                if len(value) > 400:
                    value = value[:395] + "..."
                statement = wdi_core.WDString(value, prop_nr=pid)
            statements.append(statement)
    return statements
Esempio n. 15
0
    def create(self, label: str, rxcui: str, ingredient_qids: list):
        """
        Create a mixture item for the given ingredients and register it in the caches.

        :param label: item name and label
        :param rxcui: RxNorm id (coerced to str); written as P3345 and used
                      to build every reference via ``make_ref``
        :param ingredient_qids: QIDs written as P527 values
        :return: QID of the written item
        :raises ValueError: if ``rxcui`` is already in ``self.rxnorm_qid`` or
                            ``self.get_mixture_qid`` finds an existing mixture
        """
        rxcui = str(rxcui)
        # check to make sure it doesn't exist
        if rxcui in self.rxnorm_qid:
            raise ValueError("rxcui {} already exists: {}".format(rxcui, self.rxnorm_qid[rxcui]))
        # check by ingredients
        qid = self.get_mixture_qid(ingredient_qids)
        if qid:
            raise ValueError("mixture already exists: {}".format(qid))

        # has part
        s = [wdi_core.WDItemID(x, 'P527', references=make_ref(rxcui)) for x in ingredient_qids]
        # instance of
        s.append(wdi_core.WDItemID('Q12140', 'P31', references=make_ref(rxcui)))  # drug
        s.append(wdi_core.WDItemID('Q79529', 'P31', references=make_ref(rxcui)))  # chemical substance
        s.append(wdi_core.WDItemID('Q169336', 'P31', references=make_ref(rxcui)))  # mixture
        # rxnorm
        s.append(wdi_core.WDExternalID(rxcui, "P3345", references=make_ref(rxcui)))

        item = wdi_core.WDItemEngine(item_name=label, data=s, domain="drugs")
        # The label is set unconditionally, for new and existing items alike.
        # An earlier `if item.create_new_item:` guard set the same label just
        # before this call, so it was redundant and has been dropped.
        item.set_label(label)
        # keep an existing description, otherwise add a default one
        if not item.get_description():
            item.set_description("combination drug")
        item.write(self.login)
        qid = item.wd_item_id

        # update cache
        self.components_mixture[frozenset(ingredient_qids)] = qid
        self.mixture_components[qid] = ingredient_qids
        self.rxnorm_qid[rxcui] = qid

        return qid
Esempio n. 16
0
 def create_reference(omim, pmid, login=None):
     """
     Build a reference made of:
     curator: MitoDB
     retrieved: current UTC date
     stated in: the item for ``pmid`` (only when it could be resolved)
     reference URL: the MitoDB symptoms page for ``omim``

     :param omim: id substituted into the MitoDB URL
     :param pmid: PubMed id resolved through ``PublicationHelper``
     :param login: passed to ``get_or_create``
     :return: list of reference snaks
     """
     retrieved = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
     ref = [
         wdi_core.WDItemID(ITEMS['MitoDB'],
                           PROPS['curator'],
                           is_reference=True),
         wdi_core.WDTime(retrieved, prop_nr=PROPS['retrieved'], is_reference=True),
     ]

     helper = PublicationHelper(ext_id=pmid, id_type='pmid',
                                source="europepmc")
     pmid_qid, _, success = helper.get_or_create(login)
     # only an explicit True counts as success
     if success is True:
         ref.append(
             wdi_core.WDItemID(pmid_qid,
                               PROPS['stated in'],
                               is_reference=True))

     ref_url = "http://mitodb.com/symptoms.php?oid={}&symptoms=Show".format(omim)
     ref.append(
         wdi_core.WDUrl(ref_url,
                        PROPS['reference URL'],
                        is_reference=True))
     return ref
Esempio n. 17
0
def append_literature_descriptions(cell_line_object, data_to_add_to_wikidata):
    """
    Append a "described by source" (P1343) statement for each known publication.

    Entries of ``cell_line_object.cell_line_dump["RX"]`` of the form
    ``PubMed=<id>`` or ``DOI=<id>`` are looked up in
    ``cell_line_object.references``; every hit adds one statement whose value
    is the mapped entry.  Other entries are ignored.

    :param cell_line_object: object exposing ``references`` (publication id ->
        item), ``cell_line_dump`` and ``references_in_wdi_format``
    :param data_to_add_to_wikidata: list that is extended in place
    :return: the same ``data_to_add_to_wikidata`` list
    """
    publication_items = cell_line_object.references
    references_in_wdi_format = cell_line_object.references_in_wdi_format

    for reference_id in cell_line_object.cell_line_dump["RX"]:
        for prefix in ("PubMed=", "DOI="):
            if reference_id.startswith(prefix):
                # Slice the prefix off.  str.strip(prefix) would treat the
                # prefix as a character set and also eat matching characters
                # at the end of the id (e.g. a DOI ending in "D", "O" or "I").
                publication_id = reference_id[len(prefix):]
                break
        else:
            # neither a PubMed nor a DOI cross-reference
            continue

        if publication_id in publication_items:
            # P1343: described by source
            data_to_add_to_wikidata.append(
                wdi_core.WDItemID(
                    value=publication_items[publication_id],
                    prop_nr="P1343",
                    references=references_in_wdi_format))

    return data_to_add_to_wikidata
Esempio n. 18
0
def test_append_props():
    """Check ``write_required`` with ``append_props``, with and without reference comparison."""
    qid = 'Q3402672'
    # https://www.wikidata.org/wiki/Q3402672#P527

    def build_container(**extra):
        # Both scenarios use the same base filter, data type and engine.
        return fake_query_data_append_props(
            base_filter={
                'P352': '',
                'P703': 'Q15978631'
            },
            base_data_type=wdi_core.WDBaseDataType,
            engine=wdi_core.WDItemEngine,
            **extra)

    # References ignored: with P527 in append mode no write is needed,
    # without append mode a write is needed.
    statements = [wdi_core.WDItemID(value='Q24784025', prop_nr='P527')]
    frc = build_container()
    assert frc.write_required(data=statements, append_props=['P527'],
                              cqid=qid) is False
    assert frc.write_required(data=statements, cqid=qid)

    # References compared: in append mode with differing refs we should write.
    statements = [wdi_core.WDItemID(value='Q24784025', prop_nr='P527')]
    frc = build_container(use_refs=True)
    assert frc.write_required(data=statements, append_props=['P527'],
                              cqid=qid) is True
    assert frc.write_required(data=statements, cqid=qid)
Esempio n. 19
0
    def create_relationships(self, login, write=True):
        """
        Write the subclass-of / has-part / part-of statements for this term.

        Nothing is written when the QID lookup raises ``KeyError`` (an error
        is logged instead) or when there is no relationship to add.

        :param login: passed through to ``wdi_helpers.try_write``
        :param write: passed through to ``wdi_helpers.try_write``
        """
        try:
            # endpoint may not get updated in time?
            self.do_wdid_lookup()
        except KeyError as e:
            wdi_core.WDItemEngine.log("ERROR", format_msg(self.id, INTERPRO, None, str(e), type(e)))
            return

        # The first statement is always the InterPro id itself.
        statements = [wdi_core.WDExternalID(value=self.id, prop_nr=INTERPRO, references=[self.reference])]

        if self.parent:
            # subclass of
            statements.append(wdi_core.WDItemID(value=self.parent_wdid, prop_nr='P279', references=[self.reference]))
        if self.contains:
            # has part
            statements.extend(
                wdi_core.WDItemID(value=part, prop_nr='P527', references=[self.reference])
                for part in self.contains_wdid)
        if self.found_in:
            # part of
            statements.extend(
                wdi_core.WDItemID(value=whole, prop_nr='P361', references=[self.reference])
                for whole in self.found_in_wdid)

        # Only the id statement is present: there are no relationships to write.
        if len(statements) == 1:
            return

        wd_item = wdi_core.WDItemEngine(wd_item_id=self.wdid, domain='interpro', data=statements,
                                        append_value=['P279', 'P527', 'P361'],
                                        fast_run=True, fast_run_base_filter=IPRTerm.fast_run_base_filter)

        wdi_helpers.try_write(wd_item, self.id, INTERPRO, login, edit_summary="create/update subclass/has part/part of",
                              write=write)
Esempio n. 20
0
    def add_entity(self, property_list, result):
        """
        Fill ``property_list`` with the statements for one Reactome entity.

        :param property_list: dict of property id -> list of statements;
                              updated in place (P31, P2888, P703, P3937)
        :param result: the data from Reactome; ``'entitytype'`` and
                       ``'cportal'`` are read here
        :return: None; entity types other than COMP/DS/CS/OS are ignored
        """
        type_to_qid = {
            'COMP': 'Q420927',
            'DS': 'Q47461827',
            'CS': 'Q47461807',
            'OS': 'Q49980450',
        }
        wditem_value = type_to_qid.get(result['entitytype'])
        if wditem_value is None:
            return

        # P31 = instance of; a second reference block is added when
        # create_complex_portal_reference returns a non-empty reference
        cpref = []
        if result['cportal'] != '':
            cpref = self.create_complex_portal_reference(result['cportal'])
        instance_refs = [copy.deepcopy(self.reference)]
        if cpref:
            instance_refs.append(cpref)
        property_list["P31"] = [
            wdi_core.WDItemID(value=wditem_value,
                              prop_nr="P31",
                              references=instance_refs)
        ]

        # P2888 = exact match
        exact_match = wdi_core.WDUrl(self.match_url,
                                     prop_nr='P2888',
                                     references=[copy.deepcopy(self.reference)])
        property_list["P2888"] = [exact_match]

        # P703 = found in taxon
        taxon = wdi_core.WDItemID(value=self.species,
                                  prop_nr='P703',
                                  references=[copy.deepcopy(self.reference)])
        property_list["P703"] = [taxon]

        # P3937 = Reactome ID (added without a reference)
        property_list["P3937"] = [
            wdi_core.WDString(value=self.reactome_id, prop_nr='P3937')
        ]

        self.add_entity_parts(property_list, result)
Esempio n. 21
0
    def create_statements(self):
        """
        Create the statements common to all genes.

        Covers the external-id statements, "instance of" and "found in taxon".
        ``self.create_ref_sources()`` is called first if ``self.entrez_ref``
        is falsy.  ``self.type_of_gene`` is set as a side effect.

        :return: list of statements
        """
        if not self.entrez_ref:
            self.create_ref_sources()

        ############
        # ID statements (required)
        ############
        statements = [
            wdi_core.WDString(self.external_ids['Entrez Gene ID'], PROPS['Entrez Gene ID'],
                              references=[self.entrez_ref])
        ]

        # optional ID statements; Ensembl ids are only added when an Ensembl reference exists
        if self.ensembl_ref:
            statements.extend(
                wdi_core.WDString(gene_id, PROPS['Ensembl Gene ID'], references=[self.ensembl_ref])
                for gene_id in self.external_ids['Ensembl Gene ID'])

            if 'Ensembl Transcript ID' in self.external_ids:
                statements.extend(
                    wdi_core.WDString(transcript_id, PROPS['Ensembl Transcript ID'], references=[self.ensembl_ref])
                    for transcript_id in self.external_ids['Ensembl Transcript ID'])

        # multi-valued id referenced to Entrez
        key = 'RefSeq RNA ID'
        if key in self.external_ids:
            statements.extend(
                wdi_core.WDString(rna_id, PROPS[key], references=[self.entrez_ref])
                for rna_id in self.external_ids[key])

        # single-valued ids referenced to Entrez
        single_valued_keys = ('NCBI Locus tag', 'Saccharomyces Genome Database ID', 'Mouse Genome Informatics ID',
                              'MGI Gene Symbol', 'HomoloGene ID', 'Rat Genome Database ID', 'FlyBase Gene ID',
                              'Wormbase Gene ID', 'ZFIN Gene ID', 'cytogenetic location')
        for key in single_valued_keys:
            if key in self.external_ids:
                statements.append(
                    wdi_core.WDString(self.external_ids[key], PROPS[key], references=[self.entrez_ref]))

        ############
        # Gene statements
        ############
        # if there is an ensembl ID, this comes from ensembl, otherwise, entrez
        if self.ensembl_ref is not None:
            gene_ref = self.ensembl_ref
        else:
            gene_ref = self.entrez_ref

        # instance of gene, ncRNA.. etc
        type_of_gene = self.record['type_of_gene']['@value']
        assert type_of_gene in type_of_gene_map, "unknown type of gene: {}".format(type_of_gene)
        self.type_of_gene = type_of_gene
        # "protein-coding gene" will be instance of "gene"
        statements.append(
            wdi_core.WDItemID(type_of_gene_map[type_of_gene], PROPS['instance of'], references=[gene_ref]))

        # every other type additionally gets instance of "gene" (Q7187)
        types_without_extra_gene = {'protein-coding', 'pseudo', 'other', 'unknown'}
        if type_of_gene not in types_without_extra_gene:
            statements.append(wdi_core.WDItemID("Q7187", PROPS['instance of'], references=[gene_ref]))

        # found in taxon
        statements.append(
            wdi_core.WDItemID(self.organism_info['wdid'], PROPS['found in taxon'], references=[gene_ref]))

        return statements
Esempio n. 22
0
def make_go_ref(curator, pmid_map, external_id, uniprot_id, evidence_wdid,
                retrieved, pmid=None):
    """
    Build the reference for one GO annotation.

    Snaks are added in this order: retrieved (P813), stated in the PMID item
    (P248, only when ``pmid`` is given), stated in Q28018111 (P248),
    curator (P1640), an optional curator-specific external id,
    the QuickGO URL (P854) and the determination method (P459).

    :param curator: key into ``curators_wdids``
    :param pmid_map: mapping pmid -> item QID
    :param external_id: mapping of external id name -> value
    :param uniprot_id: inserted twice into the QuickGO URL
    :param evidence_wdid: QID written as P459
    :param retrieved: object with ``strftime`` (date of retrieval)
    :param pmid: optional PubMed id
    :return: list of reference snaks
    :raises ValueError: if ``pmid`` is given but missing from ``pmid_map``,
                        or if ``curator`` is unknown
    """
    # every reference starts with the retrieval date
    reference = [
        wdi_core.WDTime(retrieved.strftime('+%Y-%m-%dT00:00:00Z'),
                        prop_nr='P813',
                        is_reference=True)
    ]

    # stated in pmid
    if pmid:
        if pmid not in pmid_map:
            raise ValueError(
                "article item for pmid {} not found. skipping item".format(
                    pmid))
        reference.append(
            wdi_core.WDItemID(pmid_map[pmid], 'P248', is_reference=True))

    # stated in uniprot-GOA Q28018111
    reference.append(wdi_core.WDItemID('Q28018111', 'P248', is_reference=True))

    # curator
    if curator not in curators_wdids:
        raise ValueError("curator not found: {}".format(curator))
    reference.append(
        wdi_core.WDItemID(curators_wdids[curator],
                          'P1640',
                          is_reference=True))

    # Curator-specific external id (e.g. for SGD), added when the curator
    # has a mapped id name and that id is available.
    if curator in curator_ref and curator_ref[curator] in external_id:
        id_name = curator_ref[curator]
        reference.append(
            wdi_core.WDString(external_id[id_name],
                              PROPS[id_name],
                              is_reference=True))

    # reference URL
    # NOTE(review): P854 is written as WDString here; confirm whether WDUrl is intended.
    url_template = "http://www.ebi.ac.uk/QuickGO/annotations?protein={}&geneProductId=UniProtKB:{}"
    ref_url = url_template.format(uniprot_id, uniprot_id)
    reference.append(wdi_core.WDString(ref_url, 'P854', is_reference=True))

    # ref determination method
    reference.append(
        wdi_core.WDItemID(evidence_wdid, 'P459', is_reference=True))

    return reference
Esempio n. 23
0
def get_list_of_biological_sexes(cell_line_object, list_of_taxons_of_origin):
    """Build the P21 qualifiers describing the sex of a cell line's source.

    The sex entries are read from ``cell_line_object.cell_line_dump["SX"]``
    and converted one by one:

    * "Sex unspecified" becomes a ``somevalue`` snak.
    * If the human taxon item (Q15978631) is listed in
      ``list_of_taxons_of_origin``, the human sex items are used.
    * Otherwise the non-human sex items are used; "Mixed sex" produces two
      qualifiers (Q44148 first, then Q43445).

    A sex entry that is missing from the applicable lookup table raises
    ``KeyError``.

    :param cell_line_object: object exposing a ``cell_line_dump`` mapping
    :param list_of_taxons_of_origin: container of taxon QIDs of the source
    :return: list of qualifier statements, in the order they were produced
    """
    human_taxon_qid = "Q15978631"

    human_sex_qids = {
        "Female": "Q6581072",
        "Male": "Q6581097",
        "Sex ambiguous": "Q1097630",
    }
    non_human_sex_qids = {
        "Female": "Q43445",
        "Male": "Q44148",
        "Sex ambiguous": "Q28873047",
    }

    sex_qualifiers = []

    for sex_label in cell_line_object.cell_line_dump["SX"]:
        if sex_label == "Sex unspecified":
            # No concrete item: record the qualifier as an unknown value.
            unknown_sex = wdi_core.WDString(
                value="Unknow value",
                prop_nr="P21",
                is_qualifier=True,
                snak_type="somevalue",
            )
            sex_qualifiers.append(unknown_sex)
            continue

        if human_taxon_qid in list_of_taxons_of_origin:
            sex_qid = human_sex_qids[sex_label]
        elif sex_label == "Mixed sex":
            # Mixed sex is expressed as both items; Q44148 goes in here and
            # Q43445 is added by the shared append below.
            sex_qualifiers.append(
                wdi_core.WDItemID(value="Q44148",
                                  prop_nr="P21",
                                  is_qualifier=True))
            sex_qid = "Q43445"
        else:
            sex_qid = non_human_sex_qids[sex_label]

        sex_qualifiers.append(
            wdi_core.WDItemID(value=sex_qid,
                              prop_nr="P21",
                              is_qualifier=True))

    return sex_qualifiers
def findAndSetIncreasedForm(qid, first, second):
    """Write a P1911 statement when one count is at least 4x the other.

    Nothing is written unless both counts are positive. If ``first`` is at
    least four times ``second`` the module-level ``eb`` is written with
    ``rb`` as its P2210 qualifier; if ``second`` is at least four times
    ``first`` the roles are swapped. Any other ratio is ignored.

    :param qid: identifier of the item passed on to ``write2wikidata``
    :param first: count compared in favour of ``eb``
    :param second: count compared in favour of ``rb``
    :return: None
    """
    if not (first > 0 and second > 0):
        return

    if first >= 4 * second:
        dominant, counterpart = eb, rb
    elif second >= 4 * first:
        dominant, counterpart = rb, eb
    else:
        # Neither count dominates strongly enough.
        return

    relative_to = wdi_core.WDItemID(value=counterpart,
                                    prop_nr='P2210',
                                    is_qualifier=True)
    write2wikidata(qid, 'P1911', dominant, [relative_to])
def create_reference(observation_id, license):
    """Assemble the reference snaks for a statement backed by an observation.

    The reference contains, in this order: "stated in" (P248) pointing at
    Q16958215, the observation id as a string under P5683, and the license
    item under P275.

    :param observation_id: observation identifier; converted with ``str()``
    :param license: QID of the license item
    :return: list of three reference snaks
    """
    return [
        wdi_core.WDItemID(value='Q16958215',
                          prop_nr='P248',
                          is_reference=True),
        wdi_core.WDString(value=str(observation_id),
                          prop_nr='P5683',
                          is_reference=True),
        wdi_core.WDItemID(value=license,
                          prop_nr="P275",
                          is_reference=True),
    ]
Esempio n. 26
0
def create_or_update_refseq_protein_item(geneid, refseqID):
    """Write a protein item for a RefSeq protein and link it from its gene.

    Steps:

    1. Build the protein statements (instance of protein, encoded by,
       found in taxon, RefSeq protein ID), each with its own copy of the
       NCBI gene reference.
    2. Fetch the GenBank protein record to obtain the product name, which
       is used as the English label when the item has none.
    3. Fill in missing descriptions (en, de, nl, es, it) and write the item.
    4. Add an "encodes" (P688) statement on the gene item and write it.

    NOTE(review): ``hit``, ``geneinfo`` and ``login`` are not parameters;
    they are read from the enclosing module scope exactly as before.
    Confirm they are kept in sync with ``geneid`` / ``refseqID`` by callers.

    :param geneid: gene identifier handed to ``getGeneQid``
    :param refseqID: RefSeq protein accession stored under P637
    :return: whatever ``write`` returns for the updated gene item
    :raises ValueError: if a label is needed but the fetched record has no
        protein feature to take it from
    """
    statements = []
    retrieved = datetime.now()
    ncbi_reference = createNCBIGeneReference(hit["entrezgene"], retrieved)

    # Instance of protein
    statements.append(wdi_core.WDItemID(value="Q8054", prop_nr="P31", references=[copy.deepcopy(ncbi_reference)]))

    # encoded by
    geneitem = getGeneQid(geneid, ncbi_reference)
    geneqid = geneitem.wd_item_id
    statements.append(wdi_core.WDItemID(value=geneqid, prop_nr="P702", references=[copy.deepcopy(ncbi_reference)]))

    # found in taxon (taken from the first P703 claim of the gene item)
    geneJson = geneitem.get_wd_json_representation()
    taxonQID = geneJson['claims']["P703"][0]["mainsnak"]["datavalue"]["value"]["id"]
    statements.append(wdi_core.WDItemID(taxonQID, prop_nr="P703", references=[copy.deepcopy(ncbi_reference)]))

    # refseq
    statements.append(wdi_core.WDString(refseqID, prop_nr="P637", references=[copy.deepcopy(ncbi_reference)]))

    # Fetch the GenBank record; always release the network handle.
    handle = Entrez.efetch(id=geneinfo["refseq"]["protein"], db='protein', rettype='gb', retmode='text')
    try:
        record = SeqIO.read(handle, 'genbank')
    finally:
        handle.close()

    # The last protein feature wins, as before; None means "none found".
    protein_label = None
    for feature in record.features:
        if feature.type.lower() == "protein":
            print(feature.qualifiers['product'])
            protein_label = feature.qualifiers['product'][0]
    taxonname = getTaxonItem(taxonQID).get_label(lang="en")

    protein_item = wdi_core.WDItemEngine(data=statements)
    if protein_item.get_label(lang="en") == "":
        if protein_label is None:
            raise ValueError(
                "no protein feature with a product name found for {}".format(refseqID))
        protein_item.set_label(protein_label, lang="en")

    # Only fill in descriptions that are still empty.
    description_prefixes = (
        ("en", "protein in "),
        ("de", "Eiweiß in "),
        ("nl", "eiwit in "),
        ("es", "proteína en "),
        ("it", "Proteina in "),
    )
    for lang, prefix in description_prefixes:
        if protein_item.get_description(lang=lang) == "":
            protein_item.set_description(prefix + taxonname, lang=lang)

    pprint.pprint(protein_item.get_wd_json_representation())
    protein_qid = protein_item.write(login)
    print(protein_qid)

    ## add the newly create protein item to the gene item
    encodes = [wdi_core.WDItemID(protein_qid, prop_nr="P688", references=[copy.deepcopy(ncbi_reference)])]
    geneitem = wdi_core.WDItemEngine(wd_item_id=geneqid, data=encodes)
    return geneitem.write(login)
Esempio n. 27
0
    def create_xref_statement(self, value, xref_dict):
        """
        Build one item statement from the first entry of ``xref_dict``.

        Only the first ``prop_nr -> qualifier mapping`` pair is used; any
        further entries are ignored. A truthy qualifier mapping is expanded
        into item qualifiers (``qualifier prop -> qualifier value``).

        :param value: QID used as the statement value
        :param xref_dict: mapping of property number to a qualifier mapping
            (or a falsy value for "no qualifiers")
        :return: the statement, or None when ``xref_dict`` is empty
        """
        first_entry = next(iter(xref_dict.items()), None)
        if first_entry is None:
            return None

        prop_nr, qualifier_map = first_entry
        qualifiers = [
            wdi_core.WDItemID(value=qualifier_value,
                              prop_nr=qualifier_prop,
                              is_qualifier=True)
            for qualifier_prop, qualifier_value in (qualifier_map or {}).items()
        ]

        return wdi_core.WDItemID(value=value,
                                 prop_nr=prop_nr,
                                 qualifiers=qualifiers,
                                 references=[self.create_reference()])
Esempio n. 28
0
    def create_statements(self):
        """
        Build the statements shared by every protein item.

        ID statements: UniProt ID (always), Saccharomyces Genome Database ID
        (if present), one statement per Ensembl protein ID and per RefSeq
        protein ID (if present). Protein statements: instance of protein,
        found in taxon, encoded by.

        :return: list of statements in the order described above
        """
        external_ids = self.external_ids
        statements = []

        ############
        # ID statements
        # Required: uniprot (1)
        # Optional: OMIM (1?), Ensembl protein (0 or more), refseq protein (0 or more)
        ############
        entrez_gene = external_ids['Entrez Gene ID']
        uniprot_ref = make_ref_source(self.record['uniprot']['@source'],
                                      PROPS['UniProt ID'],
                                      external_ids['UniProt ID'],
                                      login=self.login)
        entrez_ref = make_ref_source(self.record['entrezgene']['@source'],
                                     PROPS['Entrez Gene ID'],
                                     external_ids['Entrez Gene ID'],
                                     login=self.login)

        statements.append(
            wdi_core.WDString(external_ids['UniProt ID'], PROPS['UniProt ID'], references=[uniprot_ref]))

        sgd_key = 'Saccharomyces Genome Database ID'
        if sgd_key in external_ids:
            statements.append(
                wdi_core.WDString(external_ids[sgd_key], PROPS[sgd_key], references=[entrez_ref]))

        ensembl_key = 'Ensembl Protein ID'
        if ensembl_key in external_ids:
            for ensembl_id in external_ids[ensembl_key]:
                # Each Ensembl ID is referenced by itself.
                ensembl_ref = make_ref_source(self.record['ensembl']['@source'],
                                              PROPS[ensembl_key],
                                              ensembl_id,
                                              login=self.login)
                statements.append(
                    wdi_core.WDString(ensembl_id, PROPS[ensembl_key], references=[ensembl_ref]))

        refseq_key = 'RefSeq Protein ID'
        if refseq_key in external_ids:
            for refseq_id in external_ids[refseq_key]:
                # RefSeq IDs are referenced through the Entrez gene ID;
                # a fresh reference is built for every statement.
                refseq_ref = make_ref_source(self.record['refseq']['@source'],
                                             PROPS['Entrez Gene ID'],
                                             entrez_gene,
                                             login=self.login)
                statements.append(
                    wdi_core.WDString(refseq_id, PROPS[refseq_key], references=[refseq_ref]))

        ############
        # Protein statements
        ############
        statements.extend([
            # instance of protein
            wdi_core.WDItemID("Q8054", PROPS['instance of'], references=[uniprot_ref]),
            # found in taxon
            wdi_core.WDItemID(self.organism_info['wdid'], PROPS['found in taxon'], references=[uniprot_ref]),
            # encoded by
            wdi_core.WDItemID(self.gene_wdid, PROPS['encoded by'], references=[uniprot_ref]),
        ])

        return statements
Esempio n. 29
0
def create_reference():
    """Build the reference snaks citing Q70116865 and Q21008030.

    For each of the two source items a "stated in" (P248) snak is followed
    by a "retrieved" (P813) snak carrying today's date at midnight.

    :return: list ``[stated in 1, retrieved 1, stated in 2, retrieved 2]``
    """
    retrieved_stamp = datetime.now().strftime("+%Y-%m-%dT00:00:00Z")

    reference = []
    for source_qid in ("Q70116865", "Q21008030"):
        reference.append(
            wdi_core.WDItemID(value=source_qid,
                              prop_nr="P248",
                              is_reference=True))
        reference.append(
            wdi_core.WDTime(retrieved_stamp,
                            prop_nr="P813",
                            is_reference=True))
    return reference
Esempio n. 30
0
    def create_gp_statements_chr(self):
        """
        Build the genomic position statements for a gene on a chromosome.

        Reads ``self.record['genomic_pos']`` and produces, in order: strand
        orientation, genomic start, genomic end and chromosome. Start and
        end carry the chromosome as a qualifier; every statement shares the
        same reference built from the position's source.

        :return: list of the four statements
        """
        position = self.record['genomic_pos']
        pos_value = position['@value']
        pos_source = position['@source']

        # The source decides which external ID backs the reference.
        id_prop = source_ref_id[pos_source['id']]
        pos_ref = make_ref_source(pos_source,
                                  PROPS[id_prop],
                                  self.external_ids[id_prop],
                                  login=self.login)

        # Chromosome qualifier shared by the start and end statements.
        chrom_wdid = self.chr_num_wdid[pos_value['chr']]
        chromosome_qualifier = [
            wdi_core.WDItemID(chrom_wdid,
                              PROPS['chromosome'],
                              is_qualifier=True)
        ]

        # Strand value 1 maps to Q22809680, anything else to Q22809711.
        if pos_value['strand'] == 1:
            strand_orientation = 'Q22809680'
        else:
            strand_orientation = 'Q22809711'

        return [
            # strand orientation
            wdi_core.WDItemID(strand_orientation,
                              PROPS['strand orientation'],
                              references=[pos_ref]),
            # genomic start and end
            wdi_core.WDString(str(int(pos_value['start'])),
                              PROPS['genomic start'],
                              references=[pos_ref],
                              qualifiers=chromosome_qualifier),
            wdi_core.WDString(str(int(pos_value['end'])),
                              PROPS['genomic end'],
                              references=[pos_ref],
                              qualifiers=chromosome_qualifier),
            # chromosome
            wdi_core.WDItemID(chrom_wdid,
                              PROPS['chromosome'],
                              references=[pos_ref]),
        ]