def reference_store(source='', identifier=''):
    """
    Build the reference that cites a record of an external database.

    :param source: name of the cited database. It is looked up in both tables
        below, so only 'uniprot', 'ncbi_gene' and 'ncbi_taxonomy' succeed;
        any other value (including the default '') raises KeyError
    :param identifier: identifier of the record in that database; stored under
        the database's identifier property
    :return: list of PBB_Core reference objects (stated in, language of work,
        source identifier, retrieved date), each flagged to overwrite
        existing references
    """
    # Wikidata item representing each database (value of 'stated in', P248).
    stated_in_by_source = {
        'uniprot': 'Q905695',
        'ncbi_gene': 'Q20641742',
        'ncbi_taxonomy': 'Q13711410',
        'swiss_prot': 'Q2629752',
        'trembl': 'Q22935315',
    }
    # Wikidata property that holds each database's identifier.
    id_prop_by_source = {
        'uniprot': 'P352',
        'ncbi_gene': 'P351',
        'ncbi_taxonomy': 'P685',
        'ncbi_locus_tag': 'P2393',
    }

    # Day-precision UTC timestamp in the format the other references use.
    retrieved_on = strftime("+%Y-%m-%dT00:00:00Z", gmtime())

    reference = [
        PBB_Core.WDItemID(value=stated_in_by_source[source], prop_nr='P248', is_reference=True),
        PBB_Core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True),
        PBB_Core.WDString(value=identifier, prop_nr=id_prop_by_source[source], is_reference=True),
        PBB_Core.WDTime(str(retrieved_on), prop_nr='P813', is_reference=True),
    ]
    for snak in reference:
        snak.overwrite_references = True
    return reference
Example #2
0
    def make_reference(self,
                       stated_in,
                       source_element,
                       source_element_name,
                       source_element_prop,
                       date=None,
                       date_property='P813'):
        """
        Build a single-reference block for a Wikidata claim.

        :param stated_in: QID of the item the statement is 'stated in' (P248)
        :param source_element: identifier of the cited record in the source
        :param source_element_name: English title of the cited record (P1476)
        :param source_element_prop: property number under which
            source_element is stored
        :param date: timestamp string in '+%Y-%m-%dT00:00:00Z' form; when
            omitted, today's UTC date is computed at call time
        :param date_property: property number used for the date (default P813)
        :return: a list containing one reference, itself a list of PBB_Core
            reference objects flagged to overwrite existing references
        """
        # The default used to be computed once, when the function was defined,
        # so a long-running process kept stamping the start-up date. It also
        # formatted local time although the string carries a 'Z' (UTC) suffix.
        if date is None:
            date = time.strftime('+%Y-%m-%dT00:00:00Z', time.gmtime())

        ref = [[
            PBB_Core.WDItemID(value=stated_in,
                              prop_nr='P248',
                              is_reference=True),  # stated in
            PBB_Core.WDString(value=source_element,
                              prop_nr=source_element_prop,
                              is_reference=True),  # source element
            PBB_Core.WDItemID(value='Q1860', prop_nr='P407',
                              is_reference=True),  # language of work
            PBB_Core.WDMonolingualText(value=source_element_name,
                                       language='en',
                                       prop_nr='P1476',
                                       is_reference=True),
            PBB_Core.WDTime(time=date,
                            prop_nr=date_property,
                            is_reference=True)  # date (P813 by default)
        ]]

        # this will overwrite all existing references of a WD claim value.
        for x in ref[0]:
            x.overwrite_references = True

        return ref
Example #3
0
    def protein_item_statements():
        """
        Assemble the referenced statements for a protein item.

        Reads uniprot, gene_record, spec_strain, go_props and
        go_evidence_codes from the enclosing scope.
        :return: list of PBB_Core statement objects, all citing the UniProt
            reference
        """
        uniprot_ref = wdo.reference_store(source='uniprot', identifier=uniprot)

        string_claims = {
            'P637': str(gene_record['refseq']['protein']),  # refseq protein id
            'P352': uniprot,  # uniprot ID
        }
        item_claims = {
            'P703': [spec_strain.iloc[0]['wd_qid']],  # strain qid from strain record
            'P279': ['Q8054'],  # subclass of protein
        }

        statements = []

        # One claim per GO term; the evidence code becomes a qualifier when
        # it can be resolved.
        for go_term in gene_record['GOTERMS']:
            go_prop = go_props[go_term[1]]
            # Resolve the GO ID (P686) to the QID of its Wikidata item.
            go_qid = wdo.WDSparqlQueries(prop='P686',
                                         string=go_term[0]).wd_prop2qid()
            try:
                evidence = PBB_Core.WDItemID(
                    value=go_evidence_codes[go_term[2]],
                    prop_nr='P459',
                    is_qualifier=True)
                statements.append(
                    PBB_Core.WDItemID(value=go_qid,
                                      prop_nr=go_prop,
                                      references=[uniprot_ref],
                                      qualifiers=[evidence]))
            except Exception:
                # Any failure above falls back to the claim without qualifier.
                statements.append(
                    PBB_Core.WDItemID(value=go_qid,
                                      prop_nr=go_prop,
                                      references=[uniprot_ref]))

        # Item-valued claims: one statement per QID in each list.
        for prop, qids in item_claims.items():
            if not qids:
                continue
            statements.extend(
                PBB_Core.WDItemID(value=qid,
                                  prop_nr=prop,
                                  references=[uniprot_ref])
                for qid in qids)

        # String-valued claims; empty values are skipped.
        statements.extend(
            PBB_Core.WDString(value=text,
                              prop_nr=prop,
                              references=[uniprot_ref])
            for prop, text in string_claims.items() if text)

        return statements
    def gene_item_statements():
        """
        Assemble the referenced statements for a gene item.

        Reads gene_record and spec_strain from the enclosing scope.
        :return: list of PBB_Core statement objects, all citing the NCBI gene
            reference
        """
        # reference shared by every claim on the gene item
        ncbi_gene_reference = wdo.reference_store(
            source='ncbi_gene', identifier=gene_record['_id'])

        # string-valued claims
        string_claims = {
            'P351': str(gene_record['_id']),
            'P2393': gene_record['locus_tag'],
        }
        # genomic start/end, stored as strings and qualified with the chromosome
        annotation_claims = {
            'P644': str(int(gene_record['genomic_pos']['start'])),
            'P645': str(int(gene_record['genomic_pos']['end'])),
        }
        # item-valued claims
        item_claims = {
            'P703': spec_strain.iloc[0]['wd_qid'],
            'P279': 'Q7187',
        }

        # Map the integer strand to its WD item (forward / reverse strand);
        # any other value adds no strand claim.
        strand = gene_record['genomic_pos']['strand']
        if strand == 1:
            item_claims['P2548'] = 'Q22809680'
        elif strand == -1:
            item_claims['P2548'] = 'Q22809711'

        chromosome_qualifier = PBB_Core.WDString(
            value=gene_record['genomic_pos']['chr'],
            prop_nr='P2249',
            is_qualifier=True)

        statements = [
            PBB_Core.WDItemID(value=qid,
                              prop_nr=prop,
                              references=[ncbi_gene_reference])
            for prop, qid in item_claims.items()
        ]
        statements += [
            PBB_Core.WDString(value=text,
                              prop_nr=prop,
                              references=[ncbi_gene_reference])
            for prop, text in string_claims.items()
        ]
        statements += [
            PBB_Core.WDString(value=position,
                              prop_nr=prop,
                              references=[ncbi_gene_reference],
                              qualifiers=[chromosome_qualifier])
            for prop, position in annotation_claims.items()
        ]
        return statements
Example #5
0
 def create_reference(self):
     """
     Build the UniProt reference for this object's statements.

     :return: list of PBB_Core reference objects: stated in (P248), the
         UniProt ID from self.uniprot (P352), today's UTC date (P813) and
         language of work (P407). Only the first one is flagged to
         overwrite existing references.
     """
     today_utc = time.strftime('+%Y-%m-%dT00:00:00Z', time.gmtime())

     stated_in = PBB_Core.WDItemID(value='Q905695', prop_nr='P248', is_reference=True)
     stated_in.overwrite_references = True
     uniprot_id = PBB_Core.WDString(value=self.uniprot, prop_nr='P352', is_reference=True)
     retrieved = PBB_Core.WDTime(time=today_utc, prop_nr='P813', is_reference=True)
     language = PBB_Core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True)  # language of work

     return [stated_in, uniprot_id, retrieved, language]
Example #6
0
    def create_xref_statement(self, value, xref_dict):
        """
        Build one item-valued statement from the first entry of xref_dict.

        :param value: value of the statement
        :param xref_dict: mapping of property number to a (possibly empty or
            None) mapping of qualifier property -> qualifier value
        :return: a PBB_Core.WDItemID for the first entry of xref_dict, or
            None when xref_dict is empty. Entries after the first are never
            looked at.
        """
        first_entry = next(iter(xref_dict.items()), None)
        if first_entry is None:
            return None

        prop_nr, qualifier_map = first_entry
        qualifiers = []
        if qualifier_map:
            qualifiers = [
                PBB_Core.WDItemID(value=q_value,
                                  prop_nr=q_prop,
                                  is_qualifier=True)
                for q_prop, q_value in qualifier_map.items()
            ]

        return PBB_Core.WDItemID(value=value,
                                 prop_nr=prop_nr,
                                 qualifiers=qualifiers,
                                 references=[self.create_reference()])
Example #7
0
 def create_reference(self):
     """
     Build the ontology reference for this object's statements.

     :return: list of PBB_Core reference objects: stated in
         self.ontology_ref_item (P248), a P143 item reference, today's UTC
         date (P813) and language of work (P407)
     """
     stated_in = PBB_Core.WDItemID(value=self.ontology_ref_item,
                                   prop_nr='P248',
                                   is_reference=True)
     imported_from = PBB_Core.WDItemID(value='Q22230760',
                                       prop_nr='P143',
                                       is_reference=True)
     today_utc = time.strftime('+%Y-%m-%dT00:00:00Z', time.gmtime())
     retrieved = PBB_Core.WDTime(time=today_utc,
                                 prop_nr='P813',
                                 is_reference=True)
     language = PBB_Core.WDItemID(value='Q1860',
                                  prop_nr='P407',
                                  is_reference=True)  # language of work
     return [stated_in, imported_from, retrieved, language]
Example #8
0
    def __init__(self, object):
            """
            Create or update the Wikidata item for one Uberon term, then exit.

            :param object: dict with the keys "logincreds", "uberonLabel",
                "uberon" (the term URI), "wikidata_id" (mapping used to find
                an existing item), "start" and "graph" (queried for exact
                synonyms). NOTE(review): the name shadows the builtin `object`.

            The constructor writes to Wikidata and ends with sys.exit(), so
            the process stops after the first term is handled.
            """
            self.logincreds = object["logincreds"]
            self.name = object["uberonLabel"]
            self.uberon = object["uberon"]
            # Strip the URI prefix so only the part after "UBERON_" remains.
            self.uberon_id = self.uberon.replace("http://purl.obolibrary.org/obo/UBERON_", "")
            self.wikidata_id = object["wikidata_id"]
            self.start = object["start"]
            self.graph = object["graph"]

            # NOTE(review): subcls and id are not used below in this method,
            # and `id` shadows the builtin.
            subcls = URIRef("http://www.w3.org/2000/01/rdf-schema#subClassOf")
            id = URIRef("http://www.geneontology.org/formats/oboInOwl#id")
            hasExactSyn = URIRef("http://www.geneontology.org/formats/oboInOwl#hasExactSynonym")
            print(self.uberon_id)
            print(self.name)

            # Reference shared by all statements: stated in (P248), P143 item
            # reference and today's UTC date (P813); each part is flagged to
            # overwrite existing references.
            refStatedIn = PBB_Core.WDItemID(21552738, prop_nr='P248', is_reference=True)
            refStatedIn.overwrite_references = True
            refImported = PBB_Core.WDItemID(value=7876491, prop_nr='P143', is_reference=True)
            refImported.overwrite_references = True
            timeStringNow = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
            refRetrieved = PBB_Core.WDTime(timeStringNow, prop_nr='P813', is_reference=True)
            refRetrieved.overwrite_references = True
            ub_reference = [refStatedIn, refImported, refRetrieved]


            # Look up an existing Wikidata item for this term.
            # NOTE(review): membership is tested with uberon_id but the lookup
            # key has "UBERON:" removed; the two only differ if uberon_id
            # contains that text -- confirm which key form the mapping uses.
            if self.uberon_id in self.wikidata_id.keys():
                self.wdid = self.wikidata_id[self.uberon_id.replace("UBERON:", "")]
            else:
                self.wdid = None

            # Exact synonyms of the term become English aliases further down.
            self.synonyms = []
            for synonym in self.graph.objects(URIRef(self.uberon), hasExactSyn):
                self.synonyms.append(str(synonym))

            # Statements keyed by property; each gets its own deep copy of the
            # reference so the statements do not share reference objects.
            prep = dict()
            prep["P279"] = [PBB_Core.WDItemID(value='Q4936952', prop_nr='P279', references=[copy.deepcopy(ub_reference)])]
            prep["P1554"] = [PBB_Core.WDString(value=self.uberon_id, prop_nr='P1554', references=[copy.deepcopy(ub_reference)])]
            print(self.uberon)
            prep["P1709"] = [PBB_Core.WDUrl(value=self.uberon, prop_nr='P1709', references=[copy.deepcopy(ub_reference)])]

            # Flatten the per-property lists into the list handed to the engine.
            data2add = []
            for key in prep.keys():
                for statement in prep[key]:
                    data2add.append(statement)
                    print(statement.prop_nr, statement.value)

            # Update the existing item when one was found, otherwise let the
            # engine work from the item name alone.
            if self.wdid is not None:
                wdPage = PBB_Core.WDItemEngine(self.wdid, item_name=self.name, data=data2add, server="www.wikidata.org", domain="anatomical_structure",append_value=['P279'])
            else:
                wdPage = PBB_Core.WDItemEngine(item_name=self.name, data=data2add, server="www.wikidata.org", domain="anatomical_structure", append_value=['P279'])
            if len(self.synonyms) >0:
                wdPage.set_aliases(aliases=self.synonyms, lang='en', append=True)
            print(self.synonyms)
            for syn in self.synonyms:
                print(syn)
            wdPage.write(self.logincreds)
            print("======")
            # NOTE(review): terminates the whole process after one item --
            # looks like a debugging leftover; confirm before relying on it.
            sys.exit()
Example #9
0
def interwiki_link(entrez, name):
    """
    Set the English Wikipedia sitelink of the gene item with a given Entrez ID.

    :param entrez: Entrez gene ID (P351) used to find the Wikidata item
    :param name: title of the English Wikipedia page to link
    :return: None
    """
    # Find the Q-id of the item whose P351 value equals the Entrez ID.
    sparql = """
        SELECT ?cid  WHERE {
        ?cid wdt:P351 ?entrez_id  .
        FILTER(?entrez_id ='""" + str(entrez) + """') .
    }
    """

    response = PBB_Core.WDItemEngine.execute_sparql_query(
        prefix=settings.PREFIX, query=sparql)
    # When several items match, the last binding wins; with no match cid
    # stays '' and is passed on unchanged.
    cid = ''
    for binding in response['results']['bindings']:
        cid = binding['cid']['value'].rsplit('/', 1)[-1]

    # NOTE(review): the credentials are Django model field objects, not
    # strings -- this looks unintended; confirm where real credentials
    # should come from.
    username = models.CharField(max_length=200, blank=False)
    password = models.CharField(max_length=200, blank=False)
    login_obj = PBB_login.WDLogin(user=username, pwd=password)

    # Load the gene item, point its enwiki sitelink at the page and save.
    gene_item = PBB_Core.WDItemEngine(wd_item_id=cid)
    gene_item.set_sitelink(site='enwiki', title=name)
    gene_item.write(login_obj)
def main():
    """
    Undo gene-to-protein merges.

    A query finds the WD items that carry both an Entrez gene ID (P351) and a
    UniProt ID (P352). For each of them a MergeDefender instance is created,
    which undoes the merge. The bot password is read from the first
    command-line argument. A failure on one item is logged and does not stop
    the run.
    :return: None
    """
    # NOTE(review): this echoes the password argument to stdout -- consider
    # removing it.
    print(sys.argv[1])
    # The login object was never created (its assignment had been fused into
    # a comment), so every MergeDefender call failed with a NameError.
    login = PBB_login.WDLogin(user='ProteinBoxBot', pwd=sys.argv[1])

    conflict_set_1 = {'P351'}
    conflict_set_2 = {'P352'}

    likely_merged_ids = PBB_Core.WDItemList(wdquery='CLAIM[351] AND CLAIM[352]')
    print(likely_merged_ids.wditems['items'])

    for count, x in enumerate(likely_merged_ids.wditems['items']):
        print('\n', count)
        print('Q{}'.format(x))

        try:
            MergeDefender(login, merge_target='Q{}'.format(x), conflict_set_1=conflict_set_1,
                          conflict_set_2=conflict_set_2)
        except Exception as e:
            # Best effort: record the failure and continue with the next item.
            traceback.print_exc()
            PBB_Core.WDItemEngine.log('ERROR', '{main_data_id}, "{exception_type}", "{message}"'.format(
                main_data_id=x,
                exception_type=type(e),
                message=str(e),
            ))
    def __init__(self):
        """
        Download the Disease Ontology and process every owl:Class in it.

        Builds a lookup of Disease Ontology ID -> Wikidata item for all items
        that already carry P699, resolves the current ontology revision on
        GitHub for use as reference URL, and creates one `disease` object per
        class. A failure on one class is logged (WDItemEngine log and
        /tmp/Diseaseexceptions.txt) and does not stop the run.
        """
        self.start = time.time()
        self.content = ET.fromstring(self.download_disease_ontology())
        self.logincreds = PBB_login.WDLogin(PBB_settings.getWikiDataUser(),
                                            PBB_settings.getWikiDataPassword())
        # self.updateDiseaseOntologyVersion()

        # Get all WikiData entries that contain a Disease Ontology ID (P699)
        print("Getting all terms with a Disease Ontology ID in WikiData")
        doWikiData_id = dict()
        DoInWikiData = PBB_Core.WDItemList("CLAIM[699]", "699")

        print("Getting latest version of Disease Ontology from Github")
        r = requests.get(
            "https://api.github.com/repos/DiseaseOntology/HumanDiseaseOntology/git/refs"
        )
        refs = r.json()
        # Pin the reference URL to the commit of the first ref returned.
        sha = refs[0]["object"]["sha"]
        githubReferenceUrl = "https://raw.githubusercontent.com/DiseaseOntology/HumanDiseaseOntology/" + sha + "/src/ontology/doid.owl"

        for diseaseItem in DoInWikiData.wditems["props"]["699"]:
            # diseaseItem[2] = DO identifier, diseaseItem[0] = WD identifier
            doWikiData_id[str(diseaseItem[2])] = diseaseItem[0]

        for doClass in self.content.findall(
                './/owl:Class', DiseaseOntology_settings.getDoNameSpaces()):
            # Reset per class so the error handler never reports the previous
            # class (or hits an unbound name) when disease() itself fails.
            diseaseClass = None
            try:
                disVars = [
                    doClass,
                    githubReferenceUrl,
                    doWikiData_id,
                    self.logincreds,
                    self.start,
                ]

                diseaseClass = disease(disVars)

                print("do_id: " + diseaseClass.do_id)
                print(diseaseClass.wdid)
                print(diseaseClass.name)
                print(diseaseClass.synonyms)
                print(diseaseClass.xrefs)
            except Exception as e:
                # '-' when the object was never built or has no do_id.
                do_id = str(getattr(diseaseClass, 'do_id', '-'))
                PBB_Core.WDItemEngine.log(
                    'ERROR',
                    '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'
                    .format(main_data_id=do_id,
                            exception_type=type(e),
                            message=str(e),
                            wd_id='-',
                            duration=time.time() - self.start))
                # `with` closes the file even if writing the traceback fails.
                with open('/tmp/Diseaseexceptions.txt', 'a') as f:
                    f.write(do_id + "\n")
                    traceback.print_exc(file=f)
Example #12
0
def print_item(qid):
    """
    Print a human-readable summary of a Wikidata item.

    Shows label, description, aliases, every statement (property label and
    value; item values are resolved to their label) and the sitelinks.
    Property labels are cached in the module-level prop_store.
    :param qid: QID of the item to print
    :return: None
    """
    wd_item = PBB_Core.WDItemEngine(wd_item_id=qid, use_sparql=True)
    label = wd_item.get_label()
    description = wd_item.get_description()
    aliases = wd_item.get_aliases()
    sitelinks_string = extract_sitelinks(
        wd_item.get_wd_json_representation()['sitelinks'])

    rendered = []
    for stmt in wd_item.statements:
        prop_nr = stmt.get_prop_nr()
        # Fetch the property label only once per property to limit traffic.
        if prop_nr not in prop_store:
            prop_store[prop_nr] = PBB_Core.WDItemEngine(
                wd_item_id=prop_nr).get_label()
        prop_label = prop_store[prop_nr]

        shown_value = stmt.get_value()
        if isinstance(stmt, PBB_Core.WDItemID):
            # Item values come without the 'Q' prefix; resolve their label.
            numeric_id = shown_value
            target = PBB_Core.WDItemEngine(
                wd_item_id='Q{}'.format(numeric_id))
            shown_value = '{} (QID: Q{})'.format(target.get_label(),
                                                 numeric_id)

        prop_column = '{} ({})'.format(prop_label, prop_nr)
        rendered.append('Prop: {0:.<40} value: {1} \n    '.format(
            prop_column, shown_value))

    statement_print = ''.join(rendered)

    template = '''


    Item QID: {4}
    Item: {0} / {1} / {2}
    {3}
    {5}
    '''
    print(template.format(label, description, aliases, statement_print, qid,
                          sitelinks_string))
Example #13
0
def main():
    """
    Ask for the bot password and run the French label replacement.

    Logs in as 'ProteinBoxBot' and runs LabelReplacement on all items that
    have P351 and P703 = Q5 (human genes), with the mapping
    'gène' -> 'gène humain' for language 'fr'.
    :return: None
    """
    # These two statements had been fused into one syntactically invalid
    # line; prompt first, then build the login object from the answer.
    pwd = input('Password:')
    login = PBB_login.WDLogin(user='ProteinBoxBot', pwd=pwd)

    # for mouse genes
    # LabelReplacement(PBB_Core.WDItemList('CLAIM[351] and CLAIM[703:83310]').wditems['items'], {'gène': 'gène de souris'},
    #                  'fr', login)

    # for human genes
    LabelReplacement(PBB_Core.WDItemList('CLAIM[351] and CLAIM[703:5]').wditems['items'], {'gène': 'gène humain'},
                     'fr', login)
Example #14
0
    def protein_item_statements():
        """
        Assemble the referenced statements for a protein item.

        Reads uniprot, gene_record and spec_strain from the enclosing scope;
        GO terms are sorted into their property bucket via parse_go_terms.
        :return: list of PBB_Core statement objects, all citing the UniProt
            reference
        """
        uniprot_ref = wdo.reference_store(source='uniprot', identifier=uniprot)

        string_claims = {
            'P637': str(gene_record['refseq']['protein']),
            'P352': uniprot,
        }
        item_claims = {
            'P703': [spec_strain.iloc[0]['wd_qid']],
            'P279': ['Q8054'],
            'P680': [],  # molecular function
            'P681': [],  # cellular component
            'P682': [],  # biological process
        }

        # parse_go_terms yields (value, property); file the value under its
        # property. An unknown property raises KeyError.
        for go_term in gene_record['GOTERMS']:
            parsed = parse_go_terms(go_term)
            item_claims[parsed[1]].append(parsed[0])

        # Item-valued claims first; properties with no values add nothing.
        statements = []
        for prop, qids in item_claims.items():
            if not qids:
                continue
            statements.extend(
                PBB_Core.WDItemID(value=qid,
                                  prop_nr=prop,
                                  references=[uniprot_ref])
                for qid in qids)

        # String-valued claims; empty values are skipped.
        statements.extend(
            PBB_Core.WDString(value=text,
                              prop_nr=prop,
                              references=[uniprot_ref])
            for prop, text in string_claims.items() if text)
        return statements
    def __init__(self, object):
        """
        Write an ortholog (P684) statement to a gene item on Wikidata.

        :param object: dict with the keys "logincreds", "source" (QID of the
            item to edit), "ortholog" (value of the statement) and
            "speciesWdID" (QID of the ortholog's species; only "Q5" and
            "Q83310" are supported)
        :raises ValueError: if the species is not supported. Previously this
            surfaced as an unbound-variable error further down.
        """
        self.logincreds = object["logincreds"]
        self.source = object["source"]
        self.ortholog = object["ortholog"]
        self.species = object["speciesWdID"]

        # Human (Q5) and mouse (Q83310) are the only species handled.
        supported_species = ("Q5", "Q83310")
        if self.species not in supported_species:
            raise ValueError(
                "unsupported ortholog species: {!r}".format(self.species))

        # Prepare references
        refStatedInHomologeneBuild = PBB_Core.WDItemID(value='Q20976936',
                                                       prop_nr='P248',
                                                       is_reference=True)
        refImportedFromHomologen = PBB_Core.WDItemID(value='Q468215',
                                                     prop_nr='P143',
                                                     is_reference=True)

        timeStringNow = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
        refRetrieved = PBB_Core.WDTime(timeStringNow,
                                       prop_nr='P813',
                                       is_reference=True)

        homologene_reference = [[
            refStatedInHomologeneBuild, refImportedFromHomologen, refRetrieved
        ]]

        # The qualifier (P703) carries the species itself, so one
        # construction covers both supported species.
        speciesQualifier = PBB_Core.WDItemID(value=self.species,
                                             prop_nr='P703',
                                             is_qualifier=True)

        # Prepare the item to add
        orthologValue = PBB_Core.WDItemID(value=self.ortholog,
                                          prop_nr='P684',
                                          references=homologene_reference,
                                          qualifiers=[speciesQualifier])

        wdPage = PBB_Core.WDItemEngine(wd_item_id=self.source,
                                       data=[orthologValue],
                                       server="www.wikidata.org",
                                       domain="genes")
        print(wdPage.wd_json_representation)
        wdPage.write(self.logincreds)
Example #16
0
def generate_refs(iuphar_ligand):
    """
    Build the reference block citing an IUPHAR ligand record.

    :param iuphar_ligand: ligand identifier stored under P595
    :return: a list containing one reference, itself a list of PBB_Core
        reference objects (stated in, source element, language of work,
        retrieved date)
    """
    # The timestamp carries a 'Z' (UTC) suffix, so format UTC rather than
    # local time; this also matches the other reference builders.
    retrieved = time.strftime('+%Y-%m-%dT00:00:00Z', time.gmtime())

    return [[
        PBB_Core.WDItemID(value='Q2793172', prop_nr='P248',
                          is_reference=True),  # stated in
        PBB_Core.WDString(value=iuphar_ligand,
                          prop_nr='P595',
                          is_reference=True),  # source element
        PBB_Core.WDItemID(value='Q1860', prop_nr='P407',
                          is_reference=True),  # language of work
        PBB_Core.WDTime(time=retrieved,
                        prop_nr='P813',
                        is_reference=True),  # retrieved
    ]]
Example #17
0
    def __init__(self):
        """
        Collect the Wikidata items that carry a Gene Ontology ID, then exit.

        Logs in, builds a lookup from the P686 value to the item for all items
        returned by the CLAIM[686] query, prints its size and calls
        sys.exit(). Everything after that call (downloading go.owl and
        creating a goTerm per class) is currently unreachable.
        """
        self.start = time.time()
        self.logincreds = PBB_login.WDLogin(PBB_settings.getWikiDataUser(),
                                            PBB_settings.getWikiDataPassword())
        # Get all WikiData entries that contain a Gene Ontology ID (P686)
        print("Getting all terms with a Gene Ontology ID in WikiData")
        goWikiData_id = dict()
        goInWikiData = PBB_Core.WDItemList("CLAIM[686]", "686")
        for goItem in goInWikiData.wditems["props"]["686"]:
            goWikiData_id[str(goItem[2])] = goItem[
                0]  # goItem[2] = GO identifier; goItem[0] presumably the WD identifier -- TODO confirm
        print(len(goWikiData_id.keys()))
        # NOTE(review): this ends the process here; the code below never runs.
        # Looks like a debugging leftover -- confirm before removing, since
        # the code below creates goTerm objects for every GO class.
        sys.exit()
        graph = rdflib.Graph()

        goUrl = requests.get("http://purl.obolibrary.org/obo/go.owl")

        print("ja")
        graph.parse(data=goUrl.text, format="application/rdf+xml")

        cls = URIRef("http://www.w3.org/2002/07/owl#Class")
        # NOTE(review): subcls is not used below in this method.
        subcls = URIRef("http://www.w3.org/2000/01/rdf-schema#subClassOf")
        counter = 0
        # One goTerm per owl:Class whose URI contains "GO"; a failure on one
        # class is printed and logged, and the loop continues.
        for gouri in graph.subjects(RDF.type, cls):
            try:
                counter = counter + 1
                print(counter)
                goVars = dict()
                goVars["uri"] = gouri
                goVars["label"] = graph.label(URIRef(gouri))
                goVars["wikidata_id"] = goWikiData_id
                goVars["logincreds"] = self.logincreds
                goVars["start"] = self.start
                goVars["graph"] = graph
                if "GO" in gouri:
                    goClass = goTerm(goVars)

            except Exception as e:
                print(traceback.format_exc())
                PBB_Core.WDItemEngine.log(
                    'ERROR',
                    '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'
                    .format(main_data_id=gouri,
                            exception_type=type(e),
                            message=e.__str__(),
                            wd_id='-',
                            duration=time.time() - self.start))
Example #18
0
    def __init__(self):
        """
        Fetch UniProt data for every mouse protein item and process it.

        For each item returned by the CLAIM[703:83310] AND CLAIM[352] query,
        the UniProt SPARQL endpoint is queried for that UniProt ID and the
        JSON result, extended with the login and the item's QID, is handed to
        mouse_protein. Failures are printed and appended to
        /tmp/exceptions.txt.

        NOTE(review): this method uses Python 2 print statements.
        """
        self.content = json.loads(self.download_mouse_proteins())
        # print self.content["results"]["bindings"]
        self.protein_count = len(self.content["results"]["bindings"])
        self.proteins = self.content["results"]["bindings"]
        self.logincreds = PBB_login.WDLogin(PBB_settings.getWikiDataUser(),
                                            PBB_settings.getWikiDataPassword())
        uniprotWikidataIds = dict()
        print "Getting all proteins with a uniprot ID in Wikidata"
        InWikiData = PBB_Core.WDItemList("CLAIM[703:83310] AND CLAIM[352]",
                                         "352")

        # NOTE(review): r0 is not used below in this method.
        r0 = requests.get(
            "http://sparql.uniprot.org/sparql?query=PREFIX+up%3a%3chttp%3a%2f%2fpurl.uniprot.org%2fcore%2f%3e+%0d%0aPREFIX+taxonomy%3a+%3chttp%3a%2f%2fpurl.uniprot.org%2ftaxonomy%2f%3e%0d%0aSELECT+DISTINCT+*%0d%0aWHERE%0d%0a%7b%0d%0a%09%09%3fprotein+a+up%3aProtein+.%0d%0a++%09%09%3fprotein+rdfs%3alabel+%3fprotein_label+.%0d%0a++++++++%3fprotein+up%3aorganism+taxonomy%3a10090+.%0d%0a%7d&format=srj"
        )

        for proteinItem in InWikiData.wditems["props"]["352"]:
            try:
                # proteinItem[2] is used as the UniProt ID and proteinItem[0]
                # as the numeric part of the item's QID (see 'Q' + ... below).
                uniprotWikidataIds[str(proteinItem[2])] = proteinItem[0]
                # URL-encoded SPARQL query; the UniProt ID is spliced into
                # the VALUES clause between the two literals.
                r = requests.get(
                    "http://sparql.uniprot.org/sparql?query=PREFIX+up%3a%3chttp%3a%2f%2fpurl.uniprot.org%2fcore%2f%3e%0d%0aPREFIX+taxonomy%3a%3chttp%3a%2f%2fpurl.uniprot.org%2ftaxonomy%2f%3e%0d%0aPREFIX+database%3a%3chttp%3a%2f%2fpurl.uniprot.org%2fdatabase%2f%3e%0d%0aSELECT+%3funiprot+%3fplabel+%3fecName+%3fupversion%0d%0a+++++++(group_concat(distinct+%3falias%3b+separator%3d%22%3b+%22)+as+%3fupalias)%0d%0a+++++++(group_concat(distinct+%3fpdb%3b+separator%3d%22%3b+%22)+as+%3fpdbid)%0d%0a+++++++(group_concat(distinct+%3frefseq%3b+separator%3d%22%3b+%22)+as+%3frefseqid)%0d%0a+++++++(group_concat(distinct+%3fensP%3b+separator%3d%22%3b+%22)+as+%3fensemblp)%0d%0aWHERE%0d%0a%7b%0d%0a%09%09VALUES+%3funiprot+%7b%3chttp%3a%2f%2fpurl.uniprot.org%2funiprot%2f"
                    + str(proteinItem[2]) +
                    "%3e%7d%0d%0a++++++++%3funiprot+rdfs%3alabel+%3fplabel+.%0d%0a++++++++%3funiprot+up%3aversion+%3fupversion+.+%0d%0a++++++++optional%7b%3funiprot+up%3aalternativeName+%3fupAlias+.%0d%0a++++++++%3fupAlias+up%3aecName+%3fecName+.%7d%0d%0a++++++++%0d%0a++++++++OPTIONAL%7b+%3funiprot+up%3aalternativeName+%3fupAlias+.%0d%0a++++++++++%7b%3fupAlias+up%3afullName+%3falias+.%7d+UNION%0d%0a++++++++%7b%3fupAlias+up%3ashortName+%3falias+.%7d%7d%0d%0a++++++++%3funiprot+up%3aversion+%3fupversion+.%0d%0a++++++++OPTIONAL%7b%3funiprot+rdfs%3aseeAlso+%3fpdb+.%0d%0a++++++++%3fpdb+up%3adatabase+database%3aPDB+.%7d%0d%0a++++++++OPTIONAL%7b%3funiprot+rdfs%3aseeAlso+%3frefseq+.%0d%0a++++++++%3frefseq+up%3adatabase+database%3aRefSeq+.%7d++%0d%0a++++++++OPTIONAL%7b%3funiprot+rdfs%3aseeAlso+%3fensT+.%0d%0a++++++++%3fensT+up%3adatabase+database%3aEnsembl+.%0d%0a++++++++%3fensT+up%3atranslatedTo+%3fensP+.%7d%0d%0a%7d%0d%0agroup+by+%3fupAlias+%3funiprot+%3fplabel+%3fecName+%3fupversion&format=srj"
                )
                # (A commented-out earlier variant of this request, identical
                # except that it did not select or group by ?upversion, used
                # to sit here.)
                print r.text
                protein = json.loads(r.text)
                protein["logincreds"] = self.logincreds
                protein["wdid"] = 'Q' + str(proteinItem[0])
                print protein
                proteinClass = mouse_protein(protein)

            except:
                # NOTE(review): bare except also catches KeyboardInterrupt and
                # SystemExit. `protein` may be unbound or left over from the
                # previous iteration if the failure happened before
                # json.loads, in which case the write below fails or logs the
                # wrong ID -- confirm and tighten.
                # client = Client('http://*****:*****@sentry.sulab.org/9')
                # client.captureException()
                print "There has been an except"
                print "Unexpected error:", sys.exc_info()[0]

                f = open('/tmp/exceptions.txt', 'a')
                # f.write("Unexpected error:", sys.exc_info()[0]+'\n')
                f.write(
                    str(protein["results"]["bindings"][0]["uniprot"]["value"])
                    + "\n")
                traceback.print_exc(file=f)
                f.close()
Example #19
0
def get_wd_search_results(search_string=''):
    """
    Run an English ``wbsearchentities`` search against the Wikidata API (at most 15 hits).

    :param search_string: a string which should be searched for in WD
    :return: an empty list if the search produced no hits; otherwise a 4-tuple of parallel lists
        ``(QIDs, labels, descriptions, aliases)``. A hit without a description or without aliases
        contributes ``''`` to the respective list. ``None`` is returned implicitly when a
        ``requests.HTTPError`` was caught and printed.
    :raises PBB_Core.WDSearchError: if the reply's ``success`` field is not 1
    """
    api_url = 'https://www.wikidata.org/w/api.php'
    query = {
        'action': 'wbsearchentities',
        'language': 'en',
        'search': search_string,
        'format': 'json',
        'limit': '15'
    }

    try:
        search_results = requests.get(api_url, params=query).json()

        if search_results['success'] != 1:
            raise PBB_Core.WDSearchError('WD search failed')

        hits = search_results['search']
        if len(hits) == 0:
            return []

        # one row per hit, then transpose the rows into four parallel lists
        rows = [
            (hit['id'], hit['label'], hit.get('description', ''), hit.get('aliases', ''))
            for hit in hits
        ]
        id_list, id_labels, id_descr, id_aliases = map(list, zip(*rows))
        return id_list, id_labels, id_descr, id_aliases

    except requests.HTTPError as err:
        print(err)
Example #20
0
def merge(merge_to, merge_from, login_obj):
    """
    Prepare ``merge_from`` for merging and then merge it into ``merge_to``.

    Preparation writes ``merge_from`` with a delete statement for P279 and with empty
    descriptions for 'en', 'de', 'fr' and 'nl'. Any exception raised along the way is
    pretty-printed and swallowed.

    :param merge_to: ID of the item that receives the merge
    :param merge_from: ID of the item that is cleaned up and merged away
    :param login_obj: login object used for the write and for the merge call
    :return: None
    """
    removals = [PBB_Core.WDBaseDataType.delete_statement(prop_nr='P279')]
    try:
        source_item = PBB_Core.WDItemEngine(wd_item_id=merge_from, data=removals)
        for language in ('en', 'de', 'fr', 'nl'):
            source_item.set_description(description='', lang=language)
        source_item.write(login=login_obj)

        print('merge accepted')
        pprint.pprint(
            PBB_Core.WDItemEngine.merge_items(from_id=merge_from, to_id=merge_to, login_obj=login_obj))
        print('merge completed')
    except (PBB_Core.MergeError, Exception) as err:
        pprint.pprint(err)
Example #21
0
    def __init__(self, wd_item_list, replacement_map, lang, login):
        """
        Replace item descriptions in one language according to a lookup table.

        For every entry of ``wd_item_list`` the item 'Q<entry>' is loaded; if its description in
        ``lang`` is a key of ``replacement_map``, the mapped value is sent to the Wikidata API
        with ``wbsetdescription``. Exceptions raised while sending are printed and swallowed.

        :param wd_item_list: iterable of item numbers (formatted into 'Q{}')
        :param replacement_map: mapping from current description to replacement description
        :param lang: language code of the description to inspect and replace
        :param login: object providing ``get_edit_token()`` and ``get_edit_cookie()``
        """
        for count, item_number in enumerate(wd_item_list):
            qid = 'Q{}'.format(item_number)
            wd_item = PBB_Core.WDItemEngine(wd_item_id=qid)

            description = wd_item.get_description(lang)

            if description not in replacement_map:
                print('No action required for QID: ', wd_item.wd_item_id, ' |count: ', count)
                continue

            print('entered')
            labels = wd_item.get_wd_json_representation()['labels']
            en_label = labels['en']['value'] if 'en' in labels else ''
            print('\n')
            print('Label: {}'.format(en_label), 'QID: ', wd_item.wd_item_id)
            print(count)

            try:
                payload = {
                    'action': 'wbsetdescription',
                    'id': qid,
                    'language': lang,
                    'value': replacement_map[description],
                    'token': login.get_edit_token(),
                    'bot': '',
                    'format': 'json',
                }
                session_cookies = login.get_edit_cookie()

                requests.post('https://www.wikidata.org/w/api.php', data=payload, cookies=session_cookies)

            except (requests.HTTPError, Exception) as err:
                print(err)
Example #22
0
    def __init__(self):
        """
        Download the Uberon ontology and hand every Uberon class to ``uberonTerm``.

        Steps: log in, collect the items returned for "CLAIM[1554]", parse
        http://purl.obolibrary.org/obo/uberon.owl into an rdflib graph, then iterate over all
        owl:Class subjects. Failures for a single class are printed and logged through
        ``PBB_Core.WDItemEngine.log`` and do not stop the loop.
        """
        self.start = time.time()
        self.logincreds = PBB_login.WDLogin(PBB_settings.getWikiDataUser(), PBB_settings.getWikiDataPassword())

        print("Getting all terms with a Uberon ID in WikiData")
        known_items = PBB_Core.WDItemList("CLAIM[1554]", "1554")
        # key: str() of element 2 of each row, value: element 0 of the same row
        # (presumably Uberon identifier -> Wikidata item number; confirm against WDItemList)
        ubWikiData_id = {
            str(row[2]): row[0] for row in known_items.wditems["props"]["1554"]
        }

        graph = rdflib.Graph()
        owl_reply = requests.get("http://purl.obolibrary.org/obo/uberon.owl")

        print("ja")
        graph.parse(data=owl_reply.text, format="application/rdf+xml")

        owl_class = URIRef("http://www.w3.org/2002/07/owl#Class")
        for uberonuri in graph.subjects(RDF.type, owl_class):
            try:
                uberonVars = {
                    "uberon": uberonuri,
                    "uberonLabel": graph.label(URIRef(uberonuri)),
                    "wikidata_id": ubWikiData_id,
                    "logincreds": self.logincreds,
                    "start": self.start,
                    "graph": graph,
                }
                # only URIs containing "UBERON" are processed
                if "UBERON" in uberonuri:
                    uberonTerm(uberonVars)

            except Exception as err:
                print(traceback.format_exc())
                log_line = '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                    main_data_id=uberonuri,
                    exception_type=type(err),
                    message=str(err),
                    wd_id='-',
                    duration=time.time() - self.start
                )
                PBB_Core.WDItemEngine.log('ERROR', log_line)
Example #23
0
def generate_refs(ref_source_id):
    """
    Build the reference block for a statement, keyed on the first letter of the source ID.

    IDs starting with 'C' get 'stated in' (P248) Q6120337 plus the ID under P592; IDs starting
    with 'N' get P248 Q21008030 plus the ID under P2115; any other ID gets neither. All
    references end with P407 = Q1860 and P813 set to today's date.

    :param ref_source_id: identifier string of the source record
    :return: a list containing exactly one list of PBB_Core reference objects
    """
    # (ID prefix, QID for 'stated in', property that stores the source ID), checked in order
    known_sources = (
        ('C', 'Q6120337', 'P592'),
        ('N', 'Q21008030', 'P2115'),
    )

    reference = []
    for prefix, stated_in_qid, id_property in known_sources:
        if ref_source_id.startswith(prefix):
            reference.append(
                PBB_Core.WDItemID(value=stated_in_qid, prop_nr='P248', is_reference=True))
            reference.append(
                PBB_Core.WDString(value=ref_source_id, prop_nr=id_property, is_reference=True))
            break

    # language of work
    reference.append(PBB_Core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True))
    # current date at midnight
    reference.append(
        PBB_Core.WDTime(time=time.strftime('+%Y-%m-%dT00:00:00Z'), prop_nr='P813', is_reference=True))

    return [reference]
Example #24
0
    def __init__(self, login):
        """
        Write gene expression images and a preferred image to Wikidata gene items.

        Reads the tab-separated file './image_data/gene_wiki_images_with_preferred.txt', matches
        each row's 'entrez' value against the items returned by the WDQ query
        'CLAIM[351] and CLAIM[703:5]' and writes to the matching item:

        * P351 with the Entrez ID,
        * one P692 statement per 'PBB GE ' entry of the 'other_images' column,
        * P18 with the row's 'primary_image', if present.

        Every row produces one ``PBB_Core.WDItemEngine.log`` entry (ERROR if the Entrez ID is
        unknown or the write fails, WARNING if the row has no images, INFO on success).

        :param login: login object passed to ``WDItemEngine.write``
        """
        self.login_obj = login

        log_layout = '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'
        image_file_extension = ['.png', '.jpg', '.jpeg', '.pdf']

        image_data = pd.read_csv(
            './image_data/gene_wiki_images_with_preferred.txt',
            encoding='utf-8',
            sep='\t',
            # np.str was only an alias of the builtin str and no longer exists in current NumPy
            dtype={'entrez': str})

        wdq_results = PBB_Core.WDItemList('CLAIM[351] and CLAIM[703:5]',
                                          '351').wditems
        entrez_claims = wdq_results['props']['351']

        # Entrez ID -> QID lookup, replacing a linear list search per row. setdefault keeps
        # the first QID seen for a repeated Entrez ID, which is what list.index() returned.
        qid_by_entrez = {}
        for claim in entrez_claims:
            qid_by_entrez.setdefault(claim[2], 'Q{}'.format(claim[0]))

        print(len(entrez_claims))

        for index in image_data.index:
            start = time.time()
            image_names = image_data.loc[index, 'other_images']
            preferred_image = image_data.loc[index, 'primary_image']

            # A '|'-separated primary image is narrowed down to a part that contains one of the
            # known file extensions; if several parts qualify, the last one wins.
            if pd.notnull(preferred_image) and '|' in preferred_image:
                for splt in preferred_image.split('|'):
                    if any(ending in splt for ending in image_file_extension):
                        preferred_image = splt

            entrez = image_data.loc[index, 'entrez']

            if entrez not in qid_by_entrez:
                PBB_Core.WDItemEngine.log(
                    'ERROR',
                    log_layout.format(main_data_id=entrez,
                                      exception_type='',
                                      message='Entrez ID not yet in Wikidata!!',
                                      wd_id='',
                                      duration=time.time() - start))
                continue
            curr_qid = qid_by_entrez[entrez]

            if pd.isnull(image_names):
                PBB_Core.WDItemEngine.log(
                    'WARNING',
                    log_layout.format(main_data_id=entrez,
                                      exception_type='',
                                      message='No images available for this Entrez ID',
                                      wd_id=curr_qid,
                                      duration=time.time() - start))
                continue

            protein_image_value_store = []
            genex_value_store = []

            for sub_string in image_names.split('|'):
                # the first five characters are dropped from every image name
                # (presumably a 'File:' prefix; confirm against the input file)
                if 'PBB GE ' in sub_string:
                    # Gene Expression reference: https://www.wikidata.org/wiki/Q21074956
                    genex_value_store.append(
                        PBB_Core.WDCommonsMedia(value=sub_string[5:], prop_nr='P692'))
                elif 'PDB ' in sub_string:
                    # NOTE(review): these statements have an empty prop_nr and are never added
                    # to `data` below, so protein images are not written; looks unfinished.
                    protein_image_value_store.append(
                        PBB_Core.WDCommonsMedia(sub_string[5:], prop_nr=''))

            data = [PBB_Core.WDString(value=entrez, prop_nr='P351')]
            data.extend(genex_value_store)

            if pd.notnull(preferred_image):
                data.append(
                    PBB_Core.WDCommonsMedia(value=preferred_image,
                                            prop_nr='P18'))

            try:
                gene_item = PBB_Core.WDItemEngine(wd_item_id=curr_qid,
                                                  domain='genes',
                                                  data=data)

                gene_item.write(self.login_obj)

                PBB_Core.WDItemEngine.log(
                    'INFO',
                    log_layout.format(main_data_id=entrez,
                                      exception_type='',
                                      message='success',
                                      wd_id=curr_qid,
                                      duration=time.time() - start))
                print(index, 'success', curr_qid, entrez,
                      gene_item.get_label(lang='en'))

            except Exception as e:
                # a failing item must not abort the whole run: report it and move on
                print(index, 'error', curr_qid, entrez)
                PBB_Core.WDItemEngine.log(
                    'ERROR',
                    log_layout.format(main_data_id=entrez,
                                      exception_type=type(e),
                                      message=e.__str__(),
                                      wd_id=curr_qid,
                                      duration=time.time() - start))
Example #25
0
    def cleanup_obsolete_edges(ontology_id,
                               core_property_nr,
                               login,
                               current_node_qids=(),
                               obsolete_term=False):
        """
        Remove statements on other items that point at an ontology term and, for obsolete
        terms, deprecate the term's own identifier statement.

        A SPARQL query collects every (item, property) pair whose value is the item carrying
        ``ontology_id`` under ``core_property_nr``. Each hit that is not listed in
        ``current_node_qids`` is written back with the statement marked for removal. One log
        entry is produced per attempted write.

        :param ontology_id: identifier value to look up
        :param core_property_nr: property id inserted after ``wdt:`` in the query and used for
            the deprecated identifier statement
        :param login: login object passed to ``WDItemEngine.write``
        :param current_node_qids: QIDs whose statements are left alone
        :param obsolete_term: if true, the query gets no property filter and the identifier
            statement is afterwards written with rank 'deprecated' (skipped if that would
            create a new item)
        :return: None
        """
        log_layout = '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'

        # one Filter line per entry of OBOImporter.obo_wd_map, using the first key of each entry
        if obsolete_term:
            filter_props_string = ''
        else:
            filter_props_string = ''.join(
                'Filter (?p = wdt:{})\n'.format(list(prop_map.keys())[0])
                for prop_map in OBOImporter.obo_wd_map.values())

        query = '''
        SELECT DISTINCT ?qid ?p ?onto_qid WHERE {{
            {{
                SELECT DISTINCT ?onto_qid WHERE {{
                    ?onto_qid wdt:{2} '{0}' .
                }}
            }}
            ?qid ?p [wdt:{2} '{0}'].
            {1}
        }}
        ORDER BY ?qid
        '''.format(ontology_id, filter_props_string, core_property_nr)
        print(query)

        sparql_reply = PBB_Core.WDItemEngine.execute_sparql_query(query=query)

        for binding in sparql_reply['results']['bindings']:
            subject_uri = binding['qid']['value']
            # subjects whose URI contains 'statement' are skipped
            if 'statement' in subject_uri:
                continue

            start = time.time()

            qid = subject_uri.split('/')[-1]
            if qid in current_node_qids:
                continue

            edge_prop = binding['p']['value'].split('/')[-1]
            target_qid = binding['onto_qid']['value'].split('/')[-1]
            stale_edge = PBB_Core.WDItemID(value=target_qid, prop_nr=edge_prop)
            # flag the statement for removal
            stale_edge.remove = ''
            try:
                PBB_Core.WDItemEngine(wd_item_id=qid, data=[stale_edge]).write(login=login)

                PBB_Core.WDItemEngine.log(
                    'INFO',
                    log_layout.format(main_data_id='{}'.format(ontology_id),
                                      exception_type='',
                                      message='successfully removed obsolete edges',
                                      wd_id=qid,
                                      duration=time.time() - start))
            except Exception as err:
                print(err)
                PBB_Core.WDItemEngine.log(
                    'ERROR',
                    log_layout.format(main_data_id='{}'.format(ontology_id),
                                      exception_type=type(err),
                                      message=err.__str__(),
                                      wd_id=qid,
                                      duration=time.time() - start))

        if not obsolete_term:
            return

        deprecated_identifier = [
            PBB_Core.WDString(value=ontology_id,
                              prop_nr=core_property_nr,
                              rank='deprecated'),
        ]

        start = time.time()
        try:
            term_item = PBB_Core.WDItemEngine(item_name='obo',
                                              domain='obo',
                                              data=deprecated_identifier,
                                              use_sparql=True)
            # never create a fresh item just to mark it as deprecated
            if term_item.create_new_item:
                return
            qid = term_item.write(login=login)

            PBB_Core.WDItemEngine.log(
                'INFO',
                log_layout.format(main_data_id='{}'.format(ontology_id),
                                  exception_type='',
                                  message='successfully obsoleted the ',
                                  wd_id=qid,
                                  duration=time.time() - start))
        except Exception as err:
            print(err)
            PBB_Core.WDItemEngine.log(
                'ERROR',
                log_layout.format(main_data_id='{}'.format(ontology_id),
                                  exception_type=type(err),
                                  message=err.__str__(),
                                  wd_id='',
                                  duration=time.time() - start))
Example #26
0
        def get_item_qid(go_id, data=()):
            """
            Create or update the Wikidata item of one ontology term and return its QID.

            This is a nested function: ``self``, ``current_root_id``, ``parents``, ``children``
            and ``current_node_qids`` come from an enclosing scope that is not part of this
            fragment.

            :param go_id: term identifier without the ontology prefix
            :param data: iterable of additional statements for the item (copied into a list)
            :return: a 3-tuple, one of
                ``(qid, False, False)`` if the term already had a root write,
                ``(None, None, None)`` if the term is obsolete or any exception occurred,
                ``(qid, go_term_data['obo_xref'], wd_item.require_write)`` otherwise
            """
            start = time.time()

            # identifier as stored in Wikidata: optionally prefixed with '<ontology>:'
            if self.use_prefix:
                id_string = '{}:{}'.format(self.ontology, go_id)
            else:
                id_string = go_id

            # for efficiency reasons, skip if item already had a root write performed
            if go_id in self.local_qid_onto_map and self.local_qid_onto_map[go_id]['had_root_write'] \
                    and 'qid' in self.local_qid_onto_map[go_id]:
                return self.local_qid_onto_map[go_id]['qid'], False, False

            try:
                data = list(data)

                # fetch the term record as JSON from self.base_url + '<ontology>_<go_id>'
                r = OBOImporter.ols_session.get(
                    url=self.base_url + '{}_{}'.format(self.ontology, go_id),
                    headers=self.headers)
                go_term_data = r.json()
                label = go_term_data['label'].replace('_', ' ')

                # only the first description entry is used
                description = go_term_data['description'][0]

                # obsolete terms are cleaned up instead of being written
                if go_term_data['is_obsolete']:
                    OBOImporter.cleanup_obsolete_edges(
                        ontology_id=id_string,
                        login=self.login_obj,
                        core_property_nr=self.core_property_nr,
                        obsolete_term=True)
                    return None, None, None

                # get parent ontology term info so item can be populated with description, etc.
                data.append(
                    PBB_Core.WDString(value=id_string,
                                      prop_nr=self.core_property_nr,
                                      references=[self.create_reference()]))

                # exact match (P2888) URL of the term on purl.obolibrary.org
                exact_match_string = 'http://purl.obolibrary.org/obo/{}_{}'.format(
                    self.ontology, go_id)
                data.append(
                    PBB_Core.WDUrl(value=exact_match_string, prop_nr='P2888'))

                # add xrefs (only for databases listed in OBOImporter.xref_props)
                if go_term_data['obo_xref']:
                    for xref in go_term_data['obo_xref']:
                        if xref['database'] in OBOImporter.xref_props:
                            wd_prop = OBOImporter.xref_props[xref['database']]
                        else:
                            continue
                        xref_value = xref['id']
                        data.append(
                            PBB_Core.WDExternalID(
                                value=xref_value,
                                prop_nr=wd_prop,
                                references=[self.create_reference()]))

                # add synonyms whose type is listed in OBOImporter.obo_synonyms
                if go_term_data['obo_synonym']:
                    for syn in go_term_data['obo_synonym']:
                        if syn['type'] in OBOImporter.obo_synonyms:
                            wd_prop = OBOImporter.obo_synonyms[syn['type']]
                        else:
                            continue
                        syn_value = syn['name']
                        data.append(
                            PBB_Core.WDExternalID(
                                value=syn_value,
                                prop_nr=wd_prop,
                                references=[self.create_reference()]))

                # reuse the known QID if there is one, otherwise let the engine look the item up
                if go_id in self.local_qid_onto_map:
                    wd_item = PBB_Core.WDItemEngine(
                        wd_item_id=self.local_qid_onto_map[go_id]['qid'],
                        domain='obo',
                        data=data,
                        fast_run=self.fast_run,
                        fast_run_base_filter=self.fast_run_base_filter)
                else:
                    wd_item = PBB_Core.WDItemEngine(
                        item_name='test',
                        domain='obo',
                        data=data,
                        fast_run=self.fast_run,
                        fast_run_base_filter=self.fast_run_base_filter)
                wd_item.set_label(label=label)
                # descriptions are cut to their first 250 characters
                wd_item.set_description(description=description[0:250])
                # if len(description) <= 250:
                #     wd_item.set_description(description=description)
                # else:
                #     wd_item.set_description(description='Gene Ontology term')
                if go_term_data['synonyms'] is not None and len(
                        go_term_data['synonyms']) > 0:
                    # aliases longer than 250 characters are dropped
                    aliases = []
                    for alias in go_term_data['synonyms']:
                        if len(alias) <= 250:
                            aliases.append(alias)

                    wd_item.set_aliases(aliases=aliases)

                new_msg = ''
                if wd_item.create_new_item:
                    new_msg = ': created new {} term'.format(self.ontology)

                qid = wd_item.write(login=self.login_obj)

                # remember the QID so later calls can address the item directly
                if go_id not in self.local_qid_onto_map:
                    self.local_qid_onto_map[go_id] = {
                        'qid': qid,
                        'had_root_write': False,
                    }

                # the root term of the enclosing call additionally records its parents/children
                if go_id == current_root_id:
                    self.local_qid_onto_map[go_id]['had_root_write'] = True
                    self.local_qid_onto_map[go_id]['parents'] = list(parents)
                    self.local_qid_onto_map[go_id]['children'] = list(children)

                current_node_qids.append(qid)
                print('QID created or retrieved', qid)

                PBB_Core.WDItemEngine.log(
                    'INFO',
                    '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'
                    .format(main_data_id='{}:{}'.format(self.ontology, go_id),
                            exception_type='',
                            message='success{}'.format(new_msg),
                            wd_id=qid,
                            duration=time.time() - start))
                return qid, go_term_data['obo_xref'], wd_item.require_write

            except Exception as e:
                # any failure (request, missing key, write) is logged and reported as all-None
                print(e)
                # traceback.print_exc(e)

                PBB_Core.WDItemEngine.log(
                    'ERROR',
                    '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'
                    .format(main_data_id='{}:{}'.format(self.ontology, go_id),
                            exception_type=type(e),
                            message=e.__str__(),
                            wd_id='',
                            duration=time.time() - start))
                return None, None, None
Example #27
0
    def write_term(self, current_root_id, parents, children):
        """
        Write one ontology term, its parents and its related terms to Wikidata.

        The parents and the terms connected through mapped edges of ``self.term_graph`` are
        written first; the resulting statements are then attached to the root term itself.
        Finally ``self.local_qid_onto_map`` is dumped to 'temp_<ontology>_onto_map.json'.

        :param current_root_id: identifier (without ontology prefix) of the term to write
        :param parents: identifiers of the parent terms; each becomes a P279 statement
        :param children: identifiers of the child terms; only stored in the local map
        :return: None
        """
        print('current_root', current_root_id, parents, children)
        current_node_qids = []

        def get_item_qid(go_id, data=()):
            """
            Create or update the Wikidata item of one ontology term and return its QID.

            :param go_id: term identifier without the ontology prefix
            :param data: iterable of additional statements for the item (copied into a list)
            :return: a 3-tuple, one of
                ``(qid, False, False)`` if the term already had a root write,
                ``(None, None, None)`` if the term is obsolete or any exception occurred,
                ``(qid, go_term_data['obo_xref'], wd_item.require_write)`` otherwise
            """
            start = time.time()

            if self.use_prefix:
                id_string = '{}:{}'.format(self.ontology, go_id)
            else:
                id_string = go_id

            # for efficiency reasons, skip if item already had a root write performed
            if go_id in self.local_qid_onto_map and self.local_qid_onto_map[go_id]['had_root_write'] \
                    and 'qid' in self.local_qid_onto_map[go_id]:
                return self.local_qid_onto_map[go_id]['qid'], False, False

            try:
                data = list(data)

                r = OBOImporter.ols_session.get(
                    url=self.base_url + '{}_{}'.format(self.ontology, go_id),
                    headers=self.headers)
                go_term_data = r.json()
                label = go_term_data['label'].replace('_', ' ')

                # only the first description entry is used
                description = go_term_data['description'][0]

                # obsolete terms are cleaned up instead of being written
                if go_term_data['is_obsolete']:
                    OBOImporter.cleanup_obsolete_edges(
                        ontology_id=id_string,
                        login=self.login_obj,
                        core_property_nr=self.core_property_nr,
                        obsolete_term=True)
                    return None, None, None

                # get parent ontology term info so item can be populated with description, etc.
                data.append(
                    PBB_Core.WDString(value=id_string,
                                      prop_nr=self.core_property_nr,
                                      references=[self.create_reference()]))

                exact_match_string = 'http://purl.obolibrary.org/obo/{}_{}'.format(
                    self.ontology, go_id)
                data.append(
                    PBB_Core.WDUrl(value=exact_match_string, prop_nr='P2888'))

                # add xrefs (only for databases listed in OBOImporter.xref_props)
                if go_term_data['obo_xref']:
                    for xref in go_term_data['obo_xref']:
                        if xref['database'] in OBOImporter.xref_props:
                            wd_prop = OBOImporter.xref_props[xref['database']]
                        else:
                            continue
                        xref_value = xref['id']
                        data.append(
                            PBB_Core.WDExternalID(
                                value=xref_value,
                                prop_nr=wd_prop,
                                references=[self.create_reference()]))

                # add synonyms whose type is listed in OBOImporter.obo_synonyms
                if go_term_data['obo_synonym']:
                    for syn in go_term_data['obo_synonym']:
                        if syn['type'] in OBOImporter.obo_synonyms:
                            wd_prop = OBOImporter.obo_synonyms[syn['type']]
                        else:
                            continue
                        syn_value = syn['name']
                        data.append(
                            PBB_Core.WDExternalID(
                                value=syn_value,
                                prop_nr=wd_prop,
                                references=[self.create_reference()]))

                # reuse the known QID if there is one, otherwise let the engine look the item up
                if go_id in self.local_qid_onto_map:
                    wd_item = PBB_Core.WDItemEngine(
                        wd_item_id=self.local_qid_onto_map[go_id]['qid'],
                        domain='obo',
                        data=data,
                        fast_run=self.fast_run,
                        fast_run_base_filter=self.fast_run_base_filter)
                else:
                    wd_item = PBB_Core.WDItemEngine(
                        item_name='test',
                        domain='obo',
                        data=data,
                        fast_run=self.fast_run,
                        fast_run_base_filter=self.fast_run_base_filter)
                wd_item.set_label(label=label)
                # descriptions are cut to their first 250 characters
                wd_item.set_description(description=description[0:250])
                if go_term_data['synonyms'] is not None and len(
                        go_term_data['synonyms']) > 0:
                    # aliases longer than 250 characters are dropped
                    aliases = []
                    for alias in go_term_data['synonyms']:
                        if len(alias) <= 250:
                            aliases.append(alias)

                    wd_item.set_aliases(aliases=aliases)

                new_msg = ''
                if wd_item.create_new_item:
                    new_msg = ': created new {} term'.format(self.ontology)

                qid = wd_item.write(login=self.login_obj)

                # remember the QID so later calls can address the item directly
                if go_id not in self.local_qid_onto_map:
                    self.local_qid_onto_map[go_id] = {
                        'qid': qid,
                        'had_root_write': False,
                    }

                if go_id == current_root_id:
                    self.local_qid_onto_map[go_id]['had_root_write'] = True
                    self.local_qid_onto_map[go_id]['parents'] = list(parents)
                    self.local_qid_onto_map[go_id]['children'] = list(children)

                current_node_qids.append(qid)
                print('QID created or retrieved', qid)

                PBB_Core.WDItemEngine.log(
                    'INFO',
                    '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'
                    .format(main_data_id='{}:{}'.format(self.ontology, go_id),
                            exception_type='',
                            message='success{}'.format(new_msg),
                            wd_id=qid,
                            duration=time.time() - start))
                return qid, go_term_data['obo_xref'], wd_item.require_write

            except Exception as e:
                # any failure (request, missing key, write) is logged and reported as all-None
                print(e)

                PBB_Core.WDItemEngine.log(
                    'ERROR',
                    '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'
                    .format(main_data_id='{}:{}'.format(self.ontology, go_id),
                            exception_type=type(e),
                            message=e.__str__(),
                            wd_id='',
                            duration=time.time() - start))
                return None, None, None

        dt = []
        parent_qids = []
        write_required = []

        # parents: one 'subclass of' (P279) statement per parent that could be resolved
        for parent_id in parents:
            pi, _, w = get_item_qid(parent_id)
            write_required.append(w)

            if pi:
                parent_qids.append(pi)
                dt.append(
                    PBB_Core.WDItemID(value=pi,
                                      prop_nr='P279',
                                      references=[self.create_reference()]))

        # remaining edges: every mapped relation except rdfs:subClassOf, handled above
        for edge in self.term_graph['edges']:
            if edge['uri'] in self.obo_wd_map and edge[
                    'uri'] != 'http://www.w3.org/2000/01/rdf-schema#subClassOf':
                go = edge['target'].split('_')[-1]
                if go != current_root_id:
                    xref_dict = self.obo_wd_map[edge['uri']]
                elif edge['uri'] in self.rev_prop_map and edge['source'].split(
                        '_')[-1] != current_root_id:
                    # edge points at the root: use the reverse property from the source's side
                    xref_dict = self.obo_wd_map[self.rev_prop_map[edge['uri']]]
                    go = edge['source'].split('_')[-1]
                else:
                    continue

                pi, _, w = get_item_qid(go_id=go)
                write_required.append(w)
                # NOTE(review): unlike the parent loop, pi is not checked here, so a failed or
                # obsolete term passes value=None to create_xref_statement; confirm intended.
                dt.append(
                    self.create_xref_statement(value=pi, xref_dict=xref_dict))

        # The second element is the root term's 'obo_xref' value (or False / None, see
        # get_item_qid), not an "is obsolete" flag.
        _, root_xrefs, _ = get_item_qid(go_id=current_root_id, data=dt)
        if root_xrefs and not any(write_required):
            if self.use_prefix:
                id_string = '{}:{}'.format(self.ontology, current_root_id)
            else:
                id_string = current_root_id

            OBOImporter.cleanup_obsolete_edges(
                ontology_id=id_string,
                login=self.login_obj,
                core_property_nr=self.core_property_nr,
                current_node_qids=current_node_qids)

        print('----COUNT----:', len(self.local_qid_onto_map))
        # context manager: the file is closed even if serialising or writing fails
        with open('temp_{}_onto_map.json'.format(self.ontology), 'w') as f:
            f.write(json.dumps(self.local_qid_onto_map))
Example #28
0
    def __init__(self, object):
        """
        Build the statements for one protein from a UniProt SPARQL reply and write the item.

        Only the first row of ``object["results"]["bindings"]`` is used. From it the UniProt
        ID, label, optional EC name, aliases and the ';'-separated PDB, RefSeq and Ensembl
        protein IDs are read. All statements are referenced with P143 = Q905695.

        :param object: dict with the keys "logincreds", "wdid" and "results" (SPARQL JSON reply
            with the variables uniprot, plabel, upversion, upalias, ensemblp and optionally
            ecName, pdbid, refseqid)
        """
        # Uniprot
        self.logincreds = object["logincreds"]
        binding = object["results"]["bindings"][0]
        self.version = binding["upversion"]["value"]
        self.wdid = object["wdid"]
        self.uniprot = binding["uniprot"]["value"]
        print(self.uniprot)
        self.uniprotId = binding["uniprot"]["value"].replace(
            "http://purl.uniprot.org/uniprot/", "").replace(" ", "")
        self.name = binding["plabel"]["value"]
        if "ecName" in binding:
            print(binding["ecName"]["value"])
            self.ecname = binding["ecName"]["value"]
        self.alias = binding["upalias"]["value"].split(";")

        # self.pdb / self.refseq only exist when the reply carries a non-empty value
        if "pdbid" in binding and binding["pdbid"]["value"] != "":
            self.pdb = [
                pdb_id.replace("http://rdf.wwpdb.org/pdb/", "").replace(" ", "")
                for pdb_id in binding["pdbid"]["value"].split(";")
            ]
        # The reply variable is "refseqid". The previous test for "refseq" never matched, so
        # RefSeq IDs were silently dropped.
        if "refseqid" in binding and binding["refseqid"]["value"] != "":
            self.refseq = [
                refseq_id.replace("http://purl.uniprot.org/refseq/", "").replace(" ", "")
                for refseq_id in binding["refseqid"]["value"].split(";")
            ]
        self.ensemblp = [
            ens_p.replace("http://purl.uniprot.org/ensembl/", "").replace(" ", "")
            for ens_p in binding["ensemblp"]["value"].split(";")
        ]

        protein_reference = {
            'ref_properties': [u'P143', 'TIMESTAMP'],
            'ref_values': [u'Q905695', 'TIMESTAMP']
        }
        print(vars(self))
        references = dict()
        data2add = dict()

        # P279 = subclass of
        data2add["P279"] = ["8054"]
        references['P279'] = [copy.deepcopy(protein_reference)]

        # P703 = found in taxon
        data2add["P703"] = ["83310"]
        references['P703'] = [copy.deepcopy(protein_reference)]

        # P352 = UniprotID
        data2add["P352"] = [self.uniprotId]
        references['P352'] = [copy.deepcopy(protein_reference)]

        # P591 = EC number
        if "ecname" in vars(self):
            data2add["P591"] = [self.ecname]
            references['P591'] = [copy.deepcopy(protein_reference)]

        # P638 = PDBID (one reference copy per value)
        if "pdb" in vars(self):
            print("len pdb = " + str(len(self.pdb)))
            print(self.pdb)
            if self.pdb:
                data2add['P638'] = self.pdb
                references['P638'] = [copy.deepcopy(protein_reference) for _ in self.pdb]

        # P637 = Refseq Protein ID
        if "refseq" in vars(self) and self.refseq:
            data2add['P637'] = self.refseq
            references['P637'] = [copy.deepcopy(protein_reference) for _ in self.refseq]

        # P705 = Ensembl Protein ID
        # NOTE(review): an empty "ensemblp" value still yields [''] here and is written as is.
        if self.ensemblp:
            data2add['P705'] = self.ensemblp
            references['P705'] = [copy.deepcopy(protein_reference) for _ in self.ensemblp]

        wdPage = PBB_Core.WDItemEngine(wd_item_id=self.wdid,
                                       item_name=self.name,
                                       data=data2add,
                                       server="www.wikidata.org",
                                       references=references,
                                       domain="proteins")
        self.wd_json_representation = wdPage.get_wd_json_representation()
        PBB_Debug.prettyPrint(self.wd_json_representation)
        wdPage.write(self.logincreds)
def _write_claims_and_log(qid, claims, login, main_data_id, success_message,
                          success_label, start):
    """
    Write ``claims`` to the Wikidata item ``qid`` and log the outcome.

    :param qid: QID of the item to update
    :param claims: list of PBB_Core statement objects to write to the item
    :param login: login object handed to ``WDItemEngine.write``
    :param main_data_id: source identifier recorded in the log line
    :param success_message: message recorded in the INFO log line on success
    :param success_label: short text printed to stdout on success
    :param start: ``time.time()`` value the logged duration is measured from
    :return: True if the write (and its INFO log) completed, False otherwise
    """
    log_template = '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'
    try:
        wd_item = PBB_Core.WDItemEngine(wd_item_id=qid, data=claims)
        wd_item.write(login)
        PBB_Core.WDItemEngine.log(
            'INFO',
            log_template.format(main_data_id=main_data_id,
                                exception_type='',
                                message=success_message,
                                wd_id=wd_item.wd_item_id,
                                duration=time.time() - start))
        print(success_label)
        return True
    except Exception as e:
        # Best effort: a failed write is reported and logged, never raised,
        # so the caller can still attempt the other item.
        print(e)
        PBB_Core.WDItemEngine.log(
            'ERROR',
            log_template.format(main_data_id=main_data_id,
                                exception_type=type(e),
                                message=str(e),
                                wd_id='',
                                duration=time.time() - start))
        return False


def encodes(gene_record, login):
    """
    Link a gene item and its protein item via encodes (P688) and encoded by (P702).

    The gene item is looked up by ``gene_record['_id']`` on P351 and the
    protein item by the first value of ``gene_record['uniprot']`` on P352.
    If both items are found, a P688 claim is written to the gene item and a
    P702 claim to the protein item. Both writes are always attempted; a
    failure of one is logged and does not prevent the other.

    :param gene_record: gene record from MGI_UNIP_MERGER(); must provide
        ``'_id'`` and a ``'uniprot'`` mapping with at least one value
    :param login: login object passed to ``WDItemEngine.write``
    :return: ``'success'`` if both claims were written, otherwise ``None``
    """
    uniprot = str(list(gene_record['uniprot'].values())[0])
    start = time.time()

    # find gene and protein qids
    gene_qid = wdo.WDSparqlQueries(prop='P351',
                                   string=gene_record['_id']).wd_prop2qid()
    protein_qid = wdo.WDSparqlQueries(prop='P352',
                                      string=uniprot).wd_prop2qid()
    print(gene_qid, protein_qid)

    # if a gene or protein item is not found skip this one
    if gene_qid is None or protein_qid is None:
        print('Time elapsed:', time.time() - start)
        return None

    print('gene {} and protein {} found'.format(gene_qid, protein_qid))

    # generate reference and claim values for each item
    ncbi_gene_reference = wdo.reference_store(source='ncbi_gene',
                                              identifier=gene_record['_id'])
    gene_encodes = [
        PBB_Core.WDItemID(value=protein_qid,
                          prop_nr='P688',
                          references=[ncbi_gene_reference])
    ]
    protein_encoded_by = [
        PBB_Core.WDItemID(value=gene_qid,
                          prop_nr='P702',
                          references=[ncbi_gene_reference])
    ]

    # The item engines are built only inside the helper's try block: the
    # previous extra construction up front fetched the gene item twice and
    # could raise without being logged.
    gene_written = _write_claims_and_log(
        qid=gene_qid,
        claims=gene_encodes,
        login=login,
        main_data_id=gene_record['_id'],
        success_message='encodes claim written successfully',
        success_label='gene success',
        start=start)

    # Protein log lines are keyed by the UniProt ID for both success and
    # failure (the error line previously carried the gene ID).
    protein_written = _write_claims_and_log(
        qid=protein_qid,
        claims=protein_encoded_by,
        login=login,
        main_data_id=uniprot,
        success_message='encoded by claim written successfully',
        success_label='protein success',
        start=start)

    if gene_written and protein_written:
        return 'success'

    print('Time elapsed:', time.time() - start)
    return None
Example #30
0
        def get_item_qid(go_id, data=()):
            """
            Create or update the Wikidata item for one ontology term and return its QID.

            Term metadata (label, description, synonyms, obsolete flag) is
            fetched from ``self.base_url`` for ``<ontology>_<go_id>``. Obsolete
            terms trigger ``OBOImporter.cleanup_obsolete_edges`` and yield None.
            Reads ``current_root_id``, ``parents`` and ``children`` from the
            enclosing scope and appends the resulting QID to
            ``current_node_qids``.

            :param go_id: term identifier without the ontology prefix
            :param data: iterable of statements to write in addition to the
                core-property statement added here
            :return: the item's QID, or None if the term is obsolete or any
                exception occurred (the exception is printed and logged)
            """
            started = time.time()
            log_template = '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'

            # for efficiency reasons, reuse the known QID if this term already had its root write
            if go_id in self.local_qid_onto_map:
                cached = self.local_qid_onto_map[go_id]
                if cached['had_root_write'] and 'qid' in cached:
                    return cached['qid']

            try:
                statements = list(data)

                term_url = self.base_url + '{}_{}'.format(self.ontology, go_id)
                response = requests.get(url=term_url, headers=self.headers)
                term = response.json()
                label = term['label']
                description = term['description'][0]

                if term['is_obsolete']:
                    OBOImporter.cleanup_obsolete_edges(
                        ontology_id='{}:{}'.format(self.ontology, go_id),
                        login=self.login_obj,
                        core_property_nr=self.core_property_nr,
                        obsolete_term=True)
                    return None

                # every item carries its own term ID under the core property
                core_statement = PBB_Core.WDString(
                    value='GO:{}'.format(go_id),
                    prop_nr=self.core_property_nr,
                    references=[self.create_reference()])
                statements.append(core_statement)
                print(statements)

                # address the item by QID when one is known, otherwise by a placeholder name
                engine_kwargs = {
                    'domain': 'obo',
                    'data': statements,
                    'use_sparql': True,
                }
                if go_id in self.local_qid_onto_map:
                    engine_kwargs['wd_item_id'] = self.local_qid_onto_map[go_id]['qid']
                else:
                    engine_kwargs['item_name'] = 'test'
                wd_item = PBB_Core.WDItemEngine(**engine_kwargs)

                wd_item.set_label(label=label)
                # descriptions longer than 250 characters are replaced by a generic one
                wd_item.set_description(
                    description=description
                    if len(description) <= 250 else 'Gene Ontology term')

                synonyms = term['synonyms']
                if synonyms is not None and len(synonyms) > 0:
                    # synonyms longer than 250 characters are dropped
                    wd_item.set_aliases(
                        aliases=[syn for syn in synonyms if len(syn) <= 250])

                new_msg = ': created new GO term' if wd_item.create_new_item else ''

                qid = wd_item.write(login=self.login_obj)

                if go_id not in self.local_qid_onto_map:
                    self.local_qid_onto_map[go_id] = {
                        'qid': qid,
                        'had_root_write': False,
                    }

                if go_id == current_root_id:
                    entry = self.local_qid_onto_map[go_id]
                    entry['had_root_write'] = True
                    entry['parents'] = list(parents)
                    entry['children'] = list(children)

                current_node_qids.append(qid)
                print('QID created or retrieved', qid)

                PBB_Core.WDItemEngine.log(
                    'INFO',
                    log_template.format(
                        main_data_id='{}:{}'.format(self.ontology, go_id),
                        exception_type='',
                        message='success{}'.format(new_msg),
                        wd_id=qid,
                        duration=time.time() - started))
                return qid

            except Exception as e:
                print(e)

                PBB_Core.WDItemEngine.log(
                    'ERROR',
                    log_template.format(
                        main_data_id='{}:{}'.format(self.ontology, go_id),
                        exception_type=type(e),
                        message=str(e),
                        wd_id='',
                        duration=time.time() - started))
                return None