Esempio n. 1
0
def write2wikidata(qid, value):
    """Write `value` to item `qid` as a P2561 monolingual-text statement.

    The statement is referenced with `ref` and the write is authenticated
    with `login`; both names are resolved from outside this function.

    :param qid: Wikidata item ID to edit
    :param value: text stored as the statement value
    """
    # print() separates its arguments with a single space.
    print(qid, value)
    name_claim = wdi_core.WDMonolingualText(
        value=value, prop_nr='P2561', references=[[ref]])
    engine = wdi_core.WDItemEngine(
        wd_item_id=qid, data=[name_claim], domain=None, use_sparql=True)
    engine.write(login=login)
Esempio n. 2
0
    def append(self, datatype, prop_nr, value, qualifiers=None):
        """
        Append a statement to a WikidataEntry.

        The statement is built with ``self.ref`` as its references and added to
        ``self.data``. Nothing is appended (and `datatype` is not validated)
        when `value` is empty.

        @param datatype: string, monolingual, externalid, itemid, or date
        @param prop_nr: string Wikidata property ID (e.g. P31)
        @param value: string representing the value of the statement
        @param qualifiers: list of qualifiers that are WDBaseDataType children;
                           omitted or None means no qualifiers
        @raise ValueError: if `datatype` is not one of the supported names
        """
        # Use a fresh list per call; a `[]` default would be one shared object
        # handed to every statement created without explicit qualifiers.
        if qualifiers is None:
            qualifiers = []

        if len(value) == 0:
            return

        if datatype == 'string':
            statement = wdi_core.WDString(value=value,
                                          prop_nr=prop_nr,
                                          references=self.ref,
                                          qualifiers=qualifiers)
        elif datatype == 'monolingual':
            # Monolingual text is always tagged as English here.
            statement = wdi_core.WDMonolingualText(value=value,
                                                   prop_nr=prop_nr,
                                                   references=self.ref,
                                                   qualifiers=qualifiers,
                                                   language='en')
        elif datatype == 'externalid':
            statement = wdi_core.WDExternalID(value,
                                              prop_nr=prop_nr,
                                              references=self.ref,
                                              qualifiers=qualifiers)
        elif datatype == 'itemid':
            statement = wdi_core.WDItemID(value=value,
                                          prop_nr=prop_nr,
                                          references=self.ref,
                                          qualifiers=qualifiers)
        elif datatype == 'date':  # Technically this should be "time"
            statement = wdi_core.WDTime(value,
                                        prop_nr=prop_nr,
                                        references=self.ref,
                                        qualifiers=qualifiers)
        else:
            raise ValueError(
                '`datatype` should be string, monolingual, externalid, '
                'itemid, or date')

        self.data.append(statement)
Esempio n. 3
0
def update_song(entity, song_obj, artist_wikidata_id, artist_name):
    """Add any missing song statements to an existing item and write it back.

    Statements are appended to `entity.statements` (the same list object, so
    the entity is modified in place) for: instance-of SONG_ID, the MusicBrainz
    song ID, the performer, and the title. A default description is set when
    the entity has none. The result is handed to `write_to_wikidata`.

    :param entity: item engine exposing `statements`,
                   `wd_json_representation` and `set_description`
    :param song_obj: song record understood by `get_song_name` and
                     `get_musicbrainz_song_id`
    :param artist_wikidata_id: item ID of the performing artist
    :param artist_name: artist name used in the description and log line
    """
    data = entity.statements
    song_name = get_song_name(song_obj)
    song_mb_id = get_musicbrainz_song_id(song_obj)
    claims = entity.wd_json_representation['claims']

    # Mark the item as a song if it is not already one.
    if SONG_ID not in get_wikidata_property_values(entity, INSTANCE_OF_ID):
        data.append(wdi_core.WDItemID(value=SONG_ID, prop_nr=INSTANCE_OF_ID))

    # Add the MusicBrainz ID when the property is absent or holds other values.
    # The `or` short-circuits, so values are only fetched for an existing claim.
    if (MUSIC_BRAINZ_SONG_PROP_ID not in claims
            or song_mb_id not in get_wikidata_property_values(
                entity, MUSIC_BRAINZ_SONG_PROP_ID)):
        data.append(
            wdi_core.WDExternalID(value=song_mb_id,
                                  prop_nr=MUSIC_BRAINZ_SONG_PROP_ID))

    # Add the artist as performer when no performer is set or it is not listed.
    if (PERFORMER_ID not in claims
            or artist_wikidata_id not in get_wikidata_property_values(
                entity, PERFORMER_ID)):
        data.append(
            wdi_core.WDItemID(value=artist_wikidata_id, prop_nr=PERFORMER_ID))

    # Set the title only when the item has no title claim at all.
    if TITLE_ID not in claims:
        data.append(
            wdi_core.WDMonolingualText(value=song_name, prop_nr=TITLE_ID))

    if get_description(entity) == '':
        entity.set_description(f"Song performed by {artist_name}")

    write_to_wikidata(entity, data)
    print(
        f"Song {song_name} by {artist_name} has been updated on WikiData server."
    )
Esempio n. 4
0
    def test_new_item_creation(self):
        """Check that new items serialise to JSON for every statement type.

        Each statement is tried on its own and then all together, once with
        P0..P19 declared as core properties and once with no core properties.
        """
        statements = [
            wdi_core.WDString(value='test', prop_nr='P1'),
            wdi_core.WDString(value='test1', prop_nr='P2'),
            wdi_core.WDMath("xxx", prop_nr="P3"),
            wdi_core.WDExternalID("xxx", prop_nr="P4"),
            wdi_core.WDItemID("Q123", prop_nr="P5"),
            wdi_core.WDTime('+%Y-%m-%dT%H:%M:%SZ', "P6"),
            wdi_core.WDUrl("http://www.google.com", "P7"),
            wdi_core.WDMonolingualText("xxx", prop_nr="P8"),
            wdi_core.WDQuantity(5, prop_nr="P9"),
            wdi_core.WDQuantity(5, upper_bound=9, lower_bound=2,
                                prop_nr="P10"),
            wdi_core.WDCommonsMedia("xxx", prop_nr="P11"),
            wdi_core.WDGlobeCoordinate(1.2345, 1.2345, 12, prop_nr="P12"),
            wdi_core.WDGeoShape("xxx", prop_nr="P13"),
            wdi_core.WDProperty("P123", "P14"),
        ]
        core_props = {"P{}".format(n) for n in range(20)}

        def assert_serialisable(payload, props):
            # Build a new-item engine and require a non-empty JSON document.
            engine = wdi_core.WDItemEngine(item_name='dae',
                                           domain="szadf",
                                           data=payload,
                                           core_props=props)
            assert engine.get_wd_json_representation()

        for single in statements:
            assert_serialisable([single], core_props)
            assert_serialisable([single], set())

        assert_serialisable(statements, core_props)
        assert_serialisable(statements, set())
Esempio n. 5
0
def create_song(song_obj, artist_wikidata_id, artist_name):
    """Create a new Wikidata item for a song and write it to the server.

    The item gets four statements (instance-of SONG_ID, performer,
    MusicBrainz song ID, title), the song name as label and a
    "Song by <artist>" description.

    :param song_obj: song record understood by `get_song_name` and
                     `get_musicbrainz_song_id`
    :param artist_wikidata_id: item ID of the performing artist
    :param artist_name: artist name used in the description and log line
    """
    song_name = get_song_name(song_obj)
    statements = [
        wdi_core.WDItemID(value=SONG_ID, prop_nr=INSTANCE_OF_ID),
        wdi_core.WDItemID(value=artist_wikidata_id, prop_nr=PERFORMER_ID),
        wdi_core.WDExternalID(value=get_musicbrainz_song_id(song_obj),
                              prop_nr=MUSIC_BRAINZ_SONG_PROP_ID),
        wdi_core.WDMonolingualText(value=song_name, prop_nr=TITLE_ID),
    ]

    entity = wdi_core.WDItemEngine(data=statements)
    entity.set_label(song_name)
    entity.set_description(f"Song by {artist_name}")

    # NOTE(review): the account name and password are hard-coded in source;
    # they should be supplied from configuration instead. Left as is here
    # because changing where credentials come from alters behaviour.
    entity.write(wdi_login.WDLogin(user='******', pwd='nestor2020'))
    print(
        f"Song {song_name} by {artist_name} has been added to WikiData server."
    )
Esempio n. 6
0
def process_data(nioshtic_data):
    """
    Creates Wikidata items on most NIOSHTIC entries.

    Before you execute this method, make sure you have executed CreateJournalArticles.py
    and then Associator.py.

    This only handles creation. Filling in the columns from the rest of the
    NIOSHTIC dataset is handled by a separate class.

    Entries are skipped when they already have a "Wikidata" key, lack an "NN"
    or "TI" key, have an "SO" value ending in " :1", or have a title that
    starts with "Youth@Work" and mentions "edition". Write failures are
    printed and do not stop the remaining entries.

    @param nioshtic_data: dictionary; this function reads its "entries" list
                          and, for the references, its "retrieved" value
    """

    for entry in nioshtic_data['entries']:
        if 'Wikidata' in entry or 'NN' not in entry:
            continue

        if 'TI' not in entry:
            continue

        if 'SO' in entry and entry['SO'].endswith(' :1'):
            continue  # Only one page, most likely a flyer

        title = entry['TI']

        # The previous check used re.match for both words; re.match anchors at
        # the start of the string, so the two tests could never both succeed
        # and these titles were never skipped. "edition" is now looked for
        # anywhere in the title.
        if title.startswith('Youth@Work') and 'edition' in title:
            continue

        ref = [[
            wdi_core.WDItemID(value='Q26822184',
                              prop_nr='P248',
                              is_reference=True),
            wdi_core.WDExternalID(entry['NN'],
                                  prop_nr='P2880',
                                  is_reference=True),
            wdi_core.WDTime(nioshtic_data['retrieved'],
                            prop_nr='P813',
                            is_reference=True)
        ]]

        data = [
            wdi_core.WDExternalID(entry['NN'], prop_nr='P2880',
                                  references=ref),
            wdi_core.WDItemID(value='Q60346', prop_nr='P859'),
            wdi_core.WDMonolingualText(value=title,
                                       prop_nr='P1476',
                                       references=ref,
                                       language='en')
        ]

        # The cleaned title serves both as the engine's item name and as label.
        t = JournalArticles.clean_title(title)
        i = wdi_core.WDItemEngine(data=data,
                                  domain='nioshgreylit',
                                  item_name=t)
        i.set_label(t)

        # Best effort: report the failure and move on to the next entry.
        try:
            print(i.write(WIKI_SESSION))
        except Exception as e:
            print(e)
Esempio n. 7
0
def run_one(pathway_id, retrieved, fast_run, write, login, temp):
    """Build the Wikidata statements for one pathway and write the item.

    The pathway is looked up in `temp` with SPARQL. For every result row the
    function collects statements (instance of, title, WikiPathways ID, exact
    match, cited publications, authors, disease and pathway ontology terms)
    on top of the ones returned by `get_PathwayElements`, then builds a
    WDItemEngine and hands it to `try_write`.

    :param pathway_id: WikiPathways identifier, inserted into the queries and
                       used as the P2410 value
    :param retrieved: passed to `create_reference` for the shared reference
    :param fast_run: forwarded to WDItemEngine
    :param write: when falsy, publication lookups receive None instead of
                  `login`; also forwarded to `try_write`
    :param login: login object used for publication creation and writing
    :param temp: object whose `query(sparql)` returns indexable result rows
                 (presumably an rdflib graph -- TODO confirm)
    :return: the result of `panic(...)` if a cited publication cannot be
             resolved to an item, otherwise None
    """
    print(pathway_id)
    pathway_reference = create_reference(pathway_id, retrieved)

    def cited():
        # Every statement gets its own copy of the pathway reference.
        return [copy.deepcopy(pathway_reference)]

    numeric_id = re.compile('^[0-9]+$')

    prep = dict()
    prep = get_PathwayElements(pathway=pathway_id,
                               datatype="Metabolite",
                               temp=temp,
                               prep=prep)
    prep = get_PathwayElements(pathway=pathway_id,
                               datatype="GeneProduct",
                               temp=temp,
                               prep=prep)
    # P703 = found in taxon, Q15978631 = "Homo sapiens"
    prep["P703"] = [
        wdi_core.WDItemID(value="Q15978631",
                          prop_nr='P703',
                          references=cited())
    ]

    # The organism literal must read "Homo sapiens"; the previous text was a
    # mangled form of it that could not match any pathway.
    query = """
            PREFIX wp:    <http://vocabularies.wikipathways.org/wp#>
            PREFIX gpml:    <http://vocabularies.wikipathways.org/gpml#>
            PREFIX dcterms: <http://purl.org/dc/terms/>
        SELECT DISTINCT ?pathway ?pwId ?pwLabel ?description
        WHERE {
           VALUES ?pwId {"""
    query += "\"" + pathway_id + "\"^^xsd:string}"
    query += """
           ?pathway a wp:Pathway ;
                    dc:title ?pwLabel ;
                    dcterms:description ?description ;
                    dcterms:identifier ?pwId ;
                    <http://vocabularies.wikipathways.org/wp#isAbout> ?details ;
                    wp:organismName "Homo sapiens"^^xsd:string .
        }"""
    qres3 = temp.query(query)

    for pathway_row in qres3:
        pathway_iri = str(pathway_row[0])
        pw_id = str(pathway_row[1])
        pw_label = str(pathway_row[2])
        description = str(pathway_row[3])

        ## clean up descriptions: drop URLs, line breaks and wiki markup
        description = re.sub(r'https?:\/\/.*[\s\r\n]', '', description)
        for old, new in (('\n', ' '), ('\r', ' '), ('\'\'\'', ''),
                         ('\'\'', ''), ('[', ''), (']', '')):
            description = description.replace(old, new)
        description = description.replace(
            'Proteins on this pathway have targeted assays available via the Portal',
            '')
        # Truncate long descriptions; replace very short ones with a default.
        if len(description) > 246:
            description = description[:246] + '...'
        if len(description) < 20:
            description = 'biological pathway in human'

        # P31 = instance of
        prep["P31"] = [
            wdi_core.WDItemID(value="Q4915012",
                              prop_nr="P31",
                              references=cited())
        ]

        prep["P1476"] = [
            wdi_core.WDMonolingualText(value=pw_label,
                                       prop_nr="P1476",
                                       references=cited())
        ]

        # P2410 = WikiPathways ID
        prep["P2410"] = [
            wdi_core.WDString(pathway_id,
                              prop_nr='P2410',
                              references=cited())
        ]

        # P2888 = exact match
        prep["P2888"] = [
            wdi_core.WDUrl("http://identifiers.org/wikipathways/" + pw_id,
                           prop_nr='P2888',
                           references=cited())
        ]

        query = """
                PREFIX wp:    <http://vocabularies.wikipathways.org/wp#>
                PREFIX dcterms: <http://purl.org/dc/terms/>
                select ?pubmed

                WHERE {
                 ?pubmed  a       wp:PublicationReference ;
                        dcterms:isPartOf <"""

        query += pathway_iri
        query += """> .}

                """
        qres4 = temp.query(query)
        for pubmed_result in qres4:
            pmid = str(pubmed_result[0]).replace(
                "http://identifiers.org/pubmed/", "")
            print(pmid)

            # Purely numeric identifiers are PubMed IDs; anything else is
            # looked up as a DOI.
            if numeric_id.match(pmid):
                publication = wdi_helpers.PublicationHelper(
                    pmid.replace("PMID:", ""),
                    id_type="pmid",
                    source="europepmc")
            else:
                publication = wdi_helpers.PublicationHelper(
                    pmid, id_type="doi", source="crossref")
            pmid_qid, _, _ = publication.get_or_create(
                login if write else None)

            if pmid_qid is None:
                return panic(pathway_id, "not found: {}".format(pmid), "pmid")

            print(pmid_qid)
            # P2860 = cites work
            prep.setdefault("P2860", []).append(
                wdi_core.WDItemID(value=str(pmid_qid),
                                  prop_nr='P2860',
                                  references=cited()))

        author_query = """
                PREFIX wp:    <http://vocabularies.wikipathways.org/wp#>
                SELECT ?author ?authorName ?authorHomepage ?authorQIRI
                WHERE {
                  <http://identifiers.org/wikipathways/""" + pathway_id + """> dc:creator ?author .
                  ?author a                    foaf:Person ;
                  foaf:name            ?authorName ;
                  foaf:homepage            ?authorHomepage .
                  OPTIONAL { ?author    owl:sameAs     ?authorQIRI . }
                }
                """
        author_query_res = temp.query(author_query)
        prep["P2093"] = []
        prep["P50"] = []

        for author_row in author_query_res:
            author_name = str(author_row[1])
            print("author_name")
            print(author_name)
            author_homepage = str(author_row[2])
            print("author_homepage")
            print(author_homepage)

            # P2093 = author name string
            author_url_qualifier = wdi_core.WDString(value=author_homepage,
                                                     prop_nr="P2699",
                                                     is_qualifier=True)
            prep["P2093"].append(
                wdi_core.WDString(
                    author_name,
                    prop_nr='P2093',
                    qualifiers=[copy.deepcopy(author_url_qualifier)],
                    references=cited()))

            if author_row[3] is None:  # no authorQIRI bound for this author
                continue

            print("author_name")
            print(author_name)
            author_qiri = str(author_row[3])

            # Reset per author: without this, an IRI matching neither prefix
            # would reuse the previous author's QID (or fail on the first one).
            author_qid = None
            if "https://www.wikidata.org/wiki/" in author_qiri:
                author_qid = author_qiri.replace(
                    "https://www.wikidata.org/wiki/", "")
            if "http://www.wikidata.org/entity/" in author_qiri:
                author_qid = author_qiri.replace(
                    "http://www.wikidata.org/entity/", "")
            if author_qid is None:
                print("unrecognised author IRI, skipping P50: " + author_qiri)
                continue

            print("author_qid")
            print(author_qid)
            # P50 = author
            prep["P50"].append(
                wdi_core.WDItemID(author_qid,
                                  prop_nr='P50',
                                  references=cited()))

        disease_ontology_query = """
                PREFIX wp:    <http://vocabularies.wikipathways.org/wp#>
                PREFIX dcterms: <http://purl.org/dc/terms/>
                SELECT ?diseaseOntologyTerm
                WHERE {
                  ?pathwayRDF wp:diseaseOntologyTag ?diseaseOntologyTerm ;
                    foaf:page ?pathway ;
                    dcterms:identifier \"""" + pathway_id + """\"^^xsd:string .
                }
                """
        disease_ontology_query_res = temp.query(disease_ontology_query)
        prep["P1050"] = []
        for disease_row in disease_ontology_query_res:
            disease_ontology_iri = str(disease_row[0])
            doid = disease_ontology_iri.replace(
                "http://purl.obolibrary.org/obo/DOID_", "DOID:")
            print("doid")
            print(doid)

            # P1050 = medical condition
            if doid_qid.get(doid) is not None:  # skip if qid is missing
                prep["P1050"].append(
                    wdi_core.WDItemID(doid_qid[doid],
                                      prop_nr='P1050',
                                      references=cited()))

        pw_ontology_query = """
                PREFIX wp:    <http://vocabularies.wikipathways.org/wp#>
                PREFIX dcterms: <http://purl.org/dc/terms/>
                SELECT ?pwOntologyTerm
                WHERE {
                  ?pathwayRDF wp:pathwayOntologyTag ?pwOntologyTerm ;
                    foaf:page ?pathway ;
                    dcterms:identifier \"""" + pathway_id + """\"^^xsd:string .
                }
                """
        pw_ontology_query_res = temp.query(pw_ontology_query)
        prep["P921"] = []
        for ontology_row in pw_ontology_query_res:
            pw_ontology_iri = str(ontology_row[0])
            poid = pw_ontology_iri.replace(
                "http://purl.obolibrary.org/obo/PW_", "PW:")
            print("poid")
            print(poid)

            # P921 = main subject
            if poid_qid.get(poid) is not None:  # skip if qid is missing
                prep["P921"].append(
                    wdi_core.WDItemID(poid_qid[poid],
                                      prop_nr='P921',
                                      references=cited()))

        #TODO: Propose Cell Type Ontology ID as new property, add release item, associate terms with WD items.
        #cell_type_ontology_query = """
        #        PREFIX wp:    <http://vocabularies.wikipathways.org/wp#>
        #        PREFIX dcterms: <http://purl.org/dc/terms/>
        #        SELECT ?cellTypeOntologyTerm
        #        WHERE {
        #          ?pathwayRDF wp:cellTypeOntologyTag ?cellTypeOntologyTerm ;
        #            foaf:page ?pathway ;
        #            dcterms:identifier \"""" + pathway_id + """\"^^xsd:string .
        #        }
        #        """
        #cell_type_ontology_query_res = temp.query(cell_type_ontology_query)
        #prep["P927"] = []
        #for row in cell_type_ontology_query_res:
        #    cell_type_ontology_iri = str(row[0])
        #    ctoid = cell_type_ontology_iri.replace("http://purl.obolibrary.org/obo/CL_", "CL:")
        #    print("ctoid")
        #    print(ctoid)

        # P927 = anatomical location
        #    prep["P927"].append(wdi_core.WDItem(qid[ctoid], prop_nr='P927', references=[copy.deepcopy(pathway_reference)]))

        # Flatten the per-property lists into the single list the engine takes.
        data2add = []
        for statements in prep.values():
            for statement in statements:
                data2add.append(statement)
                print(statement.prop_nr, statement.value)

        wdPage = wdi_core.WDItemEngine(
            data=data2add,
            fast_run=fast_run,
            fast_run_base_filter=fast_run_base_filter,
            fast_run_use_refs=True,
            ref_handler=update_retrieved_if_new_multiple_refs,
            core_props=core_props)

        wdPage.set_label(pw_label, lang="en")
        wdPage.set_description(description, lang="en")

        try_write(wdPage,
                  record_id=pathway_id,
                  record_prop=PROPS['Wikipathways ID'],
                  edit_summary="Updated a Wikipathways pathway",
                  login=login,
                  write=write)
Esempio n. 8
0
def geneSymbol_form(request):
    """
    uses wdi to write a gene symbol (P2561) statement to wikidata
    :param request: POST request whose JSON body provides `pmid`,
                    `geneSymbol`, `geneQID` and `proteinQID`
    :return: JSON response; `authentication: False` when the session has no
             login, otherwise a `write_success` boolean. Non-POST requests
             get a 405 response with `write_success: False`.
    """
    print("Gene Symbol Form")
    if request.method != 'POST':
        # Previously this path fell off the end and returned None.
        return JsonResponse({'write_success': False}, status=405)

    body = json.loads(request.body.decode('utf-8'))
    responseData = {}
    if 'login' not in request.session:
        responseData['authentication'] = False
        return JsonResponse(responseData)

    # NOTE(review): jsonpickle.decode can instantiate arbitrary objects; this
    # relies on the session content being trustworthy -- confirm.
    login = jsonpickle.decode(request.session['login'])

    refs = []
    eutilsPMID = body['pmid']
    # construct the references using WDI_core and PMID_tools if necessary;
    # a failure here is reported and the statement is written with whatever
    # references were built so far
    try:
        refs.append(
            wdi_core.WDItemID(value='Q26489220',
                              prop_nr='P1640',
                              is_reference=True))
        refs.append(
            wdi_core.WDTime(str(strftime("+%Y-%m-%dT00:00:00Z", gmtime())),
                            prop_nr='P813',
                            is_reference=True))
        pub = PublicationHelper(eutilsPMID, 'pmid', 'europepmc')
        result = pub.get_or_create(login)
        if len(result) > 0 and result[0]:
            refs.append(
                wdi_core.WDItemID(value=result[0],
                                  prop_nr='P248',
                                  is_reference=True))
    except Exception as e:
        print("reference construction error: " + str(e))

    statements = [
        wdi_core.WDMonolingualText(value=body['geneSymbol'],
                                   prop_nr='P2561',
                                   references=[refs])
    ]

    try:
        # The same statement and alias go to the gene item and the protein
        # item; an empty QID means that item is skipped.
        for label, qid_key in (("gene", 'geneQID'), ("protein", 'proteinQID')):
            qid = body[qid_key]
            if qid == "":
                continue
            print("Writing to " + label + " " + qid)
            wd_item = wdi_core.WDItemEngine(wd_item_id=qid, data=statements)
            wd_item.set_aliases(aliases=[body['geneSymbol']])
            wd_item.write(login=login)

        responseData['write_success'] = True
    except Exception as e:
        # Report the cause instead of discarding it; the client still only
        # sees the boolean.
        print("write error: " + str(e))
        responseData['write_success'] = False

    return JsonResponse(responseData)
Esempio n. 9
0
def get_data(manifest):
    """
    Method to retrieve data from PubMed, PubMed Central, and DOI.org databases.

    Have at least one of a DOI, PMCID, or PMID in each dictionary. From there,
    this method will query some friendly databases. You can also specify other
    Wikidata statements to add.

    @param manifest: a list of dictionaries, with the following keys and values:
                        doi:   string or None
                        pmcid: string or None
                        pmid:  string or None
                        data:  list of additional WDI objects (WDString etc.) to
                               incorporate into the output, or empty list
    @return list of Wikidata statement objects.
    """

    # To prevent weirdness from unexpected values
    for entry in manifest:
        for thing in entry.keys():
            if thing not in ['pmid', 'pmcid', 'doi', 'data']:
                raise ValueError(
                    'The only permitted keys are doi, pmcid, pmid, and data')

    months = {
        'Jan': '01',
        'Feb': '02',
        'Mar': '03',
        'Apr': '04',
        'May': '05',
        'Jun': '06',
        'Jul': '07',
        'Aug': '08',
        'Sep': '09',
        'Oct': '10',
        'Nov': '11',
        'Dec': '12'
    }

    # Initializing package, a list of objects containing a list of Wikidata item
    # objects and an object containing raw data. Each object in the package list
    # corresponds to the list entry in manifest.

    package = []

    lookup = {'pmid': [], 'pmcid': [], 'doi': []}

    # Associates an identifier with a given manifest/package entry
    associator = {'pmid': {}, 'pmcid': {}, 'doi': {}}

    counter = 0
    for entry in manifest:
        if 'data' not in entry:
            statements = []
        else:
            statements = entry['data']

        # Instance of: scientific article
        statements.append(wdi_core.WDItemID(value='Q13442814', prop_nr='P31'))

        package.append({'statements': statements, 'raw_data': {}, 'label': ''})

        # Append to the lookup lists. API lookups are done in bulk to cut down
        # on HTTP requests.
        for id_name, id_value in entry.items():
            if id_name == 'data' or id_value is None:  # not an ID
                continue
            if id_name == 'pmcid' and 'pmid' in entry:
                if entry['pmid'] is not None:
                    continue  # we don't need pmcid if we already have pmid
            lookup[id_name].append(id_value)
            associator[id_name][id_value] = counter

        counter += 1

    raw_data = {}
    raw_data['pmid'] = get_pubmed(lookup['pmid'])
    raw_data['pmcid'] = get_pubmed_central(lookup['pmcid'])
    raw_data['doi'] = get_doi_org(lookup['doi'])

    # Now that the requests are done, we want to painstakingly re-associate each
    # result object with the corresponding list in the package. This is mostly
    # to keep me from going crazy.

    for data_source, data_object in raw_data.items():
        for identifier, result in data_object.items():
            index = associator[data_source][identifier]
            package[index]['raw_data'][data_source] = result

    counter = 0
    for entry in package:

        # We only query PubMed in one place of two. It's basically the same API
        # but drawing from a different dataset.
        pubmed_data = {}
        if 'pmcid' in entry['raw_data']:
            pubmed_data_source = ('pmc', 'Q229883'
                                  )  # for use in refsnak generator
            pubmed_data = entry['raw_data']['pmcid']
        elif 'pmid' in entry['raw_data']:
            pubmed_data_source = ('pubmed', 'Q180686')
            pubmed_data = entry['raw_data']['pmid']

        doi_data = {}
        if 'doi' in entry['raw_data']:
            doi_data = entry['raw_data']['doi']

        # If we have data from both PubMed and DOI.org, we are interested in
        # both. PubMed/PubMed Central has article IDs, while DOI.org has better
        # author names and better data overall.

        # Initializing statement variables to prevent duplication/overwrites.
        statement_title = None
        statement_doi = None
        statement_pmid = None
        statement_pmcid = None
        statement_pubdate = None
        statement_publishedin = None
        statement_volume = None
        statement_issue = None
        statement_pages = None
        statement_origlanguage = None
        statement_authors = []

        if doi_data != {}:
            doi_ref = generate_refsnak(
                'Q28946522', 'https://doi.org/' +
                urllib.parse.quote_plus(manifest[counter]['doi']),
                doi_data['__querydate'])

            if 'title' in doi_data and statement_title is None:
                t = clean_title(doi_data['title'])
                if t != '' and t is not None and len(t) < 400:
                    statement_title = wdi_core.WDMonolingualText(
                        value=t,
                        prop_nr='P1476',
                        references=doi_ref,
                        language='en')
                    package[counter]['statements'].append(statement_title)

                    if len(t) < 250:
                        package[counter]['label'] = t

            if 'DOI' in doi_data and statement_doi is None:
                statement_doi = wdi_core.WDExternalID(doi_data['DOI'].upper(),
                                                      prop_nr='P356',
                                                      references=doi_ref)
                package[counter]['statements'].append(statement_doi)

            if 'issued' in doi_data and statement_pubdate is None:
                date_parts = doi_data['issued']['date-parts'][0]

                if date_parts != [None]:
                    y = str(date_parts[0])
                    m = '00'
                    d = '00'

                    precision = 9
                    if len(date_parts) >= 2:
                        m = str(date_parts[1]).zfill(2)
                        precision = 10

                    if len(date_parts) == 3:
                        d = str(date_parts[2]).zfill(2)
                        precision = 11

                    to_add = '+{0}-{1}-{2}T00:00:00Z'.format(y, m, d)
                    statement_pubdate = wdi_core.WDTime(to_add,
                                                        precision=precision,
                                                        prop_nr='P577',
                                                        references=doi_ref)
                    package[counter]['statements'].append(statement_pubdate)

            if 'ISSN' in doi_data and statement_publishedin is None:
                journal = issn_to_wikidata(doi_data['ISSN'][0])
                if journal is not None:
                    statement_publishedin = wdi_core.WDItemID(
                        value=journal, prop_nr='P1433', references=doi_ref)
                    package[counter]['statements'].append(
                        statement_publishedin)

            if 'volume' in doi_data and statement_volume is None:
                statement_volume = wdi_core.WDString(value=doi_data['volume'],
                                                     prop_nr='P478',
                                                     references=doi_ref)
                package[counter]['statements'].append(statement_volume)

            if 'issue' in doi_data and statement_issue is None:
                statement_issue = wdi_core.WDString(value=doi_data['issue'],
                                                    prop_nr='P433',
                                                    references=doi_ref)
                package[counter]['statements'].append(statement_issue)

            if 'page' in doi_data and statement_pages is None:
                if doi_data['page'] != '' and doi_data['page'] is not None:
                    statement_pages = wdi_core.WDString(value=doi_data['page'],
                                                        prop_nr='P304',
                                                        references=doi_ref)
                    package[counter]['statements'].append(statement_pages)

            if 'author' in doi_data and statement_authors == []:
                author_counter = 0
                for author in doi_data['author']:
                    author_counter += 1
                    a = ''
                    if 'family' in author:
                        a = author['family']
                    if 'given' in author:
                        a = author['given'] + ' ' + a
                    a = clean_title(a)
                    qualifier = wdi_core.WDString(value=str(author_counter),
                                                  prop_nr='P1545',
                                                  is_qualifier=True)
                    statement_author = wdi_core.WDString(
                        value=a,
                        prop_nr='P2093',
                        qualifiers=[qualifier],
                        references=doi_ref)
                    statement_authors.append(statement_author)
                for statement in statement_authors:
                    package[counter]['statements'].append(statement)

        if pubmed_data != {}:
            u = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db={0}&retmode=json&id={1}'
            pubmed_ref = generate_refsnak(
                pubmed_data_source[1],
                u.format(pubmed_data_source[0], pubmed_data['uid']),
                pubmed_data['__querydate'])

            if 'title' in pubmed_data and statement_title is None:
                t = clean_title(pubmed_data['title'])
                if t != '' and len(t) < 400:
                    statement_title = wdi_core.WDMonolingualText(
                        value=t,
                        prop_nr='P1476',
                        references=pubmed_ref,
                        language='en')
                    package[counter]['statements'].append(statement_title)

                    if len(t) < 250:
                        package[counter]['label'] = t

            if 'articleids' in pubmed_data:
                for block in pubmed_data['articleids']:
                    if block['idtype'] == 'pmc' and statement_pmcid is None:
                        pmcid = block['value'].replace('PMC', '')
                        statement_pmcid = wdi_core.WDExternalID(
                            pmcid, prop_nr='P932', references=pubmed_ref)
                        package[counter]['statements'].append(statement_pmcid)
                    elif block['idtype'] == 'pmcid' and statement_pmcid is None:
                        pmcid = block['value'].replace('PMC', '')
                        statement_pmcid = wdi_core.WDExternalID(
                            pmcid, prop_nr='P932', references=pubmed_ref)
                        package[counter]['statements'].append(statement_pmcid)
                    elif block['idtype'] == 'doi' and statement_doi is None:
                        doi = block['value'].upper()
                        statement_doi = wdi_core.WDExternalID(
                            doi, prop_nr='P356', references=pubmed_ref)
                        package[counter]['statements'].append(statement_doi)
                    elif block['idtype'] in ['pmid', 'pubmed'
                                             ] and statement_pmid is None:
                        pmid = block['value']
                        if pmid != 0 and pmid != '0':
                            statement_pmid = wdi_core.WDExternalID(
                                pmid, prop_nr='P698', references=pubmed_ref)
                            package[counter]['statements'].append(
                                statement_pmid)

            if 'pubdate' in pubmed_data and statement_pubdate is None:
                pubdate = None
                precision = None
                pubdate_raw = pubmed_data['pubdate'].split(' ')  # 2016 Aug 1
                if len(pubdate_raw) > 1:
                    if pubdate_raw[1] in months:
                        m = months[pubdate_raw[1]]
                    else:
                        m = '00'
                if len(pubdate_raw) == 3:  # Precision to the day
                    allowed_dates = [str(x).zfill(2) for x in range(1, 32)]
                    if pubdate_raw[2].zfill(2) in allowed_dates:
                        pubdate = "+{0}-{1}-{2}T00:00:00Z".format(
                            pubdate_raw[0], m, pubdate_raw[2].zfill(2))
                        precision = 11
                    else:
                        pubdate = "+{0}-{1}-00T00:00:00Z".format(
                            pubdate_raw[0], m)
                        precision = 10
                elif len(pubdate_raw) == 2:  # Precision to the month
                    pubdate = "+{0}-{1}-00T00:00:00Z".format(pubdate_raw[0], m)
                    precision = 10
                elif len(pubdate_raw) == 1:  # Precision to the year
                    pubdate = "+{0}-00-00T00:00:00Z".format(pubdate_raw[0])
                    precision = 9

                if pubdate is not None and precision is not None:
                    if re.match(r'\+\d{4}-\d{2}-\d{2}T00:00:00Z',
                                pubdate) is not None:
                        statement_pubdate = wdi_core.WDTime(
                            pubdate,
                            precision=precision,
                            prop_nr='P577',
                            references=pubmed_ref)
                        package[counter]['statements'].append(
                            statement_pubdate)

            if 'issn' in pubmed_data and statement_publishedin is None:
                if pubmed_data['issn'] != '':
                    journal = issn_to_wikidata(pubmed_data['issn'])
                    if journal is not None:
                        statement_publishedin = wdi_core.WDItemID(
                            value=journal,
                            prop_nr='P1433',
                            references=pubmed_ref)
                        package[counter]['statements'].append(
                            statement_publishedin)

            if 'volume' in pubmed_data and statement_volume is None:
                if pubmed_data['volume'] != '':
                    statement_volume = wdi_core.WDString(
                        value=pubmed_data['volume'],
                        prop_nr='P478',
                        references=pubmed_ref)
                    package[counter]['statements'].append(statement_volume)

            if 'issue' in pubmed_data and statement_issue is None:
                if pubmed_data['issue'] != '':
                    statement_issue = wdi_core.WDString(
                        value=pubmed_data['issue'],
                        prop_nr='P433',
                        references=pubmed_ref)
                    package[counter]['statements'].append(statement_issue)

            if 'pages' in pubmed_data and statement_pages is None:
                if pubmed_data['pages'] != '':
                    statement_pages = wdi_core.WDString(
                        value=pubmed_data['pages'],
                        prop_nr='P304',
                        references=pubmed_ref)
                    package[counter]['statements'].append(statement_pages)

            if 'lang' in pubmed_data and statement_origlanguage is None:
                for langcode in pubmed_data['lang']:
                    # Please post a comment on this webzone if you know the
                    # other possible values for 'lang'
                    if langcode == 'eng':
                        statement_origlanguage = wdi_core.WDItemID(
                            value='Q1860',
                            prop_nr='P364',
                            references=pubmed_ref)
                        package[counter]['statements'].append(
                            statement_origlanguage)

            if 'authors' in pubmed_data and statement_authors == []:
                author_counter = 0
                for author in pubmed_data['authors']:
                    if author['authtype'] == "Author":
                        author_counter += 1
                        a = clean_title(author['name'])
                        qualifier = wdi_core.WDString(
                            value=str(author_counter),
                            prop_nr='P1545',
                            is_qualifier=True)
                        statement_author = wdi_core.WDString(
                            value=a,
                            prop_nr='P2093',
                            qualifiers=[qualifier],
                            references=pubmed_ref)
                        statement_authors.append(statement_author)
                for statement in statement_authors:
                    package[counter]['statements'].append(statement)

        counter += 1

    return package
Esempio n. 10
0
# Log in with the credentials held in `cred`.
login_instance = wdi_login.WDLogin(user=cred['user'], pwd=cred['password'])

# Run the SPARQL query `q`; each result binding describes one item to process.
r = wdi_core.WDItemEngine.execute_sparql_query(q)

for binding in r['results']['bindings']:
    # The item URI ends in the QID; keep only the part after the last '/'.
    item = binding['item']['value'].rpartition('/')[2]
    pdf = binding['url']['value']
    statements = []

    # Read the date *before* the best-effort title handling. Previously it was
    # assigned after the title inside the same ``try``; when the title lookup
    # failed, the fallback used a ``ref_date`` that was either undefined
    # (first iteration) or left over from the previous binding.
    # A binding without a date now fails here with a KeyError.
    ref_date = binding['date']['value']

    # The title is optional: if it is missing or cannot be turned into a
    # monolingual-text snak, the reference is built without it.
    try:
        ref_title = binding['title']['value']
        title_refs = [
            wdi_core.WDMonolingualText(ref_title,
                                       PROPS['title'],
                                       language='sv',
                                       is_reference=True)
        ]
    except Exception:
        title_refs = []

    # One reference block: URL, optional title, publication date, and the
    # retrieval date (today, UTC, at day resolution).
    refs = [[
        wdi_core.WDUrl(pdf, PROPS['ref_url'], is_reference=True),
        *title_refs,
        wdi_core.WDTime(f'+{ref_date}', PROPS['date'], is_reference=True),
        wdi_core.WDTime(datetime.utcnow().strftime('+%Y-%m-%dT00:00:00Z'),
                        PROPS['retrieved'],
                        is_reference=True),
    ]]
    try:
Esempio n. 11
0
def run_one(pathway_id, retrieved, fast_run, write, login, temp):
    """Assemble the Wikidata statements for one WikiPathways pathway and write them.

    The pathway's elements are collected via ``get_PathwayElements``, the
    pathway metadata and its cited publications are looked up with SPARQL
    queries against ``temp``, and the resulting statements are handed to
    ``try_write``.

    @param pathway_id: string WikiPathways identifier; used in the SPARQL
        query, as the P2410 value and as the record id passed to try_write
    @param retrieved: passed to create_reference together with pathway_id
    @param fast_run: forwarded to WDItemEngine as ``fast_run``
    @param write: if falsy, publication items are looked up without a login
        (``get_or_create(None)``); also forwarded to try_write
    @param login: login object forwarded to get_or_create and try_write
    @param temp: object exposing ``query(sparql)``
        (presumably an rdflib Graph holding the WikiPathways RDF - confirm)
    @return: the result of ``panic(...)`` as soon as a cited publication
        cannot be resolved to an item; otherwise None
    """
    print(pathway_id)
    pathway_reference = create_reference(pathway_id, retrieved)
    prep = dict()

    prep = get_PathwayElements(pathway=pathway_id,
                               datatype="Metabolite",
                               temp=temp,
                               prep=prep)
    prep = get_PathwayElements(pathway=pathway_id,
                               datatype="GeneProduct",
                               temp=temp,
                               prep=prep)
    # P703 = found in taxon, Q15978631 = "Homo sapiens"
    prep["P703"] = [
        wdi_core.WDItemID(value="Q15978631",
                          prop_nr='P703',
                          references=[copy.deepcopy(pathway_reference)])
    ]

    # NOTE(review): the xsd: and dc: prefixes are not declared in this query;
    # presumably they are resolved through temp's namespace bindings - verify.
    query = """
            PREFIX wp:    <http://vocabularies.wikipathways.org/wp#>
            PREFIX gpml:    <http://vocabularies.wikipathways.org/gpml#>
            PREFIX dcterms: <http://purl.org/dc/terms/>
        SELECT DISTINCT ?pathway ?pwId ?pwLabel
        WHERE {
           VALUES ?pwId {"""
    query += "\"" + pathway_id + "\"^^xsd:string}"
    # The organism literal must read "Homo sapiens"; the previously mangled
    # spelling matched no pathway, so the loop below never ran.
    query += """
           ?pathway a wp:Pathway ;
                    dc:title ?pwLabel ;
                    dcterms:identifier ?pwId ;
                    <http://vocabularies.wikipathways.org/wp#isAbout> ?details ;
                    wp:organismName "Homo sapiens"^^xsd:string .
        }"""
    qres3 = temp.query(query)

    # Identifiers made only of digits are treated as PubMed IDs; anything
    # else is looked up as a DOI. Compiled once, outside both loops.
    numeric_id = re.compile('^[0-9]+$')

    # Row layout follows the SELECT: row[0] = ?pathway, row[1] = ?pwId,
    # row[2] = ?pwLabel.
    for row in qres3:
        # P31 = instance of
        prep["P31"] = [
            wdi_core.WDItemID(value="Q4915012",
                              prop_nr="P31",
                              references=[copy.deepcopy(pathway_reference)])
        ]

        # P1476 = title
        prep["P1476"] = [
            wdi_core.WDMonolingualText(
                value=str(row[2]),
                prop_nr="P1476",
                references=[copy.deepcopy(pathway_reference)])
        ]

        # P2410 = WikiPathways ID
        prep["P2410"] = [
            wdi_core.WDString(pathway_id,
                              prop_nr='P2410',
                              references=[copy.deepcopy(pathway_reference)])
        ]

        # P2888 = exact match
        prep["P2888"] = [
            wdi_core.WDUrl("http://identifiers.org/wikipathways/" +
                           str(row[1]),
                           prop_nr='P2888',
                           references=[copy.deepcopy(pathway_reference)])
        ]

        # Publications that are part of this pathway.
        query = """
                PREFIX wp:    <http://vocabularies.wikipathways.org/wp#>
                PREFIX dcterms: <http://purl.org/dc/terms/>
                select ?pubmed

                WHERE {
                 ?pubmed  a       wp:PublicationReference ;
                        dcterms:isPartOf <"""

        query += str(row[0])
        query += """> .}

                """
        qres4 = temp.query(query)
        for pubmed_result in qres4:
            pmid = str(pubmed_result[0]).replace(
                "http://identifiers.org/pubmed/", "")
            print(pmid)

            # Only log in for the lookup when we are actually writing.
            helper_login = login if write else None
            if numeric_id.match(pmid):
                pmid_qid, _, _ = wdi_helpers.PublicationHelper(
                    pmid.replace("PMID:", ""),
                    id_type="pmid",
                    source="europepmc").get_or_create(helper_login)
            else:
                pmid_qid, _, _ = wdi_helpers.PublicationHelper(
                    pmid, id_type="doi",
                    source="crossref").get_or_create(helper_login)

            if pmid_qid is None:
                # Abort the whole pathway if a cited work cannot be resolved.
                return panic(pathway_id, "not found: {}".format(pmid), "pmid")

            # P2860 = cites work
            prep.setdefault("P2860", []).append(
                wdi_core.WDItemID(
                    value=str(pmid_qid),
                    prop_nr='P2860',
                    references=[copy.deepcopy(pathway_reference)]))

        # Flatten the per-property lists into the single list the engine takes.
        data2add = []
        for prop_statements in prep.values():
            for statement in prop_statements:
                data2add.append(statement)
                print(statement.prop_nr, statement.value)

        wdPage = wdi_core.WDItemEngine(
            data=data2add,
            fast_run=fast_run,
            fast_run_base_filter=fast_run_base_filter,
            fast_run_use_refs=True,
            ref_handler=update_retrieved_if_new_multiple_refs,
            core_props=core_props)

        wdPage.set_label(str(row[2]), lang="en")
        wdPage.set_description("biological pathway in human", lang="en")

        try_write(wdPage,
                  record_id=pathway_id,
                  record_prop=PROPS['Wikipathways ID'],
                  edit_summary="Updated a Wikipathways pathway",
                  login=login,
                  write=write)