Esempio n. 1
0
def execute_sparql_query(query_text, endpoint='http://avalanche.scripps.edu:9999/bigdata/sparql'):
    """Run a SPARQL query and return the result as a DataFrame.

    Args:
        query_text: SPARQL query string.
        endpoint: URL of the SPARQL endpoint the query is sent to.

    Returns:
        The result of ``WDItemEngine.execute_sparql_query(..., as_dataframe=True)``.
        An empty result is returned untouched; otherwise the result is passed
        through ``parse_result_uris`` and indexed with the column order that
        ``get_col_order`` derives from ``query_text``.
    """
    frame = WDItemEngine.execute_sparql_query(query_text, endpoint=endpoint, as_dataframe=True)
    if len(frame) != 0:
        # Determine the column order first, then select the parsed columns in that order.
        ordered_columns = get_col_order(query_text)
        return parse_result_uris(frame)[ordered_columns]
    # An empty result needs no post-processing.
    return frame
Esempio n. 2
0
def get_property_types():
    """Map each property ID to its Wikibase property type.

    Runs ``SELECT ?p ?pt WHERE {?p wikibase:propertyType ?pt}`` and keeps the
    last ``/``-separated segment of ``?p`` as key and the part of ``?pt`` after
    the last ``#`` as value. Types mentioned by the original author:
    {'CommonsMedia', 'Time', 'Quantity', 'WikibaseProperty', 'WikibaseItem',
    'GlobeCoordinate', 'String', 'ExternalId', 'Math', 'Monolingualtext',
    'TabularData', 'Url', 'GeoShape'}.

    Returns:
        dict: property ID -> property type name.
    """
    sparql = "SELECT ?p ?pt WHERE {?p wikibase:propertyType ?pt}"
    bindings = WDItemEngine.execute_sparql_query(sparql)['results']['bindings']
    # Flatten every binding to {variable: value} before deriving the mapping.
    flat_rows = []
    for binding in bindings:
        flat_rows.append({name: cell['value'] for name, cell in binding.items()})
    prop_wdtype = {}
    for row in flat_rows:
        prop_id = row['p'].split("/")[-1]
        prop_wdtype[prop_id] = row['pt'].split("#")[-1]
    return prop_wdtype
 def check_existing_label(self, label_str):
     """Tell whether the label query returns at least one binding.

     The template ``query_templates/CHECK_EXISTING_LABEL_QUERY.rq`` is read,
     ``#QID#`` is replaced with ``self.Organism_QID`` and ``#LABEL#`` with
     ``label_str``, and the resulting query is executed via ``WDItemEngine``.

     Args:
         label_str: Text substituted for the ``#LABEL#`` placeholder.

     Returns:
         bool: True when the query yields one or more bindings, else False.
     """
     # Use a context manager so the file handle is released even when
     # reading the template or running the query raises.
     with open('query_templates/CHECK_EXISTING_LABEL_QUERY.rq', 'r') as query_file:
         query_template = query_file.read()
     QUERY = query_template.replace("#QID#", self.Organism_QID).replace(
         "#LABEL#", label_str)
     results = WDItemEngine.execute_sparql_query(
         QUERY)['results']['bindings']
     return len(results) != 0
Esempio n. 4
0
def get_wikidata_complexes():
    """Return every Wikidata item that has a Complex Portal ID (P7718).

    Returns:
        The query result as returned by ``WDItemEngine`` with
        ``as_dataframe=True``, after the ``http://www.wikidata.org/entity/``
        prefix has been removed from its values (regex replace).
    """
    complex_portal_query = """
    SELECT ?item ?ComplexPortalID
    WHERE 
    {
    ?item wdt:P7718 ?ComplexPortalID .
    }"""
    raw_frame = WDItemEngine.execute_sparql_query(complex_portal_query,
                                                  as_dataframe=True)
    # Strip the entity URI prefix so only the bare identifiers remain.
    prefix_to_blank = {"http://www.wikidata.org/entity/": ""}
    return raw_frame.replace(prefix_to_blank, regex=True)
 def get_interacted_RNA_references(self):
     """Render the interactions recorded for ``self.QID`` as an HTML table.

     The template ``ALL_INTERACTED_SRNA_QUERY.rq`` is read, ``#QID#`` is
     replaced with ``self.QID`` and the query is executed via
     ``WDItemEngine``. Every binding becomes one numbered table row.

     Returns:
         str: ``"Query returns nothing."`` when the query yields no bindings;
         otherwise an HTML fragment consisting of a heading (built from
         ``self.get_wd_label()``) followed by the table markup.
     """
     with open('ALL_INTERACTED_SRNA_QUERY.rq', 'r') as query_file:
         query_template = query_file.read()
     QUERY = query_template.replace("#QID#", self.QID)
     results = WDItemEngine.execute_sparql_query(
         QUERY)['results']['bindings']
     if len(results) == 0:
         return "Query returns nothing."
     # NOTE(review): binding values are placed into the markup unescaped and
     # the table is rendered with escape=False; confirm the query results can
     # be trusted, or escape them before display.
     interacted_RNA_references = [
         [
             row_num,
             result['rnaLabel']['value'],
             result['propLabel']['value'],
             result['targetLabel']['value'],
             f"{result['quote']['value']}"
             '</br><a target="_self" href="Article_Viewer.html?article_PMCID='
             f"{result['PMCID']['value']}"
             f"&quote={urllib.parse.quote_plus(result['quote']['value'])}"
             '">Read this in the article</a>',
             '<div class="form-control"><a target="_blank" href="'
             f"{result['rna']['value']}"
             '"><img src="static/images/Interact_logo_Wikidata.png" '
             'height="30px" class="rounded"></a></div>',
         ]
         for row_num, result in enumerate(results, start=1)
     ]
     data_tbl_cols = [
         '#', 'sRNA', 'Type of Regulation', 'Target Gene', 'Quote', 'Source'
     ]
     data_tbl_df = pd.DataFrame(interacted_RNA_references,
                                columns=data_tbl_cols)
     # None (not the deprecated -1) is the supported way to disable column
     # truncation, so long quotes and links are emitted in full.
     pd.set_option('display.max_colwidth', None)
     data_tbl = data_tbl_df.to_html(index=False,
                                    escape=False,
                                    bold_rows=False,
                                    max_rows=None,
                                    max_cols=None,
                                    table_id="data_tbl",
                                    justify="center")
     # Swap pandas' default table attributes for the ones the page expects.
     data_tbl = data_tbl.replace('border="1" ', "")
     data_tbl = data_tbl.replace(
         'class="dataframe" ', 'class="display responsive no-wrap" '
         'style="font-family: Courier New; font-size: 13px;"')
     final_html = f"<div><h4>Referenced items: {self.get_wd_label()}</h4></div>{data_tbl}"
     return final_html
 def get_wd_label(self):
     """Fetch the label for ``self.QID`` using ``Label_Fetch_Query.rq``.

     ``#QID#`` in the template is replaced with ``self.QID`` and the query is
     executed via ``WDItemEngine``.

     Returns:
         str: The ``label`` value when the query yields exactly one binding;
         otherwise an empty string (a message is printed in that case).
     """
     # Use a context manager so the file handle is released even when the
     # query below raises.
     with open('Label_Fetch_Query.rq', 'r') as query_file:
         query_template = query_file.read()
     QUERY = query_template.replace("#QID#", self.QID)
     results = WDItemEngine.execute_sparql_query(
         QUERY)['results']['bindings']
     item = ""
     if len(results) == 0:
         print("Query returns no items for the specified Q-ID.")
     elif len(results) == 1:
         item = results[0]['label']['value']
     else:
         print("Query returns more that Item for the same Q-ID.")
     return item
 def get_QID(self, query):
     """Run ``query`` and collect the QIDs of the returned ``item`` values.

     Args:
         query: SPARQL query string whose bindings carry an ``item`` variable.

     Returns:
         list: ``["NOT_FOUND_IN_WD"]`` when there are no bindings; otherwise
         one entry per binding with the ``http://www.wikidata.org/entity/``
         prefix removed. When several bindings come back, a warning is
         printed for each of them.
     """
     bindings = WDItemEngine.execute_sparql_query(
         query)['results']['bindings']
     if len(bindings) == 0:
         return ["NOT_FOUND_IN_WD"]
     ambiguous = len(bindings) > 1
     item_QID = []
     for binding in bindings:
         uri = binding['item']['value']
         item_QID.append(uri.replace("http://www.wikidata.org/entity/", ""))
         if ambiguous:
             print(
                 "Warning: Query returns more than one item for the same gene name! Selected: "
                 + binding['item']['value'])
     return item_QID
Esempio n. 8
0
def search_for_drug(drug_name):
    """Search Wikidata for items matching ``drug_name``.

    Uses the ``EntitySearch`` MediaWiki API service and keeps only items that
    are an instance or subclass (P31 / P279) of Q11173.

    Args:
        drug_name (str): Free-text search term.

    Returns:
        list[dict]: One dict per binding, mapping each variable name to its
        value with the ``http://www.wikidata.org/entity/`` prefix removed.
    """
    # The term is spliced into a double-quoted SPARQL literal, so characters
    # that would end or corrupt that literal must be escaped first.
    escaped_name = (drug_name.replace("\\", "\\\\")
                    .replace('"', '\\"')
                    .replace("\n", "\\n")
                    .replace("\r", "\\r"))
    query = """SELECT ?item ?label WHERE {
      SERVICE wikibase:mwapi {
          bd:serviceParam wikibase:api "EntitySearch" .
          bd:serviceParam wikibase:endpoint "www.wikidata.org" .
          bd:serviceParam mwapi:search "***s***" .
          bd:serviceParam mwapi:language "en" .
          ?item wikibase:apiOutputItem mwapi:item .
          ?label wikibase:apiOutputItem mwapi:label
      }
      ?item (wdt:P279|wdt:P31) wd:Q11173 .
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }""".replace("***s***", escaped_name)
    results = [{
        k: v['value'].replace("http://www.wikidata.org/entity/", "")
        for k, v in x.items()
    } for x in WDItemEngine.execute_sparql_query(query)['results']['bindings']]
    return results
Esempio n. 9
0
def get_wikidata_item_by_propertyvalue(property, value):
    """Gets a Wikidata item for a determined property-value pair

    Args:
        property (str): The property to search
        value (str): The value of said property

    Returns:
        The fifth ``/``-separated segment of the first matching item's URI
        (the QID for ``http://www.wikidata.org/entity/Q...`` URIs), or NaN
        when the query yields no bindings (a message is printed).
    """
    # The ``pd.np`` alias is gone from current pandas; import numpy directly
    # (it is a hard dependency of pandas) so the same NaN object is returned.
    import numpy as np

    query_result = WDItemEngine.execute_sparql_query(
        f'SELECT distinct ?item WHERE {{ ?item wdt:{property} "{value}" }}')
    try:
        match = query_result["results"]["bindings"][0]
    except IndexError:
        print(f"Couldn't find item for {value}")
        return np.nan
    qid = match["item"]["value"]

    qid = qid.split("/")[4]
    return qid
Esempio n. 10
0
 def get_existing(cls):
     """Load the existing combination items and cache them on the class.

     Queries items that have parts (P527) and are an instance or subclass
     (P31 / P279) of Q1304270, then stores two mappings on ``cls``:

     * ``qid_combo``: item QID -> frozenset of part QIDs
     * ``combo_qid``: frozenset of part QIDs -> item QID

     Raises:
         AssertionError: when two items share the same set of parts, so the
             reverse mapping would lose entries.
     """
     query_str = """SELECT ?item ?itemLabel (GROUP_CONCAT(?part; separator=";") as ?f) WHERE {
       ?item wdt:P527 ?part .
       ?item wdt:P31|wdt:P279 wd:Q1304270 .
       SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
     } GROUP BY ?item ?itemLabel"""
     bindings = WDItemEngine.execute_sparql_query(
         query_str)['results']['bindings']
     qid_combo = {}
     for binding in bindings:
         item_qid = binding['item']['value'].replace(
             "http://www.wikidata.org/entity/", "")
         part_uris = binding['f']['value'].split(";")
         qid_combo[item_qid] = frozenset([
             uri.replace("http://www.wikidata.org/entity/", "")
             for uri in part_uris
         ])
     # Invert the mapping; equal lengths prove no two items share a part set.
     combo_qid = {}
     for item_qid, parts in qid_combo.items():
         combo_qid[parts] = item_qid
     assert len(combo_qid) == len(qid_combo)
     cls.combo_qid = combo_qid
     cls.qid_combo = qid_combo
 def get_QID_for_duplicated(self, locus_tag):
     """Look up the item URI for a locus tag within ``self.Organism_QID``.

     The template ``query_templates/FIND_ITEM_BY_LOCUS_TAG.rq`` is read,
     ``#QID#`` is replaced with ``self.Organism_QID`` and ``#LOCUS_TAG#`` with
     ``locus_tag``, and the query is executed via ``WDItemEngine``.

     Args:
         locus_tag: Text substituted for the ``#LOCUS_TAG#`` placeholder.

     Returns:
         str: The ``item`` value of the first binding, or an empty string
         when there are no bindings. A message is printed when there are
         zero bindings or more than one.
     """
     # Use a context manager so the file handle is released even when the
     # query below raises.
     with open('query_templates/FIND_ITEM_BY_LOCUS_TAG.rq', 'r') as query_file:
         query_template = query_file.read()
     QUERY = query_template.replace("#QID#", self.Organism_QID).replace(
         "#LOCUS_TAG#", locus_tag)
     results = WDItemEngine.execute_sparql_query(
         QUERY)['results']['bindings']
     if len(results) == 0:
         print("Query returns no items for the specified Q-ID.")
         return ""
     # With one or several bindings the first one is the answer.
     item_QID = results[0]['item']['value']
     if len(results) > 1:
         print("Query returns more that Item for the same Q-ID.")
     return item_QID
Esempio n. 12
0
def get_doid_qid_map():
    """Build a one-to-one DOID -> QID map from Wikidata.

    Only P699 statements that have no P4390 qualifier, or whose qualifier is
    Q39893449, are kept. Any DOID that is attached to more than one item, and
    any item that carries more than one DOID, is dropped entirely.

    Returns:
        dict: DOID -> QID.
    """
    query = """
    SELECT distinct ?disease ?doid ?mrt WHERE {
      ?disease p:P699 ?s_doid .
      ?s_doid ps:P699 ?doid .
      OPTIONAL {?s_doid pq:P4390 ?mrt} .
    }
    """
    df = WDItemEngine.execute_sparql_query(query, as_dataframe=True)
    # regex=False: the prefix is a literal string, and the default for
    # ``regex`` differs between pandas versions.
    df['disease'] = df.disease.str.replace("http://www.wikidata.org/entity/", "", regex=False)
    df = df[df.mrt.isnull() |
            (df.mrt == "http://www.wikidata.org/entity/Q39893449")]
    # Rebind instead of inplace=True: dropping in place on a filtered frame
    # triggers pandas' SettingWithCopyWarning.
    df = df.drop_duplicates(subset=['disease', 'doid'])
    # make sure one doid goes with one qid: flag every row involved in a clash
    ambiguous = df.duplicated("disease", keep=False) | df.duplicated("doid", keep=False)
    df = df[~ambiguous]
    doid_qid = dict(zip(df.doid, df.disease))

    return doid_qid
Esempio n. 13
0
def get_wikidata_do_mesh():
    """Collect the MeSH xrefs (with mapping relation type) of items that have a DOID.

    Returns:
        dict: keyed by the first element of each item's DOID set; the value is
        the set of non-null strings built per row as
        ``QID_MAP_REL_TYPE_CURIE[mesh_rt] + "_MESH:" + mesh``. Entries whose
        set is empty are left out.
    """
    # get mesh xrefs, and including mapping relation type
    # {'DOID:0050856': {'skos:broadMatch_D019958'}}
    # NOTE(review): the example above has no "MESH:" part, whereas the code
    # below concatenates "_MESH:" before the MeSH ID -- confirm which is current.
    query = """
    select ?item ?doid ?mesh ?mesh_rt where {
      ?item wdt:P699 ?doid .
      ?item p:P486 ?mesh_s .
      ?mesh_s ps:P486 ?mesh .
      optional { ?mesh_s pq:P4390 ?mesh_rt }
    }"""
    results = WDItemEngine.execute_sparql_query(query)['results']['bindings']
    # Flatten each binding to {variable: value} and strip the entity URI prefix.
    results = [{k: v['value'].replace("http://www.wikidata.org/entity/", "") for k, v in item.items()} for item in
               results]

    df = pd.DataFrame(results)
    # Replace the relation-type QID with "<curie>_MESH:<mesh id>".
    # NOTE(review): ?mesh_rt is OPTIONAL in the query, so rows without the
    # qualifier presumably hold a missing value here and the dictionary lookup
    # would fail for them -- verify against QID_MAP_REL_TYPE_CURIE.
    df['mesh_rt'] = df.apply(lambda row: QID_MAP_REL_TYPE_CURIE[row.mesh_rt] + "_MESH:" + row.mesh, axis=1)

    # Group on a copy of the item column so 'item' itself is aggregated too.
    df['_item'] = df['item']
    # Per item, reduce every column to the set of its non-null values.
    r = df.groupby("_item").aggregate(lambda x: set(y for y in x if not pd.isnull(y))).to_dict("records")
    # Key each record by one DOID taken from its DOID set.
    wd = {list(x['doid'])[0]: x for x in r}
    wd = {k: v['mesh_rt'] for k, v in wd.items()}
    # Drop DOIDs whose xref set ended up empty.
    wd = {k: v for k, v in wd.items() if v}
    return wd
Esempio n. 14
0
def get_wikidata_do_xrefs():
    """
    From wikidata, get the external-id xrefs of all items with a DOID on them.

    One query is run for each external-id property listed in ``xref_pid``
    below. A row is kept only when the xref statement has no MONDO (P5270)
    reference and the DOID statement has no P4390 qualifier or the qualifier
    is Q39893449.

    :return: dict keyed by DOID. Every value is ``{'xref': set}`` where the set
        holds ``"<PREFIX>:<id>"`` strings and ``<PREFIX>`` is the upper-cased
        key of ``xref_pid``, e.g.
        'DOID:8499': {'xref': {'ICD10CM:H53.6', 'MESH:D009755', 'NCI:C34850'}}
    """

    # get xrefs on items where DO is an exact match, and the statement doesn't have a MONDO reference
    # getting ones in which it has no reference, or it has a reference which is not from mondo
    # is too complicated, so get the ref info and we'll filter out the mondo ones afterwards
    query_template = """
    SELECT ?disease ?doid ?xref ?match ?mondo WHERE {{
      ?disease p:P699 ?s_doid .
      ?s_doid ps:P699 ?doid .
      OPTIONAL {{ ?s_doid pq:P4390 ?match}}
      ?disease p:{xref_pid} ?s_xref .
      ?s_xref ps:{xref_pid} ?xref .
      OPTIONAL {{?s_xref prov:wasDerivedFrom ?ref .
                       ?ref pr:P5270 ?mondo }}
    }}
    """

    # these match the prefixes in DO (but uppercase)
    xref_pid = {
        'omim': 'P492',
        'icd9cm': 'P1692',
        'icd10cm': 'P4229',
        'ordo': 'P1550',
        'gard': 'P4317',
        'mesh': 'P486',
        'UMLS_CUI': 'P2892',
        'nci': 'P1748',
    }

    dfs = []
    for xref, pid in xref_pid.items():
        query = query_template.format(xref_pid=pid)
        df = WDItemEngine.execute_sparql_query(query, as_dataframe=True)
        # has no reference but if it does, is not a mondo reference
        df = df[df.mondo.isnull()]
        # has no qualifier or is an exact match
        df = df[df.match.isnull() |
                (df.match == "http://www.wikidata.org/entity/Q39893449")]
        # one row per DOID, xref values packed into a comma-separated string
        df = df.groupby('doid').xref.apply(
            lambda x: ",".join(x)).reset_index().set_index("doid")
        df = df.rename(columns={'xref': xref.upper()})
        dfs.append(df)

    # join all of these dfs together; an outer join is required because the
    # default left join would discard every DOID that is missing from the
    # first frame, i.e. that has no xref of the first kind
    dfj = reduce(lambda x, y: x.join(y, how="outer"), dfs)

    d = dict()
    for doid, row in dfj.iterrows():
        s = set()
        for k, v in row.to_dict().items():
            # NaN marks an xref kind this DOID does not have
            if not pd.isnull(v):
                s.update(k + ":" + vv for vv in v.split(","))
        d[doid] = {'xref': s}

    return d
Esempio n. 15
0
def process_query_string(query):
    """Execute *query* via WDItemEngine and return its formatted bindings.

    The ``bindings`` entry of the response's ``results`` (``None`` when the
    key is absent) is handed to ``format_wikidata_bindings``.
    """
    response = WDItemEngine.execute_sparql_query(query)
    return format_wikidata_bindings(response['results'].get('bindings'))
Esempio n. 16
0
# Distinct Drug values that contain at least one ";", i.e. name several parts.
combo = set(df[(df.Drug.str.count(";") > 0)].Drug)
# Combination string -> frozenset of the values name_qid holds for each
# lower-cased part (None for a part that is missing, assuming name_qid is a dict).
combo_parts_qid = {
    k: frozenset(name_qid.get(kk.lower()) for kk in k.split(";"))
    for k in combo
}
#qid_combo_parts = {v:k for k,v in combo_parts_qid.items()}
# combo = set([x for x in combo if all(y.lower() in name_qid for y in x.split(";"))])  # some have families
# will have to make these combinations in wikidata

# get existing combinations:
# items that have parts (P527) and are an instance (P31) of Q1304270; the part
# URIs of each item are concatenated with ";" into ?f
query_str = """SELECT ?item ?itemLabel (GROUP_CONCAT(?part; separator=";") as ?f) WHERE {
  ?item wdt:P527 ?part .
  ?item wdt:P31 wd:Q1304270 .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
} GROUP BY ?item ?itemLabel"""
results = WDItemEngine.execute_sparql_query(query_str)['results']['bindings']
# Item QID -> frozenset of its part QIDs (entity URI prefix stripped).
combo_qid = {
    x['item']['value'].replace("http://www.wikidata.org/entity/", ""):
    frozenset([
        y.replace("http://www.wikidata.org/entity/", "")
        for y in x['f']['value'].split(";")
    ])
    for x in results
}
# Reverse lookup: frozenset of part QIDs -> item QID.
qid_combo = {v: k for k, v in combo_qid.items()}
# Equal sizes mean no two items share the same set of parts.
assert len(combo_qid) == len(qid_combo)

# ---------------- Create combination treatment items
login = WDLogin(user=WDUSER, pwd=WDPASS)
for name_str, items in combo_parts_qid.items():
    if not all(x for x in items):
Esempio n. 17
0
from wikidataintegrator.wdi_core import WDItemEngine
from scheduled_bots.local import GREGUSER, GREGPASS

# NOTE(review): WDLogin is not among the imports visible directly above this
# line -- confirm it is imported elsewhere in the file.
login = WDLogin(GREGUSER, GREGPASS)
# Hard-coded path to the log of one specific bot run.
log_path = "/home/gstupp/projects/wikidata-biothings/scheduled_bots/scheduled_bots/ontology/logs/Monarch Disease Ontology-20180727_17:11.log"

# from a sparql query, get a list of items that we want to undo edits from
# (items carrying more than one distinct P5270 value, highest count first)
query = """
select ?item (count(*) as ?c) where {
  SELECT DISTINCT ?item ?mondo WHERE {
    ?item wdt:P5270 ?mondo
  }
} group by ?item
having (?c > 1)
order by desc(?c)"""
item_df = WDItemEngine.execute_sparql_query(query, as_dataframe=True)
# Strip the entity URI prefix from the item column.
item_df.item = item_df.item.str.replace("http://www.wikidata.org/entity/", "")
items = list(item_df.item)


# read in a log of the run
def parse_log(file_path):
    df = pd.read_csv(file_path,
                     sep=",",
                     names=[
                         'Level', 'Timestamp', 'External ID', 'Prop', 'QID',
                         'Message', 'Msg Type', 'Rev ID'
                     ],
                     skiprows=2,
                     dtype={
                         'External ID': str,
    ?item2 wdt:P31|wdt:P279 wd:Q8054 .
    FILTER NOT EXISTS {{?item1 wdt:P703 wd:Q15978631}}
	FILTER( ?item1 != ?item2 && STR( ?item1 ) < STR( ?item2 ) ) .
}}"""

# Pairs of distinct items that share the same P351 value and the same P703
# value, excluding pairs whose ?item1 has P703 = Q15978631. The
# STR(?item1) < STR(?item2) test keeps each unordered pair only once.
# NOTE(review): the braces are doubled as if for str.format, but no format()
# call on this string is visible here -- confirm the doubling is intended.
s_gene = """
SELECT DISTINCT ?item1 ?item2 ?value {{
	?item1 wdt:P351 ?value .
	?item2 wdt:P351 ?value .
    ?item1 wdt:P703 ?taxon1 .
    ?item2 wdt:P703 ?taxon2 .
	FILTER( ?item1 != ?item2 && STR( ?item1 ) < STR( ?item2 ) && ?taxon1 = ?taxon2) .
    FILTER NOT EXISTS {{?item1 wdt:P703 wd:Q15978631}}
}}"""

# Select which query the code that follows runs.
s = s_gene

# Reduce every binding to {variable: last "/"-separated segment of its value}.
items = [
    {name: cell['value'].split("/")[-1] for name, cell in row.items()}
    for row in WDItemEngine.execute_sparql_query(s)['results']['bindings']
]
# Merge item2 into item1 for each pair; a failed merge is printed and the
# loop moves on to the next pair.
for x in tqdm(items):
    try:
        WDItemEngine.merge_items(from_id=x['item2'],
                                 to_id=x['item1'],
                                 login_obj=login,
                                 ignore_conflicts='statement|description|sitelink')
    except MergeError as e:
        print(e)
Esempio n. 19
0
def process_query_string(query):
    """Run a SPARQL query against ``SPARQL_ENDPOINT_URL`` via WDItemEngine.

    The ``bindings`` entry of the response's ``results`` (``None`` when the
    key is absent) is handed to ``_format_wikidata_bindings`` and its return
    value is passed back to the caller.
    """
    response = WDItemEngine.execute_sparql_query(
        query, endpoint=SPARQL_ENDPOINT_URL)
    raw_bindings = response['results'].get('bindings')
    return _format_wikidata_bindings(raw_bindings)