def test_make_gene_class():
    """Build a ``Gene`` from one fixed mygene document and write it out.

    Looks up the document with ``_id`` ``'100861512'`` in the
    ``wikidata_src.mygene`` collection, passes it through
    ``HelperBot.validate_docs`` and ``HelperBot.tag_mygene_docs`` and uses the
    first tagged record to construct a ``Gene``.  The item is created with
    ``fast_run=False, write=True`` and deprecated statements are removed
    afterwards, so this test is not read-only.
    """
    gene_collection = MongoClient().wikidata_src.mygene
    source_metadata = MongoClient().wikidata_src.mygene_sources.find_one()

    query = {'_id': '100861512'}
    cursor = gene_collection.find(query)
    # The count comes from a second cursor; ``cursor`` itself is handed on
    # to the validation step below.
    print("total number of records: {}".format(gene_collection.find(query).count()))

    validated = HelperBot.validate_docs(cursor, 'eukaryotic', 'P351')
    tagged_records = HelperBot.tag_mygene_docs(validated, source_metadata)
    first_record = next(tagged_records)

    organism_info = dict([
        ("name", "H**o sapiens"),
        ("type", "mammalian"),
        ("wdid", "Q15978631"),
        ('taxid', 9606),
    ])

    credentials = wdi_login.WDLogin(WDUSER, WDPASS)

    gene_item = Gene(first_record, organism_info, credentials)
    gene_item.create_item(fast_run=False, write=True)
    gene_item.remove_deprecated_statements()
Exemple #2
0
def wd_oauth(request):
    """
    Handle the Wikimedia OAuth1 handshake for a JSON POST request.

    The decoded body selects one of three actions by key: ``initiate``
    starts the handshake, ``url`` completes it from the callback URL and
    ``deauthenticate`` clears the stored session entries.

    :param request: request object providing ``method``, ``body`` and ``session``
    :return: a ``JsonResponse`` for a recognised action; ``None`` for a
        non-POST request or a body containing none of the three keys
    """
    if request.method != 'POST':
        return None

    payload = json.loads(request.body.decode('utf-8'))
    pprint(payload)
    actions = payload.keys()

    if 'initiate' in actions:
        # Start the handshake: the login object is created with the consumer
        # token and the client is told where to redirect the user.
        callback_uri = 'http://chlambase.org{}/authorized/'.format(payload['current_path'])
        handshake = wdi_login.WDLogin(consumer_key=consumer_key,
                                      consumer_secret=consumer_secret,
                                      callback_url=callback_uri)
        request.session['authOBJ'] = jsonpickle.encode(handshake)
        return JsonResponse({'wikimediaURL': handshake.redirect})

    if 'url' in actions:
        # Resume the pickled handshake with the callback URL sent by the client.
        handshake = jsonpickle.decode(request.session['authOBJ'])
        handshake.continue_oauth(oauth_callback_data=payload['url'])
        request.session['login'] = jsonpickle.encode(handshake)
        return JsonResponse(payload)

    if 'deauthenticate' in actions:
        # Revoke: blank the session entries.
        # NOTE(review): this clears 'authentication', while the handshake is
        # stored under 'authOBJ' — confirm which key is intended.
        request.session['authentication'] = None
        request.session['login'] = None
        return JsonResponse({'deauthenicate': True})

    return None
Exemple #3
0
def main():
    """Index everything served by an OParl endpoint into a Wikibase instance.

    Command-line options select the OParl endpoint, the Wikibase server, the
    API URL template and the schema directory.  The cache locations and the
    bot credentials are fixed in the code.
    """
    arg_parser = argparse.ArgumentParser()
    for flag, fallback in (
            ("--oparl-endpoint", "http://localhost:8080/oparl/v1.0"),
            ("--wikibase-server", "mediawiki.local"),
            ("--base-url-template", "http://{}/api.php"),
            ("--schemadir", "/home/konsti/oparl/schema"),
    ):
        arg_parser.add_argument(flag, default=fallback)
    options = arg_parser.parse_args()
    server = options.wikibase_server
    url_template = options.base_url_template

    url_cache = IdiomaticFileCache("/home/konsti/cache-idiomatic/url-to-json")
    oparl_client = ServerIterator(url=options.oparl_endpoint, cache=url_cache)

    # NOTE(review): the password is hard-coded in the source; consider
    # loading it from the environment or a secrets store.
    login = wdi_login.WDLogin(user='******',
                              pwd='citsdvh4ct69bqepeiblc8p5njnrq26j',
                              server=server,
                              base_url_template=url_template)

    mapping = get_properties_mapping_cached(options.schemadir, login,
                                            server, url_template)

    item_id_cache = IdiomaticFileCache(
        "/home/konsti/cache-idiomatic/url-to-wd-item-id")
    OParlToWikidata(oparl_client, login, mapping, item_id_cache,
                    server, url_template).index_all()
    def __init__(self, mediawiki_api_url, sparql_endpoint_url, username,
                 password):
        """
        Store the connection settings and prepare the Wikibase helpers.

        :param mediawiki_api_url: MediaWiki API URL, passed to the item engine
            factory and to the login
        :param sparql_endpoint_url: SPARQL endpoint URL, passed to the item
            engine factory
        :param username: account name used for the login
        :param password: password used for the login
        """
        # Example values kept from the original notes:
        #   mediawiki_api_url = "http://localhost:7171/w/api.php"
        #   sparql_endpoint_url = "http://localhost:7272/proxy/wdqs/bigdata/namespace/wdq/sparql"
        #   username = "******"
        #   password = "******"
        (self.mediawiki_api_url,
         self.sparql_endpoint_url,
         self.username,
         self.password) = (mediawiki_api_url, sparql_endpoint_url,
                           username, password)

        engine_factory = wdi_core.WDItemEngine.wikibase_item_engine_factory
        self.localItemEngine = engine_factory(mediawiki_api_url,
                                              sparql_endpoint_url)
        self.login = wdi_login.WDLogin(
            username, password, mediawiki_api_url=mediawiki_api_url)
Exemple #5
0
    def __init__(self):
        """Log in, load the mixtures and cache the unambiguous ``P3345`` lookups.

        ``rxnorm_qid`` keeps only the ``id_mapper`` entries whose value
        collection holds exactly one element, mapped to that element.
        """
        self.login = wdi_login.WDLogin(WDUSER, WDPASS)
        self._get_mixtures_in_wd()

        candidates = wdi_helpers.id_mapper("P3345", return_as_set=True)
        self.rxnorm_qid = {
            rxcui: next(iter(qids))
            for rxcui, qids in candidates.items()
            if len(qids) == 1
        }
    def test_prepare_for_wikidata_function(self):
        """Run ``prepare_for_wikidata`` for cell line ``CVCL_2260``.

        The prepared data and the data to delete are only printed; the one
        meaningful assertion checks the ``cell_line_id`` attribute.
        """
        lookup_tables = utils.load_pickle_file(
            "../tests/cellosaurus_informations_to_wikidata_ids.pickle")

        species = lookup_tables["species"]
        references = lookup_tables["references"]
        categories = utils.get_cell_line_category_to_wikidata("../project/category.txt")
        diseases = lookup_tables["diseases"]
        dump = utils.format_cellosaurus_dump_as_dictionary("../project/test_cellosaurus.txt")
        existing_lines = utils.query_wikidata_for_cell_lines()
        release_qid = "Q87574023"
        credentials = wdi_login.WDLogin(WDUSER, WDPASS)

        cell_line = utils.CellossaurusCellLine(
            wdi_login_object=credentials,
            release_qid=release_qid,
            cellosaurus_dump=dump,
            wikidata_dictionary_with_existing_cell_lines=existing_lines,
            references=references,
            species=species,
            cell_line_categories=categories,
            diseases=diseases,
            cell_line_id="CVCL_2260",
        )
        prepared, to_delete = cell_line.prepare_for_wikidata()
        for payload in (prepared, to_delete):
            print(payload)

        # Placeholder assertion kept from the original test.
        self.assertEqual(1, 1)

        self.assertEqual(cell_line.cell_line_id, "CVCL_2260")
Exemple #7
0
def wd_oauth(request):
    """Handle the Wikimedia OAuth1 handshake for a JSON POST request.

    The decoded body selects the action by key: ``initiate`` starts the
    handshake, ``url`` completes it and ``deauthenticate`` clears the stored
    session entries.  Returns a ``JsonResponse`` for a recognised action and
    ``None`` for a non-POST request or a body with none of those keys.
    """
    if request.method != 'POST':
        return None

    payload = json.loads(request.body.decode('utf-8'))
    pprint(payload)
    actions = payload.keys()

    if 'initiate' in actions:
        print(payload)
        # Where the user should be sent back to once they have authorized.
        callback_uri = "http://chlambase.org" + payload['current_path'] + '/authorized/'
        handshake = wdi_login.WDLogin(
            consumer_key=oauth_config.consumer_key,
            consumer_secret=oauth_config.consumer_secret,
            callback_url=callback_uri)
        request.session['authOBJ'] = jsonpickle.encode(handshake)
        return JsonResponse({'wikimediaURL': handshake.redirect})

    if 'url' in actions:
        # Resume the pickled handshake with the callback URL from the client.
        handshake = jsonpickle.decode(request.session['authOBJ'])
        handshake.continue_oauth(oauth_callback_data=payload['url'])
        request.session['login'] = jsonpickle.encode(handshake)
        return JsonResponse(payload)

    if 'deauthenticate' in actions:
        # NOTE(review): clears 'authentication', while the handshake is stored
        # under 'authOBJ' — confirm which key is intended.
        request.session['authentication'] = None
        request.session['login'] = None
        return JsonResponse({'deauthenicate': True})

    return None
def main(json_path='doid.json', log_dir="./logs", fast_run=True, write=True):
    """Create or update Disease Ontology items, then prune deprecated statements.

    :param json_path: path to the JSON export; its ``graphs`` list must
        contain the ``doid.owl`` and ``doid/obo/ext.owl`` graphs
    :param log_dir: directory handed to ``WDItemEngine.setup_logging``
    :param fast_run: forwarded to ``DOGraph``
    :param write: forwarded to each node's ``create``
    :return: ``None``
    """
    login = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)
    # ``log_name`` and ``__metadata__`` are module-level names.
    wdi_core.WDItemEngine.setup_logging(log_dir=log_dir, logger_name='WD_logger', log_name=log_name,
                                        header=json.dumps(__metadata__))

    with open(json_path) as f:
        d = json.load(f)
    graphs = {g['id']: g for g in d['graphs']}
    graph = graphs['http://purl.obolibrary.org/obo/doid.owl']
    # get the has phenotype, has_material_basis_in, and transmitted by edges from another graph
    graph['edges'].extend(graphs['http://purl.obolibrary.org/obo/doid/obo/ext.owl']['edges'])
    do = DOGraph(graph, login, fast_run)
    nodes = sorted(do.nodes.values(), key=lambda x: x.doid)

    # Keep only the truthy results of ``create``.
    items = []
    for node in tqdm(nodes):
        item = node.create(write=write)
        if item:
            items.append(item)

    # NOTE(review): presumably waits for the query service to catch up with
    # the edits before re-reading the id mapping — confirm.
    sleep(10 * 60)
    doid_wdid = id_mapper('P699')

    # Guard against an empty ``items`` list: previously ``items[0]`` raised
    # IndexError when no node produced an item.
    frc = items[0].fast_run_container if items else None
    if not frc:
        print("fastrun container not found. not removing deprecated statements")
        return None
    frc.clear()
    for doid in tqdm(doid_wdid.values()):
        remove_deprecated_statements(doid, frc, do.release, list(PROPS.values()), login)

    print("You have to remove these deprecated diseases manually: ")
    print(get_deprecated_nodes(graph))
Exemple #9
0
class TestRelease2(unittest.TestCase):
    """Test case whose class body performs a Cellosaurus update when it is defined.

    Everything before ``test_correct_release`` is executed at class-definition
    time, not per test: the login, the lookup tables, the ``Create_Update``
    instance and the update loop.
    """


    # Shared fixtures, built once while the class body is executed.
    login = wdi_login.WDLogin(WDUSER, WDPASS)
    species = cellosaurus_informations_to_wikidata_ids["species"]
    references = cellosaurus_informations_to_wikidata_ids["references"]
    diseases = cellosaurus_informations_to_wikidata_ids["diseases"]
    categories = get_cell_line_category_to_wikidata("project/category.txt")

    wikidata=query_wikidata_for_cell_lines()
    cellosaurus_release = Create_Update(login=login, releaseID="Q87574023", cellosaurus=cellosaurus_dump_in_dictionary_format, wikidata=wikidata,
                                     references=references, species=species, categories=categories, diseases=diseases)

    # NOTE(review): ``cell_line`` is only bound by the ``for`` loop further
    # down, so unless a module-level ``cell_line`` exists this call raises
    # NameError. Presumably the timeout was meant to wrap the loop — confirm.
    with eventlet.Timeout(10):
        print("ok, timeout set for 10 seconds")
        cellosaurus_release.UpdateWikidata(
          cell_line, cellosaurus_release.InitialisationData(cell_line))

    # Update every dump entry that is already present in ``wikidata``.
    for cell_line in cellosaurus_dump_in_dictionary_format:
        print(cell_line)

        if cell_line in cellosaurus_release.wikidata:
            print("update")
            cellosaurus_release.UpdateWikidata(cell_line, cellosaurus_release.InitialisationData(cell_line))

    def test_correct_release(self):
        """Check that ``release`` equals ``'Cellosaurus release 34'``."""
        # ``release`` is not defined in this class, so it must be a
        # module-level name.
        self.assertEqual(release,'Cellosaurus release 34')
Exemple #10
0
def main(write=True, run_one=None):
    """Normalize the parsed drug-indication data and run ``InxightBot`` on it.

    :param write: forwarded to ``InxightBot``
    :param run_one: forwarded to ``InxightBot``
    """
    parsed = load_parsed_data()
    credentials = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)

    InxightBot(normalize_to_qids(parsed), credentials,
               write=write, run_one=run_one).run()
Exemple #11
0
 def __init__(self):
     """Set up the login and open the active sheet of the properties workbook."""
     self.login_credentials = dict(
         bot_username="******",
         bot_password="******",
     )
     # NOTE(review): the login uses literal credentials rather than
     # ``login_credentials`` above — confirm which is intended.
     self.login_instance = wdi_login.WDLogin(user="******", pwd="passwd")
     sheet = openpyxl.load_workbook(PATH_EXCEL_PROPERTIES).active
     self.ws = sheet
     self.maxRows = sheet.max_row
     self.maxCol = sheet.max_column
def main():
    """Update the Cellosaurus cell lines listed in the Wikidata pickle.

    Positional command-line arguments:
      1. path to the Cellosaurus dump
      2. folder holding the pickle files
      3. folder for error output
      4. QID of the Cellosaurus release

    The lookup of an id in the pickled Wikidata mapping happens inside the
    ``try`` block, so any failure for a cell line (including that lookup) is
    printed and the loop moves on to the next id.
    """
    # ----------------- INPUT ----------------- #
    cellosaurus_dump_path = sys.argv[1]
    assert cellosaurus_dump_path, "You need to add a Cellosaurus Dump"

    pickle_path = sys.argv[2]
    assert pickle_path, "You need to add a path to the folder with the pickle files"

    folder_for_errors = sys.argv[3]
    assert folder_for_errors, "You need to add a folder for errors"

    release_qid = sys.argv[4]
    assert release_qid, "You need to add a release QID Dump"

    reconciled_dump_path = pickle_path + "/cellosaurus_wikidata_items.pickle"
    wikidata_cell_lines_path = pickle_path + "/cell_lines_on_wikidata.pickle"
    filename_taxons = pickle_path + "/taxons_on_wikidata.pickle"

    dump = format_cellosaurus_dump_as_dictionary(cellosaurus_dump_path)
    matches = load_pickle_file(reconciled_dump_path)

    wikidata_cell_lines = load_pickle_file(wikidata_cell_lines_path)
    login = wdi_login.WDLogin(WDUSER, WDPASS)
    ncbi_id_to_qid_species = load_pickle_file(filename_taxons)

    references = matches["references_dictionary"]
    diseases = matches["diseases_dictionary"]

    categories = load_cell_line_category_to_wikidata("project/category.txt")

    # Arguments that are identical for every cell line.
    shared_kwargs = dict(
        wdi_login_object=login,
        release_qid=release_qid,
        cellosaurus_dump=dump,
        wikidata_dictionary_with_existing_cell_lines=wikidata_cell_lines,
        references=references,
        species=ncbi_id_to_qid_species,
        cell_line_categories=categories,
        diseases=diseases,
    )

    for cellosaurus_id in tqdm(dump):
        tqdm.write(cellosaurus_id)
        try:
            tqdm.write(wikidata_cell_lines[cellosaurus_id])
            cell_line = CellosaurusCellLine(cell_line_id=cellosaurus_id,
                                            **shared_kwargs)
            prepared_data = cell_line.prepare_for_wikidata(folder_for_errors)
            cell_line.update_line_on_wikidata(prepared_data)
        except Exception as e:
            print(e)
    def test_item_generator(self):
        """Fetch three entities via ``generate_item_instances`` and print them.

        Nothing is asserted; each id and its ``entity_metadata`` is printed.
        """
        entity_ids = ['Q408883', 'P715', 'Q18046452']
        empty_login = wdi_login.WDLogin(user='', pwd='')

        generated = wdi_core.WDItemEngine.generate_item_instances(
            items=entity_ids, server='www.wikidata.org', login=empty_login)

        for qid, item in generated:
            print(qid)
            pprint.pprint(item.entity_metadata)
Exemple #14
0
def main(df, log_dir="./logs", fast_run=False):
    """Create missing variant items, then write an annotation for every row.

    The first pass creates an item for each ``gDNA`` value that has no entry
    in the HGVS mapping yet.  The second pass writes the variant annotation
    for every row whose variant and association can both be resolved, and
    logs a warning for the others.

    :param df: DataFrame of variant rows, filtered here with
        ``filter_df_clinical_missense``; the columns ``gDNA``,
        ``individual_mutation``, ``Association``, ``Drug_qid``,
        ``prim_tt_qid``, ``Source`` and ``Evidence level`` are read
    :param log_dir: accepted for interface compatibility; not used here
    :param fast_run: forwarded to ``create_missense_variant_item``
    """
    df = filter_df_clinical_missense(df)

    login = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)

    # make sure we have all the variant items we need
    hgvs_qid = id_mapper(PROPS['HGVS nomenclature'])
    for _, row in tqdm(df.iterrows(), total=len(df)):
        # Fix: the original skipped exactly the rows that needed an item
        # (``continue`` under ``not in``), leaving the creation code
        # unreachable. Skip only variants that are already known.
        if row.gDNA in hgvs_qid:
            continue
        label = "{} ({})".format(row.gDNA, row['individual_mutation'])
        print("creating {}".format(label))
        try:
            item = create_missense_variant_item(row.gDNA,
                                                label,
                                                login,
                                                fast_run=fast_run)
        except Exception as e:
            # Best effort: record the failure and carry on; the second pass
            # reports the variant as not found.
            print(e)
            wdi_core.WDItemEngine.log(
                "ERROR",
                wdi_helpers.format_msg(row.gDNA, "gDNA", None, str(e),
                                       type(e)))
            continue
        # Remember the new item so duplicate rows do not create it again.
        hgvs_qid[row.gDNA] = item.wd_item_id

    for _, row in tqdm(df.iterrows(), total=len(df)):
        if row.gDNA not in hgvs_qid:
            wdi_core.WDItemEngine.log(
                "WARNING",
                wdi_helpers.format_msg(
                    row.gDNA, "gDNA", None,
                    "variant not found: {}".format(row.gDNA),
                    "variant not found"))
            continue
        if row.Association not in association_map:
            wdi_core.WDItemEngine.log(
                "WARNING",
                wdi_helpers.format_msg(
                    row.gDNA, "gDNA", None,
                    "Association not found: {}".format(row.Association),
                    "association not found"))
            continue

        create_variant_annotation(hgvs_qid[row.gDNA],
                                  association_map[row.Association],
                                  row.Drug_qid,
                                  row.prim_tt_qid,
                                  row.Source,
                                  row['Evidence level'],
                                  login)
Exemple #15
0
class Product:
    """A pharmaceutical product identified by a QID and/or an RxNorm CUI.

    The login and both lookup tables are built once, while the class body is
    executed, and are shared by all instances.
    """
    login = wdi_login.WDLogin(WDUSER, WDPASS)
    # Only entries whose value collection holds exactly one element are kept.
    rxnorm_qid = wdi_helpers.id_mapper("P3345", return_as_set=True)
    rxnorm_qid = {
        rxcui: next(iter(qids))
        for rxcui, qids in rxnorm_qid.items()
        if len(qids) == 1
    }
    # Reverse lookup of ``rxnorm_qid``.
    qid_rxnorm = {qid: rxcui for rxcui, qid in rxnorm_qid.items()}

    def __init__(self, qid=None, rxcui=None, label=None):
        """Store the identifiers; with a ``qid``, take the rxcui from the lookup.

        :raises KeyError: if ``qid`` is given but missing from ``qid_rxnorm``
        :raises ValueError: if ``rxcui`` is given and differs from the one
            recorded for ``qid``
        """
        self.qid = qid
        self.rxcui = rxcui
        self.label = label
        if not self.qid:
            return
        # The recorded rxcui for this qid wins over the argument.
        known_rxcui = self.qid_rxnorm[self.qid]
        if rxcui and known_rxcui != rxcui:
            raise ValueError("something is wrong: {}".format((self.qid, self.rxcui, rxcui)))
        self.rxcui = known_rxcui

    def add_active_ingredient(self, ingredient_qid):
        """Write ``P3781`` on this product and the inverse ``P3780`` on the ingredient."""
        assert self.qid
        # Forward statement: written without ``append_value``.
        forward = [wdi_core.WDItemID(ingredient_qid, 'P3781',
                                     references=make_ref(self.rxcui))]
        product_item = wdi_core.WDItemEngine(
            wd_item_id=self.qid, data=forward, domain="drugs",
            fast_run=True, fast_run_use_refs=True,
            fast_run_base_filter={"P3345": ""},
            ref_handler=ref_handlers.update_retrieved_if_new)
        product_item.write(self.login)

        # Inverse statement: ``P3780`` values are appended on the ingredient.
        inverse = [wdi_core.WDItemID(self.qid, 'P3780',
                                     references=make_ref(self.rxcui))]
        ingredient_item = wdi_core.WDItemEngine(
            wd_item_id=ingredient_qid, data=inverse, domain="drugs",
            fast_run=True, fast_run_use_refs=True,
            fast_run_base_filter={"P3345": ""},
            ref_handler=ref_handlers.update_retrieved_if_new,
            append_value=['P3780'])
        ingredient_item.write(self.login)

    def get_or_create(self):
        """Return the QID for ``self.rxcui``, creating the item if it is unknown.

        A newly created item's QID is also stored on ``self.qid``; a QID found
        in ``rxnorm_qid`` is returned without touching ``self.qid``.
        """
        assert self.rxcui
        if self.rxcui in self.rxnorm_qid:
            return self.rxnorm_qid[self.rxcui]
        assert self.label
        statements = [
            # instance of: pharma product
            wdi_core.WDItemID('Q28885102', 'P31', references=make_ref(self.rxcui)),
            wdi_core.WDExternalID(self.rxcui, "P3345", references=make_ref(self.rxcui)),
        ]

        new_item = wdi_core.WDItemEngine(item_name=self.label, data=statements, domain="drugs")
        new_item.set_label(self.label)
        new_item.set_description("pharmaceutical product")
        new_item.write(self.login)
        self.qid = new_item.wd_item_id
        return self.qid
Exemple #16
0
def main(json_path='doid.json', log_dir="./logs", fast_run=True, write=True):
    """Create or update an item for every node of the first graph in the export.

    :param json_path: path to the JSON export; only ``graphs[0]`` is used
    :param log_dir: directory handed to ``WDItemEngine.setup_logging``
    :param fast_run: forwarded to ``DOGraph``
    :param write: forwarded to each node's ``create``
    """
    login = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)
    # ``log_name`` and ``__metadata__`` are module-level names.
    wdi_core.WDItemEngine.setup_logging(
        log_dir=log_dir,
        logger_name='WD_logger',
        log_name=log_name,
        header=json.dumps(__metadata__),
    )

    with open(json_path) as handle:
        first_graph = json.load(handle)['graphs'][0]

    ontology = DOGraph(first_graph, login, fast_run)
    for node in tqdm(ontology.nodes.values()):
        node.create(write=write)
def main(gwas_path='GWAS_Catalog.tsv',
         log_dir="./logs",
         fast_run=False,
         write=True):
    """Set up logging and run ``GeneDiseaseBot`` over a GWAS catalog file.

    :param gwas_path: path passed to the bot as ``catalog_tsv_path``
    :param log_dir: directory handed to ``WDItemEngine.setup_logging``
    :param fast_run: forwarded to ``GeneDiseaseBot``
    :param write: forwarded to ``GeneDiseaseBot``
    """
    credentials = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)
    # ``log_name`` and ``__metadata__`` are module-level names.
    wdi_core.WDItemEngine.setup_logging(
        log_dir=log_dir,
        logger_name='WD_logger',
        log_name=log_name,
        header=json.dumps(__metadata__),
    )

    bot = GeneDiseaseBot(catalog_tsv_path=gwas_path,
                         login=credentials,
                         fast_run=fast_run,
                         write=write)
    bot.run()
def main():
    """Delete ``P9554`` from cell lines whose dump entry has ``hPSCreg == "NULL"``.

    The first command-line argument is the path to the Cellosaurus dump.  The
    candidate items come from the SPARQL query behind the short link
    ``https://w.wiki/3Uxc``.
    """
    # ----------------- INPUT ----------------- #
    cellosaurus_dump_path = sys.argv[1]
    assert cellosaurus_dump_path, "You need to add a Cellosaurus Dump"

    dump = format_cellosaurus_dump_as_dictionary(cellosaurus_dump_path)

    url = "https://w.wiki/3Uxc"
    session = requests.Session()  # so connections are recycled
    # Follow the short link, then turn the query-UI URL into an endpoint URL.
    resolved = session.head(url, allow_redirects=True)
    url_sparql = resolved.url.replace("https://query.wikidata.org/#",
                                      "https://query.wikidata.org/sparql?query=")
    response = requests.get(url_sparql, params={"format": "json"})
    df = pd.json_normalize(response.json()["results"]["bindings"])
    print(df)
    print(df.columns)
    login = wdi_login.WDLogin(WDUSER, WDPASS)

    # The fifth "/"-separated piece of the item URL is used as the QID.
    df["qid"] = [item_url.split("/")[4] for item_url in df["item.value"]]

    ids_on_wikidata = df["cellosaurus.value"].values

    for cellosaurus_id in tqdm(dump):
        if cellosaurus_id not in ids_on_wikidata:
            continue
        print("==========")
        if dump[cellosaurus_id]["hPSCreg"] != "NULL":
            continue

        print(f"Bad id for cell line {cellosaurus_id}")
        statements = [
            wdi_core.WDBaseDataType.delete_statement("P9554"),
            wdi_core.WDExternalID(value=cellosaurus_id, prop_nr="P3289"),
        ]

        matching_rows = df[df["cellosaurus.value"] == cellosaurus_id]
        wd_item = wdi_core.WDItemEngine(
            wd_item_id=matching_rows["qid"].values[0],
            data=statements,
            append_value=["P3289"],
        )
        write_result = wd_item.write(login, bot_account=True)
        print(write_result)
Exemple #19
0
def main(retrieved, fast_run, write):
    """Load the WikiPathways RDF dump and run ``run_one`` for selected pathways.

    Every ``rdf-wp`` archive linked from the dump index is parsed into one
    graph; the pathway ids returned by the SPARQL query below are then
    processed one by one.  A failure for a pathway is logged and does not stop
    the run.

    :param retrieved: forwarded to ``run_one``
    :param fast_run: forwarded to ``run_one``
    :param write: forwarded to ``run_one``
    """
    login = wdi_login.WDLogin(WDUSER, WDPASS)
    temp = Graph()
    url = 'http://data.wikipathways.org/current/rdf'
    page = requests.get(url).text

    # Collect the link targets of the index page as absolute URLs.
    files = []
    for link in BeautifulSoup(page, "lxml", parse_only=SoupStrainer('a')):
        pieces = str(link).split('"')
        if len(pieces) <= 1:
            continue
        filename = pieces[1].replace("./", "/")
        if len(filename) > 1 and filename not in files and filename != "./":
            files.append(url + filename)

    for archive_url in set(files):
        if "rdf-wp" not in archive_url:  # get the most accurate file
            continue
        print(archive_url)
        download = requests.get(archive_url)
        with closing(download), zipfile.ZipFile(io.BytesIO(download.content)) as archive:
            for member in archive.infolist():
                temp.parse(data=archive.read(member).decode(), format="turtle")
        print("size: " + str(len(temp)))

    wp_query = """prefix dcterm: <http://purl.org/dc/terms/>
            prefix wp: <http://vocabularies.wikipathways.org/wp#>
            SELECT DISTINCT ?wpid WHERE {
              ?s rdf:type <http://vocabularies.wikipathways.org/wp#Pathway> ;
                 dcterm:identifier ?wpid ;
                 ?p <http://vocabularies.wikipathways.org/wp#Curation:AnalysisCollection> ;
                 wp:organism <http://purl.obolibrary.org/obo/NCBITaxon_9606> .
              }"""

    wpids = []
    for row in temp.query(wp_query):
        print("%s" % row)
        wpids.append(str(row[0]))

    for pathway_id in wpids:
        try:
            run_one(pathway_id, retrieved, fast_run, write, login, temp)
        except Exception as e:
            traceback.print_exc()
            message = wdi_helpers.format_msg(pathway_id, PROPS['Wikipathways ID'],
                                             None, str(e), type(e))
            wdi_core.WDItemEngine.log("ERROR", message)
Exemple #20
0
def main():
    """Print the properties mapping for the configured schema directory.

    Command-line options select the schema directory, the Wikibase server and
    the API URL template.
    """
    arg_parser = argparse.ArgumentParser()
    for flag, fallback in (
            ("--schemadir", "/home/konsti/oparl/schema"),
            ("--wikibase-server", "mediawiki.local"),
            ("--base-url-template", "http://{}/api.php"),
    ):
        arg_parser.add_argument(flag, default=fallback)
    options = arg_parser.parse_args()
    server = options.wikibase_server
    url_template = options.base_url_template

    # NOTE(review): the password is hard-coded in the source; consider
    # loading it from the environment or a secrets store.
    login = wdi_login.WDLogin(user='******',
                              pwd='citsdvh4ct69bqepeiblc8p5njnrq26j',
                              server=server,
                              base_url_template=url_template)

    mapping = get_properties_mapping_cached(options.schemadir, login,
                                            server, url_template)
    print(mapping)
Exemple #21
0
def create_artist(artist_obj):
    """Create an item for an artist and return what ``write`` returns.

    The item is given three statements (instance of, occupation and the
    MusicBrainz artist id) plus a label and a description.

    :param artist_obj: object passed to ``get_artist_name`` and
        ``get_musicbraiz_artist_id``
    """
    artist_name = get_artist_name(artist_obj)
    statements = [
        wdi_core.WDItemID(value=HUMAN_ID, prop_nr=INSTANCE_OF_ID),
        wdi_core.WDItemID(value=MUSICIAN_ID, prop_nr=OCCUPATION_ID),
        wdi_core.WDExternalID(value=get_musicbraiz_artist_id(artist_obj),
                              prop_nr=MUSIC_BRAINZ_PROP_ID),
    ]
    entity = wdi_core.WDItemEngine(data=statements)
    entity.set_label(artist_name)
    entity.set_description(MUSICIAN)

    # NOTE(review): the credentials are hard-coded and a new login is made on
    # every call.
    credentials = wdi_login.WDLogin(user='******', pwd='nestor2020')
    entity_id = entity.write(credentials)
    print(f"Artist {artist_name} has been added to WikiData server.")
    return entity_id
Exemple #22
0
def main():
    """Update the complexes of one species on Wikidata.

    ``parser`` is not created in this function, so it has to come from the
    enclosing (module) scope.  The script exits with a usage message when
    fewer than four ``sys.argv`` entries are present.
    """
    parser.add_argument("--species", "-s", help="a species species_id", type=str)
    parser.add_argument(
        "--wikidata", "-w",
        help="1 to exclude complexes on Wikidata, 0 to include",
        type=int, default=1,
    )
    parser.add_argument(
        "--number", "-n",
        help="the number of complexes to add",
        type=int, default=999999,
    )
    options = parser.parse_args()

    if len(sys.argv) < 4:
        sys.exit(
            "Usage: python3 update_complex.py -s [species id] -w [boolean] -n [number of complexes]"
        )

    species_id = options.species

    # Make a dataframe for all complexes of a given species
    complexes = utils.get_list_of_complexes(
        utils.get_complex_portal_dataset_urls(),
        species_id=species_id,
        test_on_wikidata=bool(options.wikidata),
        max_complexes=options.number,
    )
    login_instance = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)
    references = utils.prepare_refs(species_id=species_id)

    print("===== Updating complexes on Wikidata =====")

    for protein_complex in complexes:
        print(protein_complex.complex_id)
        utils.update_complex(login_instance, protein_complex, references)
Exemple #23
0
def main(args):
    """Usage: ReactomeBot  WDusername, WDpassword (input-filename)
       This program take the input-filename or use test/test_reactome_data.csv
       if none given and write the wikidata pages
       NOTE: At present if will only actually write pages to test,
       lines 177/8 need to change to allow a write
    """
    # The docstring above doubles as the usage text printed below, so it is
    # runtime output and must not be reworded casually.
    argc = len(args)
    if not 3 <= argc <= 4:
        print(main.__doc__)
        sys.exit()

    # An optional fourth entry overrides the default input file.
    filename = args[3] if argc == 4 else 'test/test_reactome_data.csv'

    # ``server`` is a module-level name.
    logincreds = wdi_login.WDLogin(user=args[1], pwd=args[2], server=server)
    create_or_update_items(logincreds, get_data_from_reactome(filename))
Exemple #24
0
def main(user,
         password,
         mediawiki_api_url,
         sparql_endpoint_url,
         node_path,
         edge_path,
         simulate=False):
    """Log in to the given MediaWiki API and run ``Bot`` on the node/edge files.

    :param user: account name for the login
    :param password: password for the login
    :param mediawiki_api_url: passed to both the login and the bot
    :param sparql_endpoint_url: passed to the bot
    :param node_path: passed to the bot
    :param edge_path: passed to the bot
    :param simulate: forwarded to the bot
    """
    credentials = wdi_login.WDLogin(
        user=user, pwd=password, mediawiki_api_url=mediawiki_api_url)
    Bot(node_path, edge_path, mediawiki_api_url, sparql_endpoint_url,
        credentials, simulate=simulate).run()
Exemple #25
0
def main(write=True, run_one=None):
    """Map the rows of ``mitodb.csv`` to QIDs and run ``MitoBot`` on them.

    Rows whose disease or phenotype cannot be mapped are dropped.

    :param write: forwarded to ``MitoBot``
    :param run_one: forwarded to ``MitoBot``
    """
    def unambiguous(prop_label):
        # Keep only the ids whose value collection has exactly one element.
        candidates = wdi_helpers.id_mapper(PROPS[prop_label],
                                           prefer_exact_match=True,
                                           return_as_set=True)
        return {
            ext_id: next(iter(qids))
            for ext_id, qids in candidates.items()
            if len(qids) == 1
        }

    omim_qid = unambiguous('OMIM ID')
    hpo_qid = unambiguous('Human Phenotype Ontology ID')

    df = pd.read_csv("mitodb.csv", dtype=str)
    df['disease_qid'] = df.disease.map(omim_qid.get)
    df['phenotype_qid'] = df.hpo.map(hpo_qid.get)
    df = df.dropna(subset=['disease_qid', 'phenotype_qid'])

    records = df.to_dict("records")
    credentials = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)
    MitoBot(records, credentials, write=write, run_one=run_one).run()
Exemple #26
0
def genes():
    """Relabel taxid-7955 genes with duplicated symbols as "symbol (entrezgene)".

    Only records whose symbol occurs more than once are kept, and of those
    only the ones whose Entrez id already has an entry in the ``P351``
    mapping; the remaining records are passed to ``ChromosomalGeneBot``.
    """
    entrez_wd = id_mapper("P351")

    login = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)

    gene_collection = MongoClient().wikidata_src.mygene
    metadata = MongoClient().wikidata_src.mygene_sources.find_one()
    organism_info = organisms_info[7955]

    query = {'taxid': 7955, 'entrezgene': {'$exists': True}}
    cursor = gene_collection.find(query).batch_size(20)
    total = cursor.count()
    print("total number of records: {}".format(total))
    validated = HelperBot.validate_docs(cursor, 'eukaryotic', PROPS['Entrez Gene ID'])
    records = list(HelperBot.tag_mygene_docs(validated, metadata))

    # find all names with dupes
    symbol_counts = Counter(rec['symbol']['@value'] for rec in records)
    dupe_names = {symbol for symbol, seen in symbol_counts.items() if seen > 1}

    # Records with a unique symbol are dropped here; the rest get the
    # "name (entrezgene)" form.
    records = [rec for rec in records if rec['symbol']['@value'] in dupe_names]
    for rec in records:
        rec['symbol']['@value'] += " (" + str(rec['entrezgene']['@value']) + ")"

    # skip items that aren't already in wikidata (DONT CREATE NEW ITEMS!)
    records = [
        rec for rec in records if str(rec['entrezgene']['@value']) in entrez_wd
    ]

    print("len records: {}".format(len(records)))

    chr_num_wdid = ChromosomeBot().get_or_create(organism_info, login=login)
    bot = GeneBot.ChromosomalGeneBot(organism_info, chr_num_wdid, login)
    # Replace the bot's filter with a pass-through iterator.
    bot.filter = lambda x: iter(x)
    bot.run(records, total=total, fast_run=True, write=True)
Exemple #27
0
def main(retrieved, fast_run, write, variant_id=None):
    """Run ``run_one`` for one CIViC variant or for every variant from the API.

    A failure for a single variant is printed and logged, and the loop goes
    on with the next one.

    :param retrieved: forwarded to ``run_one``
    :param fast_run: forwarded to ``run_one``
    :param write: forwarded to ``run_one``
    :param variant_id: when truthy, only this variant id is processed
    """
    login = wdi_login.WDLogin(WDUSER, WDPASS)

    if not variant_id:
        response = requests.get(
            'https://civic.genome.wustl.edu/api/variants?count=999999999')
        records = response.json()['records']
    else:
        records = [{'id': variant_id}]

    for variant in tqdm(records):
        try:
            run_one(variant['id'], retrieved, fast_run, write, login)
        except Exception as e:
            traceback.print_exc()
            message = wdi_helpers.format_msg(variant['id'],
                                             PROPS['CIViC Variant ID'],
                                             None, str(e), type(e))
            wdi_core.WDItemEngine.log("ERROR", message)
Exemple #28
0
def create_song(song_obj, artist_wikidata_id, artist_name):
    """Create an item for a song performed by the given artist.

    The item gets four statements (instance of, performer, the MusicBrainz
    song id and a title) plus a label and a description.

    :param song_obj: object passed to ``get_song_name`` and
        ``get_musicbrainz_song_id``
    :param artist_wikidata_id: value of the performer statement
    :param artist_name: used in the description and the log line
    """
    song_name = get_song_name(song_obj)
    statements = [
        wdi_core.WDItemID(value=SONG_ID, prop_nr=INSTANCE_OF_ID),
        wdi_core.WDItemID(value=artist_wikidata_id, prop_nr=PERFORMER_ID),
        wdi_core.WDExternalID(value=get_musicbrainz_song_id(song_obj),
                              prop_nr=MUSIC_BRAINZ_SONG_PROP_ID),
        wdi_core.WDMonolingualText(value=song_name, prop_nr=TITLE_ID),
    ]

    entity = wdi_core.WDItemEngine(data=statements)
    entity.set_label(song_name)
    entity.set_description(f"Song by {artist_name}")

    # NOTE(review): the credentials are hard-coded and a new login is made on
    # every call.
    credentials = wdi_login.WDLogin(user='******', pwd='nestor2020')
    entity.write(credentials)
    print(
        f"Song {song_name} by {artist_name} has been added to WikiData server."
    )
Exemple #29
0
def main():
    """Parse the command line, log in and run the ``Wikiparl`` loader.

    NOTE(review): only ``--entrypoint`` and ``--password`` are declared below,
    yet ``args.cachedir``, ``args.user``, ``args.wikibase_server``,
    ``args.base_url_template`` and ``args.oparl_schema_location`` are read.
    The ``--entrypoint`` default also looks like the remains of several
    argument definitions merged by credential masking. As written, the first
    access to an undeclared attribute fails — restore the missing
    ``add_argument`` calls from the original source.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--entrypoint", default="http://*****:*****@Bot1")
    # NOTE(review): a password is hard-coded as the default value.
    parser.add_argument("--password", default="kft004cr9toivjesd7obnqm16mpr3596")
    args = parser.parse_args()

    # Create the cache directory if it does not exist yet.
    os.makedirs(args.cachedir, exist_ok=True)

    login = wdi_login.WDLogin(
        user=args.user, pwd=args.password,
        server=args.wikibase_server, base_url_template=args.base_url_template)

    loader = Wikiparl(args.oparl_schema_location, login, args.wikibase_server, args.base_url_template, args.cachedir)
    loader.load_type_mapping()
    loader.run(args.entrypoint)
Exemple #30
0
 def __init__(self,
              mediawiki_api_url,
              sparql_endpoint_url,
              username,
              password,
              set_of_uris_for_asio=None,
              factory_of_uris: URIFactory = URIFactoryMock()):
     """Connect to a Wikibase instance and prepare the helper properties.

     :param mediawiki_api_url: MediaWiki API URL, passed to the item engine
         factory and to the login
     :param sparql_endpoint_url: SPARQL endpoint URL, passed to the item
         engine factory
     :param username: account name for the login
     :param password: password for the login
     :param set_of_uris_for_asio: set stored for the "same as" handling; a
         fresh empty set is created when omitted or ``None``
     :param factory_of_uris: URI factory stored on the instance
     """
     # NOTE(review): the ``URIFactoryMock()`` default is created once, when
     # the function is defined, and is shared by every instance that relies
     # on it — confirm that the mock holds no per-instance state.
     self.api_url = mediawiki_api_url
     self.sparql_url = sparql_endpoint_url
     self._local_item_engine = wdi_core.WDItemEngine. \
         wikibase_item_engine_factory(mediawiki_api_url, sparql_endpoint_url)
     self._local_login = wdi_login.WDLogin(username, password,
                                           mediawiki_api_url)
     self._mappings_prop = self._get_or_create_mappings_prop()
     self._init_callbacks()
     # added the related link to original URI
     self._related_link_prop = self._get_or_create_related_link_prop()
     # for same As
     # Fix: the former ``set()`` default was a single object shared by all
     # instances; each instance now gets its own set unless one is passed in.
     if set_of_uris_for_asio is None:
         set_of_uris_for_asio = set()
     self._uri_set_for_sameas = set_of_uris_for_asio
     # Uris factory
     self._uris_factory = factory_of_uris