def test_make_gene_class():
    coll = MongoClient().wikidata_src.mygene
    metadata_coll = MongoClient().wikidata_src.mygene_sources
    metadata = metadata_coll.find_one()
    doc_filter = {'_id': '100861512'}
    docs = coll.find(doc_filter)
    print("total number of records: {}".format(coll.find(doc_filter).count()))
    validate_type = 'eukaryotic'
    docs = HelperBot.validate_docs(docs, validate_type, 'P351')
    records = HelperBot.tag_mygene_docs(docs, metadata)
    record = next(records)

    organism_info = {
        "name": "Homo sapiens",
        "type": "mammalian",
        "wdid": "Q15978631",
        'taxid': 9606
    }

    login = wdi_login.WDLogin(WDUSER, WDPASS)
    gene = Gene(record, organism_info, login)
    gene.create_item(fast_run=False, write=True)
    gene.remove_deprecated_statements()
def wd_oauth(request): """ handles the authenitication process of wikimedia oauth1 :param request: :return: access token for editing with wikidata api """ if request.method == 'POST': body_unicode = request.body.decode('utf-8') body = json.loads(body_unicode) pprint(body) # initiate the handshake by sendin consumer token to wikidata by redirect if 'initiate' in body.keys(): callbackURI = 'http://chlambase.org{}/authorized/'.format(body['current_path']) authentication = wdi_login.WDLogin(consumer_key=consumer_key, consumer_secret=consumer_secret, callback_url=callbackURI) request.session['authOBJ'] = jsonpickle.encode(authentication) response_data = { 'wikimediaURL': authentication.redirect } return JsonResponse(response_data) # parse the url from wikidata for the oauth token and secret if 'url' in body.keys(): authentication = jsonpickle.decode(request.session['authOBJ']) authentication.continue_oauth(oauth_callback_data=body['url']) request.session['login'] = jsonpickle.encode(authentication) return JsonResponse(body) # clear the authenitcation if user wants to revoke if 'deauthenticate' in body.keys(): request.session['authentication'] = None request.session['login'] = None return JsonResponse({'deauthenicate': True})
def main(): parser = argparse.ArgumentParser() parser.add_argument("--oparl-endpoint", default="http://localhost:8080/oparl/v1.0") parser.add_argument("--wikibase-server", default="mediawiki.local") parser.add_argument("--base-url-template", default="http://{}/api.php") parser.add_argument("--schemadir", default="/home/konsti/oparl/schema") args = parser.parse_args() oparl_client = ServerIterator( url=args.oparl_endpoint, cache=IdiomaticFileCache("/home/konsti/cache-idiomatic/url-to-json")) login = wdi_login.WDLogin(user='******', pwd='citsdvh4ct69bqepeiblc8p5njnrq26j', server=args.wikibase_server, base_url_template=args.base_url_template) mapping = get_properties_mapping_cached(args.schemadir, login, args.wikibase_server, args.base_url_template) oparl_to_wd = IdiomaticFileCache( "/home/konsti/cache-idiomatic/url-to-wd-item-id") importer = OParlToWikidata(oparl_client, login, mapping, oparl_to_wd, args.wikibase_server, args.base_url_template) importer.index_all()
def __init__(self, mediawiki_api_url, sparql_endpoint_url, username, password): """ :param mediawiki_api_url: :param sparql_endpoint_url: :param username: :param password: """ """ mediawiki_api_url = "http://localhost:7171/w/api.php" sparql_endpoint_url = "http://localhost:7272/proxy/wdqs/bigdata/namespace/wdq/sparql" username = "******" password = "******" """ self.mediawiki_api_url = mediawiki_api_url self.sparql_endpoint_url = sparql_endpoint_url self.username = username self.password = password self.localItemEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory( mediawiki_api_url, sparql_endpoint_url) self.login = wdi_login.WDLogin(username, password, mediawiki_api_url=mediawiki_api_url)
def __init__(self):
    self.login = wdi_login.WDLogin(WDUSER, WDPASS)
    self._get_mixtures_in_wd()
    # map RxNorm CUI (P3345) to QID, keeping only unambiguous 1:1 mappings
    rxnorm_qid = wdi_helpers.id_mapper("P3345", return_as_set=True)
    rxnorm_qid = {k: list(v)[0] for k, v in rxnorm_qid.items() if len(v) == 1}
    self.rxnorm_qid = rxnorm_qid
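# What the two rxnorm_qid lines above do, on a hypothetical id_mapper result:
# with return_as_set=True, id_mapper returns {external_id: {qid, ...}}, so
#   {'1000001': {'Q111'}, '1000002': {'Q222', 'Q333'}}
# collapses to
#   {'1000001': 'Q111'}
# and the ambiguous '1000002' entry, mapped to two items, is dropped.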
def test_prepare_for_wikidata_function(self):
    the_so_called_correspondance = utils.load_pickle_file(
        "../tests/cellosaurus_informations_to_wikidata_ids.pickle")
    species = the_so_called_correspondance["species"]
    references = the_so_called_correspondance["references"]
    categories = utils.get_cell_line_category_to_wikidata("../project/category.txt")
    diseases = the_so_called_correspondance["diseases"]
    cellosaurus_dump_in_dictionary_format = utils.format_cellosaurus_dump_as_dictionary(
        "../project/test_cellosaurus.txt")
    wikidata = utils.query_wikidata_for_cell_lines()
    releaseID = "Q87574023"
    login = wdi_login.WDLogin(WDUSER, WDPASS)

    cell_line = utils.CellossaurusCellLine(wdi_login_object=login,
                                           release_qid=releaseID,
                                           cellosaurus_dump=cellosaurus_dump_in_dictionary_format,
                                           wikidata_dictionary_with_existing_cell_lines=wikidata,
                                           references=references,
                                           species=species,
                                           cell_line_categories=categories,
                                           diseases=diseases,
                                           cell_line_id="CVCL_2260")

    data, data_to_delete = cell_line.prepare_for_wikidata()
    print(data)
    print(data_to_delete)
    self.assertEqual(1, 1)
    self.assertEqual(cell_line.cell_line_id, "CVCL_2260")
def wd_oauth(request):
    if request.method == 'POST':
        body_unicode = request.body.decode('utf-8')
        body = json.loads(body_unicode)
        pprint(body)
        if 'initiate' in body.keys():
            print(body)
            callbackURI = "http://chlambase.org" + body['current_path'] + '/authorized/'
            authentication = wdi_login.WDLogin(
                consumer_key=oauth_config.consumer_key,
                consumer_secret=oauth_config.consumer_secret,
                callback_url=callbackURI)
            request.session['authOBJ'] = jsonpickle.encode(authentication)
            response_data = {'wikimediaURL': authentication.redirect}
            return JsonResponse(response_data)
        if 'url' in body.keys():
            authentication = jsonpickle.decode(request.session['authOBJ'])
            authentication.continue_oauth(oauth_callback_data=body['url'])
            request.session['login'] = jsonpickle.encode(authentication)
            return JsonResponse(body)
        if 'deauthenticate' in body.keys():
            request.session['authentication'] = None
            request.session['login'] = None
            return JsonResponse({'deauthenticate': True})
def main(json_path='doid.json', log_dir="./logs", fast_run=True, write=True): login = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS) wdi_core.WDItemEngine.setup_logging(log_dir=log_dir, logger_name='WD_logger', log_name=log_name, header=json.dumps(__metadata__)) with open(json_path) as f: d = json.load(f) graphs = {g['id']: g for g in d['graphs']} graph = graphs['http://purl.obolibrary.org/obo/doid.owl'] # get the has phenotype, has_material_basis_in, and transmitted by edges from another graph graph['edges'].extend(graphs['http://purl.obolibrary.org/obo/doid/obo/ext.owl']['edges']) do = DOGraph(graph, login, fast_run) nodes = sorted(do.nodes.values(), key=lambda x: x.doid) items = [] for n, node in tqdm(enumerate(nodes), total=len(nodes)): item = node.create(write=write) # if n>100: # sys.exit(0) if item: items.append(item) sleep(10 * 60) doid_wdid = id_mapper('P699') frc = items[0].fast_run_container if not frc: print("fastrun container not found. not removing deprecated statements") return None frc.clear() for doid in tqdm(doid_wdid.values()): remove_deprecated_statements(doid, frc, do.release, list(PROPS.values()), login) print("You have to remove these deprecated diseases manually: ") print(get_deprecated_nodes(graph))
class TestRelease2(unittest.TestCase):
    login = wdi_login.WDLogin(WDUSER, WDPASS)
    species = cellosaurus_informations_to_wikidata_ids["species"]
    references = cellosaurus_informations_to_wikidata_ids["references"]
    diseases = cellosaurus_informations_to_wikidata_ids["diseases"]
    categories = get_cell_line_category_to_wikidata("project/category.txt")
    wikidata = query_wikidata_for_cell_lines()
    cellosaurus_release = Create_Update(login=login,
                                        releaseID="Q87574023",
                                        cellosaurus=cellosaurus_dump_in_dictionary_format,
                                        wikidata=wikidata,
                                        references=references,
                                        species=species,
                                        categories=categories,
                                        diseases=diseases)

    with eventlet.Timeout(10):
        print("ok, timeout set for 10 seconds")
        cellosaurus_release.UpdateWikidata(
            cell_line, cellosaurus_release.InitialisationData(cell_line))

    for cell_line in cellosaurus_dump_in_dictionary_format:
        print(cell_line)
        if cell_line in cellosaurus_release.wikidata:
            print("update")
            cellosaurus_release.UpdateWikidata(cell_line,
                                               cellosaurus_release.InitialisationData(cell_line))

    def test_correct_release(self):
        self.assertEqual(release, 'Cellosaurus release 34')
def main(write=True, run_one=None):
    d = load_parsed_data()
    login = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)
    drug_indications = normalize_to_qids(d)
    bot = InxightBot(drug_indications, login, write=write, run_one=run_one)
    bot.run()
def __init__(self):
    self.login_credentials = {
        "bot_username": "******",
        "bot_password": "******",
    }
    self.login_instance = wdi_login.WDLogin(user="******", pwd="passwd")
    excelfile = openpyxl.load_workbook(PATH_EXCEL_PROPERTIES)
    self.ws = excelfile.active
    self.maxRows = self.ws.max_row
    self.maxCol = self.ws.max_column
def main():
    # -----------------INPUT-------------------------#
    cellosaurus_dump_path = sys.argv[1]
    assert cellosaurus_dump_path, "You need to add a Cellosaurus Dump"
    pickle_path = sys.argv[2]
    assert pickle_path, "You need to add a path to the folder with the pickle files"
    folder_for_errors = sys.argv[3]
    assert folder_for_errors, "You need to add a folder for errors"
    release_qid = sys.argv[4]
    assert release_qid, "You need to add a release QID"

    reconciled_dump_path = pickle_path + "/cellosaurus_wikidata_items.pickle"
    wikidata_cell_lines_path = pickle_path + "/cell_lines_on_wikidata.pickle"
    filename_taxons = pickle_path + "/taxons_on_wikidata.pickle"

    cellosaurus_dump_in_dictionary_format = format_cellosaurus_dump_as_dictionary(
        cellosaurus_dump_path)
    cellosaurus_to_wikidata_matches = load_pickle_file(reconciled_dump_path)
    wikidata_cell_lines = load_pickle_file(wikidata_cell_lines_path)
    login = wdi_login.WDLogin(WDUSER, WDPASS)
    ncbi_id_to_qid_species = load_pickle_file(filename_taxons)
    references = cellosaurus_to_wikidata_matches["references_dictionary"]
    diseases = cellosaurus_to_wikidata_matches["diseases_dictionary"]
    categories = load_cell_line_category_to_wikidata("project/category.txt")

    for cellosaurus_id in tqdm(cellosaurus_dump_in_dictionary_format):
        tqdm.write(cellosaurus_id)
        try:
            tqdm.write(wikidata_cell_lines[cellosaurus_id])
            cell_line = CellosaurusCellLine(
                wdi_login_object=login,
                release_qid=release_qid,
                cellosaurus_dump=cellosaurus_dump_in_dictionary_format,
                wikidata_dictionary_with_existing_cell_lines=wikidata_cell_lines,
                references=references,
                species=ncbi_id_to_qid_species,
                cell_line_categories=categories,
                diseases=diseases,
                cell_line_id=cellosaurus_id)

            prepared_data = cell_line.prepare_for_wikidata(folder_for_errors)
            cell_line.update_line_on_wikidata(prepared_data)
        except Exception as e:
            print(e)
def test_item_generator(self):
    items = ['Q408883', 'P715', 'Q18046452']
    login_obj = wdi_login.WDLogin(user='', pwd='')
    # login_obj = None
    item_instances = wdi_core.WDItemEngine.generate_item_instances(
        items=items, server='www.wikidata.org', login=login_obj)
    for qid, item in item_instances:
        print(qid)
        pprint.pprint(item.entity_metadata)
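# generate_item_instances yields (qid, WDItemEngine) pairs, so entity metadata
# can be read for several items in one pass. A minimal read-only sketch,
# assuming login may be omitted for reads (as the commented-out
# login_obj = None above suggests):
for qid, item in wdi_core.WDItemEngine.generate_item_instances(
        items=['Q408883'], server='www.wikidata.org'):
    print(qid, item.get_label())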
def main(df, log_dir="./logs", fast_run=False): df = filter_df_clinical_missense(df) # df = df.head(2) login = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS) # make sure we have all the variant items we need hgvs_qid = id_mapper(PROPS['HGVS nomenclature']) for _, row in tqdm(df.iterrows(), total=len(df)): if row.gDNA not in hgvs_qid: continue label = "{} ({})".format(row.gDNA, row['individual_mutation']) print("creating {}".format(label)) try: item = create_missense_variant_item(row.gDNA, label, login, fast_run=fast_run) except Exception as e: print(e) wdi_core.WDItemEngine.log( "ERROR", wdi_helpers.format_msg(row.gDNA, "gDNA", None, str(e), type(e))) continue hgvs_qid[row.gDNA] = item.wd_item_id for _, row in tqdm(df.iterrows(), total=len(df)): if row.gDNA not in hgvs_qid: wdi_core.WDItemEngine.log( "WARNING", wdi_helpers.format_msg( row.gDNA, "gDNA", None, "variant not found: {}".format(row.gDNA), "variant not found")) continue if row.Association not in association_map: wdi_core.WDItemEngine.log( "WARNING", wdi_helpers.format_msg( row.gDNA, "gDNA", None, "Association not found: {}".format(row.Association), "association not found")) continue qid = hgvs_qid[row.gDNA] association = association_map[row.Association] drug_qid = row.Drug_qid prim_tt_qid = row.prim_tt_qid source = row.Source evidence_level = row['Evidence level'] item = create_variant_annotation(qid, association, drug_qid, prim_tt_qid, source, evidence_level, login)
class Product:
    login = wdi_login.WDLogin(WDUSER, WDPASS)
    rxnorm_qid = wdi_helpers.id_mapper("P3345", return_as_set=True)
    rxnorm_qid = {k: list(v)[0] for k, v in rxnorm_qid.items() if len(v) == 1}
    qid_rxnorm = {v: k for k, v in rxnorm_qid.items()}

    def __init__(self, qid=None, rxcui=None, label=None):
        self.qid = qid
        self.rxcui = rxcui
        self.label = label
        if self.qid:
            # get the rxnorm id for this brand
            if rxcui and (self.qid_rxnorm[self.qid] != rxcui):
                raise ValueError("something is wrong: {}".format((self.qid, self.rxcui, rxcui)))
            self.rxcui = self.qid_rxnorm[self.qid]

    def add_active_ingredient(self, ingredient_qid):
        assert self.qid
        s = [wdi_core.WDItemID(ingredient_qid, 'P3781', references=make_ref(self.rxcui))]
        # purposely overwriting this
        item = wdi_core.WDItemEngine(wd_item_id=self.qid, data=s, domain="drugs",
                                     fast_run=True, fast_run_use_refs=True,
                                     fast_run_base_filter={"P3345": ""},
                                     ref_handler=ref_handlers.update_retrieved_if_new)
        item.write(self.login)
        # and adding the inverse
        s = [wdi_core.WDItemID(self.qid, 'P3780', references=make_ref(self.rxcui))]
        # do not overwrite
        item = wdi_core.WDItemEngine(wd_item_id=ingredient_qid, data=s, domain="drugs",
                                     fast_run=True, fast_run_use_refs=True,
                                     fast_run_base_filter={"P3345": ""},
                                     ref_handler=ref_handlers.update_retrieved_if_new,
                                     append_value=['P3780'])
        item.write(self.login)

    def get_or_create(self):
        assert self.rxcui
        if self.rxcui in self.rxnorm_qid:
            return self.rxnorm_qid[self.rxcui]
        assert self.label
        s = []
        s.append(wdi_core.WDItemID('Q28885102', 'P31', references=make_ref(self.rxcui)))  # pharma product
        s.append(wdi_core.WDExternalID(self.rxcui, "P3345", references=make_ref(self.rxcui)))
        item = wdi_core.WDItemEngine(item_name=self.label, data=s, domain="drugs")
        item.set_label(self.label)
        item.set_description("pharmaceutical product")
        item.write(self.login)
        qid = item.wd_item_id
        self.qid = qid
        return qid
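# Usage sketch for Product (the RxCUI and ingredient QID are hypothetical;
# real values come from the P3345 mapping loaded at class level):
product = Product(rxcui='1000001', label='example drug 10 MG oral tablet')
qid = product.get_or_create()             # reuse the mapped item or create one
product.add_active_ingredient('Q12345')   # write P3781 and the inverse P3780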
def main(json_path='doid.json', log_dir="./logs", fast_run=True, write=True): login = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS) wdi_core.WDItemEngine.setup_logging(log_dir=log_dir, logger_name='WD_logger', log_name=log_name, header=json.dumps(__metadata__)) with open(json_path) as f: d = json.load(f) graph = d['graphs'][0] do = DOGraph(graph, login, fast_run) for node in tqdm(do.nodes.values()): node.create(write=write)
def main(gwas_path='GWAS_Catalog.tsv', log_dir="./logs", fast_run=False, write=True):
    login = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)
    wdi_core.WDItemEngine.setup_logging(log_dir=log_dir, logger_name='WD_logger',
                                        log_name=log_name, header=json.dumps(__metadata__))
    gene_disease_bot = GeneDiseaseBot(catalog_tsv_path=gwas_path, login=login,
                                      fast_run=fast_run, write=write).run()
def main():
    # -----------------INPUT-------------------------#
    cellosaurus_dump_path = sys.argv[1]
    assert cellosaurus_dump_path, "You need to add a Cellosaurus Dump"
    cellosaurus_dump_in_dictionary_format = format_cellosaurus_dump_as_dictionary(
        cellosaurus_dump_path)

    # expand the w.wiki short link into the full query.wikidata.org SPARQL URL
    url = "https://w.wiki/3Uxc"
    session = requests.Session()  # so connections are recycled
    resp = session.head(url, allow_redirects=True)
    url_sparql = resp.url.replace("https://query.wikidata.org/#",
                                  "https://query.wikidata.org/sparql?query=")
    r = requests.get(url_sparql, params={"format": "json"})
    df = pd.json_normalize(r.json()["results"]["bindings"])
    print(df)
    print(df.columns)

    login = wdi_login.WDLogin(WDUSER, WDPASS)
    df["qid"] = [url.split("/")[4] for url in df["item.value"]]

    for cellosaurus_id in tqdm(cellosaurus_dump_in_dictionary_format):
        if cellosaurus_id in df["cellosaurus.value"].values:
            print("==========")
            if cellosaurus_dump_in_dictionary_format[cellosaurus_id]["hPSCreg"] == "NULL":
                print(f"Bad id for cell line {cellosaurus_id}")
                data_to_add_to_wikidata = [
                    wdi_core.WDBaseDataType.delete_statement("P9554")
                ]
                data_to_add_to_wikidata.append(
                    wdi_core.WDExternalID(value=cellosaurus_id, prop_nr="P3289"))
                df_now = df[df["cellosaurus.value"] == cellosaurus_id]
                properties_to_append_value = ["P3289"]
                wd_item = wdi_core.WDItemEngine(
                    wd_item_id=df_now["qid"].values[0],
                    data=data_to_add_to_wikidata,
                    append_value=properties_to_append_value,
                )
                a = wd_item.write(
                    login,
                    bot_account=True,
                )
                print(a)
def main(retrieved, fast_run, write):
    login = wdi_login.WDLogin(WDUSER, WDPASS)
    temp = Graph()

    url = 'http://data.wikipathways.org/current/rdf'
    page = requests.get(url).text
    files = []
    for link in BeautifulSoup(page, "lxml", parse_only=SoupStrainer('a')):
        address = str(link).split("\"")
        if len(address) > 1:
            filename = address[1].replace("./", "/")
            if len(filename) > 1:
                if filename not in files:
                    if filename != "./":
                        files.append(url + filename)

    wpids = []
    for file in set(files):
        if "rdf-wp" in file:  # get the most accurate file
            print(file)
            u = requests.get(file)
            with closing(u), zipfile.ZipFile(io.BytesIO(u.content)) as archive:
                for member in archive.infolist():
                    nt_content = archive.read(member)
                    # print(nt_content)
                    temp.parse(data=nt_content.decode(), format="turtle")
            print("size: " + str(len(temp)))

    wp_query = """prefix dcterm: <http://purl.org/dc/terms/>
    prefix wp: <http://vocabularies.wikipathways.org/wp#>
    SELECT DISTINCT ?wpid WHERE {
      ?s rdf:type <http://vocabularies.wikipathways.org/wp#Pathway> ;
         dcterm:identifier ?wpid ;
         ?p <http://vocabularies.wikipathways.org/wp#Curation:AnalysisCollection> ;
         wp:organism <http://purl.obolibrary.org/obo/NCBITaxon_9606> .
    }"""

    qres = temp.query(wp_query)
    for row in qres:
        print("%s" % row)
        wpids.append(str(row[0]))

    for pathway_id in wpids:
        try:
            run_one(pathway_id, retrieved, fast_run, write, login, temp)
        except Exception as e:
            traceback.print_exc()
            wdi_core.WDItemEngine.log(
                "ERROR",
                wdi_helpers.format_msg(pathway_id, PROPS['Wikipathways ID'],
                                       None, str(e), type(e)))
def main(): parser = argparse.ArgumentParser() parser.add_argument("--schemadir", default="/home/konsti/oparl/schema") parser.add_argument("--wikibase-server", default="mediawiki.local") parser.add_argument("--base-url-template", default="http://{}/api.php") args = parser.parse_args() schemadir = args.schemadir login = wdi_login.WDLogin(user='******', pwd='citsdvh4ct69bqepeiblc8p5njnrq26j', server=args.wikibase_server, base_url_template=args.base_url_template) print( get_properties_mapping_cached(schemadir, login, args.wikibase_server, args.base_url_template))
def create_artist(artist_obj):
    data = []
    artist_name = get_artist_name(artist_obj)
    data.append(wdi_core.WDItemID(value=HUMAN_ID, prop_nr=INSTANCE_OF_ID))
    data.append(wdi_core.WDItemID(value=MUSICIAN_ID, prop_nr=OCCUPATION_ID))
    data.append(wdi_core.WDExternalID(value=get_musicbraiz_artist_id(artist_obj),
                                      prop_nr=MUSIC_BRAINZ_PROP_ID))

    entity = wdi_core.WDItemEngine(data=data)
    entity.set_label(artist_name)
    entity.set_description(MUSICIAN)

    login_instance = wdi_login.WDLogin(user='******', pwd='nestor2020')
    entity_id = entity.write(login_instance)
    print(f"Artist {artist_name} has been added to WikiData server.")
    return entity_id
def main(): parser.add_argument("--species", "-s", help="a species species_id", type=str) parser.add_argument( "--wikidata", "-w", help="1 to exclude complexes on Wikidata, 0 to include", type=int, default=1, ) parser.add_argument( "--number", "-n", help="the number of complexes to add", type=int, default=999999, ) args = parser.parse_args() if len(sys.argv) < 4: sys.exit( "Usage: python3 update_complex.py -s [species id] -w [boolean] -n [number of complexes]" ) species_id = args.species test_on_wikidata = bool(args.wikidata) number_of_complexes_to_add = args.number dataset_urls = utils.get_complex_portal_dataset_urls() # Make a dataframe for all complexes of a given species list_of_complexes = utils.get_list_of_complexes( dataset_urls, species_id=species_id, test_on_wikidata=test_on_wikidata, max_complexes=number_of_complexes_to_add, ) login_instance = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS) references = utils.prepare_refs(species_id=species_id) print("===== Updating complexes on Wikidata =====") for protein_complex in list_of_complexes: print(protein_complex.complex_id) utils.update_complex(login_instance, protein_complex, references)
def main(args): """Usage: ReactomeBot WDusername, WDpassword (input-filename) This program take the input-filename or use test/test_reactome_data.csv if none given and write the wikidata pages NOTE: At present if will only actually write pages to test, lines 177/8 need to change to allow a write """ filename = 'test/test_reactome_data.csv' if len(args) < 3 or len(args) > 4: print(main.__doc__) sys.exit() elif len(args) == 4: filename = args[3] logincreds = wdi_login.WDLogin(user=args[1], pwd=args[2], server=server) results = get_data_from_reactome(filename) create_or_update_items(logincreds, results)
def main(user, password, mediawiki_api_url, sparql_endpoint_url,
         node_path, edge_path, simulate=False):
    login = wdi_login.WDLogin(user=user, pwd=password, mediawiki_api_url=mediawiki_api_url)
    bot = Bot(node_path, edge_path, mediawiki_api_url, sparql_endpoint_url,
              login, simulate=simulate)
    bot.run()
def main(write=True, run_one=None):
    # keep only external IDs that map to exactly one Wikidata item
    omim_qid = wdi_helpers.id_mapper(PROPS['OMIM ID'], prefer_exact_match=True,
                                     return_as_set=True)
    omim_qid = {k: list(v)[0] for k, v in omim_qid.items() if len(v) == 1}
    hpo_qid = wdi_helpers.id_mapper(PROPS['Human Phenotype Ontology ID'],
                                    prefer_exact_match=True, return_as_set=True)
    hpo_qid = {k: list(v)[0] for k, v in hpo_qid.items() if len(v) == 1}

    df = pd.read_csv("mitodb.csv", dtype=str)
    df['disease_qid'] = df.disease.map(omim_qid.get)
    df['phenotype_qid'] = df.hpo.map(hpo_qid.get)
    df.dropna(subset=['disease_qid', 'phenotype_qid'], inplace=True)
    records = df.to_dict("records")

    login = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)
    bot = MitoBot(records, login, write=write, run_one=run_one)
    bot.run()
def genes(): entrez_wd = id_mapper("P351") login = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS) coll = MongoClient().wikidata_src.mygene metadata_coll = MongoClient().wikidata_src.mygene_sources metadata = metadata_coll.find_one() organism_info = organisms_info[7955] doc_filter = {'taxid': 7955, 'entrezgene': {'$exists': True}} docs = coll.find(doc_filter).batch_size(20) total = docs.count() print("total number of records: {}".format(total)) docs = HelperBot.validate_docs(docs, 'eukaryotic', PROPS['Entrez Gene ID']) records = HelperBot.tag_mygene_docs(docs, metadata) records = list(records) # find all names with dupes dupe_names = { k for k, v in Counter([x['symbol']['@value'] for x in records]).items() if v > 1 } # for all records that have one of these names, change the name to "name (entrezgene)" records = [x for x in records if x['symbol']['@value'] in dupe_names] for record in records: record['symbol']['@value'] = record['symbol']['@value'] + " (" + str( record['entrezgene']['@value']) + ")" # skip items that aren't already in wikidata (DONT CREATE NEW ITEMS!) records = [ x for x in records if str(x['entrezgene']['@value']) in entrez_wd ] print("len records: {}".format(len(records))) cb = ChromosomeBot() chr_num_wdid = cb.get_or_create(organism_info, login=login) bot = GeneBot.ChromosomalGeneBot(organism_info, chr_num_wdid, login) bot.filter = lambda x: iter(x) bot.run(records, total=total, fast_run=True, write=True)
def main(retrieved, fast_run, write, variant_id=None):
    login = wdi_login.WDLogin(WDUSER, WDPASS)
    if variant_id:
        records = [{'id': variant_id}]
    else:
        r = requests.get('https://civic.genome.wustl.edu/api/variants?count=999999999')
        variants_data = r.json()
        records = variants_data['records']
    for record in tqdm(records):
        try:
            run_one(record['id'], retrieved, fast_run, write, login)
        except Exception as e:
            traceback.print_exc()
            wdi_core.WDItemEngine.log(
                "ERROR",
                wdi_helpers.format_msg(record['id'], PROPS['CIViC Variant ID'],
                                       None, str(e), type(e)))
def create_song(song_obj, artist_wikidata_id, artist_name):
    data = []
    song_name = get_song_name(song_obj)
    data.append(wdi_core.WDItemID(value=SONG_ID, prop_nr=INSTANCE_OF_ID))
    data.append(wdi_core.WDItemID(value=artist_wikidata_id, prop_nr=PERFORMER_ID))
    data.append(wdi_core.WDExternalID(value=get_musicbrainz_song_id(song_obj),
                                      prop_nr=MUSIC_BRAINZ_SONG_PROP_ID))
    data.append(wdi_core.WDMonolingualText(value=song_name, prop_nr=TITLE_ID))

    entity = wdi_core.WDItemEngine(data=data)
    entity.set_label(song_name)
    entity.set_description(f"Song by {artist_name}")

    login_instance = wdi_login.WDLogin(user='******', pwd='nestor2020')
    entity.write(login_instance)
    print(f"Song {song_name} by {artist_name} has been added to WikiData server.")
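# Sketch of how the two helpers above combine (artist_obj and song_obj are
# MusicBrainz-style records as consumed by the getter functions):
artist_qid = create_artist(artist_obj)
create_song(song_obj, artist_qid, get_artist_name(artist_obj))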
def main(): parser = argparse.ArgumentParser() parser.add_argument("--entrypoint", default="http://*****:*****@Bot1") parser.add_argument("--password", default="kft004cr9toivjesd7obnqm16mpr3596") args = parser.parse_args() os.makedirs(args.cachedir, exist_ok=True) login = wdi_login.WDLogin( user=args.user, pwd=args.password, server=args.wikibase_server, base_url_template=args.base_url_template) loader = Wikiparl(args.oparl_schema_location, login, args.wikibase_server, args.base_url_template, args.cachedir) loader.load_type_mapping() loader.run(args.entrypoint)
def __init__(self, mediawiki_api_url, sparql_endpoint_url, username, password,
             set_of_uris_for_asio=set(), factory_of_uris: URIFactory = URIFactoryMock()):
    self.api_url = mediawiki_api_url
    self.sparql_url = sparql_endpoint_url
    self._local_item_engine = wdi_core.WDItemEngine. \
        wikibase_item_engine_factory(mediawiki_api_url, sparql_endpoint_url)
    self._local_login = wdi_login.WDLogin(username, password, mediawiki_api_url)
    self._mappings_prop = self._get_or_create_mappings_prop()
    self._init_callbacks()
    # add the related link to the original URI
    self._related_link_prop = self._get_or_create_related_link_prop()
    # for sameAs
    self._uri_set_for_sameas = set_of_uris_for_asio
    # URIs factory
    self._uris_factory = factory_of_uris
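# Usage sketch (the enclosing class name WikibaseSync is an assumption;
# endpoints and credentials are placeholders):
wb = WikibaseSync("http://localhost/w/api.php",
                  "http://localhost:8989/bigdata/namespace/wdq/sparql",
                  "bot_user", "bot_password")
item = wb._local_item_engine(new_item=True)  # WDItemEngine bound to the local Wikibase
item.set_label("mapped resource", lang="en")
item.write(wb._local_login)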