"word sense": "Q22504", "word-form": "Q22505", "word-formation": "Q22506", "wordform": "Q22507", "wordnet": "Q22508" } with open(config.datafolder + "/terms/SkE terms for SKOS vocab - batch 1.csv", 'r', encoding="utf-8") as csvfile: csvdict = csv.DictReader(csvfile) for item in csvdict: time.sleep(1) print(str(item)) if item['SKOS Concept URI'] != "": lwbqid = lwb.getqid("Q7", item['SKOS Concept URI']) else: lwbqid = map[item['Keyword4newScheme']] schemeStatement = lwb.updateclaim(lwbqid, "P74", "Q22279", "item") # skos:inScheme SkE #1 scoreStatement = lwb.updateclaim(lwbqid, "P82", item['SkE score'], "string") lwb.setqualifier(lwbqid, "P82", scoreStatement, "P83", "Q22279", "item") lwb.setqualifier(lwbqid, "P82", scoreStatement, "P88", item['Keyword4newScheme'], "string")
"http://www.w3.org/2004/02/skos/core#exactMatch": "P78", "http://www.w3.org/2004/02/skos/core#relatedMatch": "P79", "http://www.w3.org/2004/02/skos/core#definition": "P80" } with open('D:/LexBib/terms/SKOS4lwb.json', encoding="utf-8") as f: data = json.load(f)['results']['bindings'] count = 1 for row in data: print('\nTriple [' + str(count) + '], ' + str(len(data) - count) + ' triples left.') lwbs = lwb.getqid("Q7", row['s']['value']) if row['p']['value'] in propmap: if row['o']['type'] == "literal": statement = lwb.updateclaim(lwbs, propmap[row['p']['value']], row['o']['value'].rstrip(), "string") else: lwbo = lwb.getqid("Q7", row['o']['value'].rstrip()) statement = lwb.updateclaim(lwbs, propmap[row['p']['value']], lwbo, "item") elif row['p']['value'] == "http://www.w3.org/2004/02/skos/core#prefLabel": lwb.setlabel(lwbs, row['o']['xml:lang'], row['o']['value'].rstrip()) elif row['p']['value'] == "http://www.w3.org/2004/02/skos/core#altLabel": lwb.setlabel(lwbs, row['o']['xml:lang'], row['o']['value'].rstrip(), type="alias") count += 1
# Link articles to their journal serial item: for every article in
# article_issn.json, look up the journal Qid by ISSN (from issn_journals.json)
# and write a P46 ("contained in serial") item claim on the article.
import json

import lwb

with open('D:/LexBib/journals/article_issn.json', encoding="utf-8") as f:
    itemdict = json.load(f)
with open('D:/LexBib/journals/issn_journals.json', encoding="utf-8") as f:
    journaldict = json.load(f)

# Build the ISSN -> journal-Qid lookup (strip the entity URI prefix).
issndict = {}
for journal in journaldict:
    issndict[journal['issn']] = journal['journal'].replace(
        "http://data.lexbib.org/entity/", "")

count = 0
for item in itemdict:
    count += 1
    lwbqid = item['item'].replace("http://data.lexbib.org/entity/", "")
    # FIX: an ISSN missing from issn_journals.json previously raised an
    # uncaught KeyError and aborted the whole run; warn and skip instead.
    if item['issn'] not in issndict:
        print('*** No journal found for ISSN ' + item['issn'] +
              ', skipping ' + lwbqid)
        continue
    lwb.updateclaim(lwbqid, "P46", issndict[item['issn']], "item")
    print('OK. ' + str(len(itemdict) - count) + ' items left.')
# Fragment of a creator-migration script: converts creator *literal* statements
# (P39 author literal / P42 editor literal) on BibItems into creator *item*
# statements (P12 / P13), keeping the list position qualifier (P33) and the
# original name string (P67), then removes the old literal statement.
# Relies on `data`, `guidfix`, `re`, `time` and `lwb` defined earlier in the file.
count = 1
for item in data:
    print('\nItem [' + str(count) + '].')
    bibItem = item['bibItem'].replace("http://data.lexbib.org/entity/", "")
    print('BibItem is ' + bibItem + '.')
    # Rewrite the statement id into the claim-GUID form (Qid + "$" + guid).
    oldStatement = re.sub(guidfix, r'\1$', item['statement_id'])
    if 'Qid' in item and item['Qid'].startswith("Q"):
        lwbqid = re.search(guidfix, item['statement_id']).group(1)
        creatorqid = item['Qid']
        #print(lwbqid, oldStatement)
        claim = lwb.getclaimfromstatement(oldStatement)
        if "P39" in claim:
            newprop = "P12"
            listpos = claim["P39"][0]['qualifiers']["P33"][0]['datavalue']['value']
        elif "P42" in claim:
            newprop = "P13"
            listpos = claim["P42"][0]['qualifiers']["P33"][0]['datavalue']['value']
        else:
            print('*** Something is wrong with this supposed creator literal statement')
            time.sleep(10)
            # FIX: skip this item instead of falling through — `newprop` and
            # `listpos` would be undefined (NameError) on the first occurrence,
            # or silently stale from a previous iteration afterwards.
            count += 1
            continue
        newStatement = lwb.updateclaim(lwbqid, newprop, creatorqid, "item")
        lwb.setqualifier(lwbqid, newprop, newStatement, "P33", listpos, "string")
        lwb.setqualifier(lwbqid, newprop, newStatement, "P67",
                         item["firstName"] + " " + item["lastName"], "string")
        lwb.removeclaim(oldStatement)
    else:
        print('We have no item for author ' + item['creatorName'])
        time.sleep(1)
    count += 1
# Fragment of a Wikidata->LexBib property sync (outer loop and `done`, `wdqid`,
# `wdprop`, `lwbqid`, `prop` are defined before this view). Retries wbgetclaims
# against the Wikidata API until a "claims" key comes back, then — if the claim
# set is non-empty — copies the first value of `wdprop` to the LexBib item as a
# url-datatype claim. The trailing `#`-prefixed text is a commented-out SPARQL
# retry loop whose original line breaks were lost in this collapsed view.
while (not done): try: request = wikidata.get('wbgetclaims', entity=wdqid, property=wdprop) if "claims" in request: done = True except Exception as ex: print('Getclaims operation failed, will try again...\n' + str(ex)) time.sleep(4) if bool(request['claims']): value = request['claims'][wdprop][0]['mainsnak']['datavalue'][ 'value'] #print(lwbqid+prop+value) statement = lwb.updateclaim(lwbqid, prop, value, "url") # lwbsparql = SPARQLWrapper("https://data.lexbib.org/query/sparql", agent='LexBib (lexbib.org)') # lwbsparql.setQuery("""PREFIX ldp: <http://data.lexbib.org/prop/direct/> # SELECT ?item ?class WHERE { # ?item ldp:P5 ?class . # }""") # lwbsparql.setReturnFormat(JSON) # while True: # try: # time.sleep(1.5) # result = lwbsparql.query().convert() # datalist = result['results']['bindings'] # print(str(result)) # break # except Exception as ex:
# Import publication languages into LexBib: for each language URI listed in
# publangs.txt, resolve (or match) the Language item (class Q8), link it to its
# Wikidata counterpart (P4) and its own URI (P32), and copy all labels from
# langdict.json.
import json

import lwb

with open('D:/LexBib/terms/langdict.json', encoding="utf-8") as f:
    langdict = json.load(f)
with open('D:/LexBib/languages/publangs.txt', encoding="utf-8") as f:
    publangs = f.read().split('\n')

total = len(publangs)
for index, lang in enumerate(publangs, start=1):
    print('\nLine [' + str(index) + '] of ' + str(total) + ': ' + lang)
    qid = lwb.getqid(["Q8"], lang)  # class Language
    statement = lwb.updateclaim(qid, "P4", langdict[lang]['wdqid'], "url")
    statement = lwb.updateclaim(qid, "P32", lang, "url")
    for labellang, labeltext in langdict[lang]['labels'].items():
        statement = lwb.setlabel(qid, labellang, labeltext)
    print('OK. ' + str(total - index) + ' languages left.')

print('\nFinished.\n')
# Fragment handling "orphaned" ISSNs (inside a try: opened before this view,
# within a per-ISSN loop). Reads the journal Qid and label from a Wikidata
# SPARQL result; blanks the label if it is just a bare Qid string; on lookup
# failure records the ISSN in `orphaned` and continues. Then creates/gets a
# LexBib serial item (class Q20, keyed by the Wikidata URI), writes P3/P20/P4
# and the English label, and starts a retry loop around a SPARQL query that
# finds BibItems sharing this ISSN (to receive P46 "contained in serial").
# The retry loop's body continues past this view.
wdqid = datalist[0]['journal']['value'] label = datalist[0]['journalLabel']['value'] success = 1 regexp = re.compile(r'Q\d+') if regexp.search(label): label = "" except Exception as ex: print("ISSN " + issn + " not found on wikidata, skipping, will add to orphaned list.") orphaned += issn + '\tnot found on wikidata.\n' continue # create lwb serial for this orphaned issn lwbqid = lwb.getqid("Q20", wdqid) # for serials, wdqid is also lexbib uri statement = lwb.updateclaim(lwbqid, "P3", wdqid, "url") statement = lwb.updateclaim(lwbqid, "P20", issn, "string") statement = lwb.updateclaim(lwbqid, "P4", wdqid, "url") statement = lwb.setlabel(lwbqid, "en", label) # add P46 "contained in serial" to bibitems with that issn # get bibitems url = "https://data.lexbib.org/query/sparql?format=json&query=PREFIX%20lwb%3A%20%3Chttp%3A%2F%2Fdata.lexbib.org%2Fentity%2F%3E%0APREFIX%20ldp%3A%20%3Chttp%3A%2F%2Fdata.lexbib.org%2Fprop%2Fdirect%2F%3E%0A%0Aselect%20%3FbibItem%20%3Fissn%20%3Fjournal%20where%0A%7B%20%3FbibItem%20ldp%3AP5%20lwb%3AQ3%20.%0A%20%20%3FbibItem%20ldp%3AP20%20%3Fissn%20.%0A%20%3Fjournal%20ldp%3AP5%20lwb%3AQ20%20.%0A%20%3Fjournal%20ldp%3AP20%20%3Fissn%20.%0A%20FILTER%20%28%3Fissn%20%3D%20%22" + issn + "%22%29%7D" done = False while (not done): try: r2 = requests.get(url) bindings2 = r2.json()['results']['bindings'] except Exception as ex: print('Error: SPARQL request failed: ' + str(ex))
# Fragment of bibimport.py's per-item retry loop (the enclosing while and
# `rep`, `index`, `totalrows`, `data` come from before this view). Aborts after
# 5 failed attempts on one item; otherwise fetches the item, resolves its
# BibItem Qid (class Q3), writes the class statement (P5), and scans the
# item's creator triples: P39 (author literal) maps to item-property P12,
# P42 (editor literal) to P13, extracting the P33 list-position qualifier
# before checking existing creator item claims. The loop body continues past
# this view.
if rep > 4: # break 'while' loop after 5 failed attempts to process item print( '\nbibimport.py has entered in an endless loop... abort.') break else: print('\n' + str(index) + ' items processed. ' + str(totalrows - index) + ' list items left.\n') #time.sleep(1) rep += 1 try: item = data[index] qid = lwb.getqid( "Q3", item['lexbibUri']) # Q3: LexBib BibItem class classStatement = lwb.updateclaim(qid, "P5", item['lexbibClass'], "item") for triple in item['creatorvals']: #check if creator with that position is already there as item (not literal) skip = False if triple['property'] == "P39": itemprop = "P12" elif triple['property'] == "P42": itemprop = "P13" for Qualifier in triple['Qualifiers']: if Qualifier['property'] == "P33": listpos = Qualifier['value'] print( 'Found ' + triple['property'] + ' creator listpos: ', listpos) creator_item_claims = lwb.getclaims(qid, itemprop)
# Fragment of a per-creator loop (loop header outside this view; originally
# multi-line — the leading "#print(str(item))" was its own commented-out debug
# line before this view was collapsed). Resolves the BibItem Qid from the
# statement id; reuses a known creator item (Qid present) or creates a new
# Person item (Q5) labelled with the creator name, storing first/last name as
# P40/P41. Then reads the old literal claim and maps P39 -> P12 / P42 -> P13,
# extracting the P33 list position; on neither, warns and sleeps — the
# fragment ends before whatever follows that branch.
#print(str(item)) bibItem = item['bibItem'].replace("http://data.lexbib.org/entity/", "") print('BibItem is ' + bibItem + '.') oldStatement = re.sub(guidfix, r'\1$', item['statement_id']) bibitemqid = re.search(guidfix, item['statement_id']).group(1) if 'Qid' in item and item['Qid'].startswith("Q"): creatorqid = item['Qid'] creatorPrefLabel = lwb.getlabel(creatorqid, "en") print('This is a known creator item: ' + creatorqid + ' ' + creatorPrefLabel) else: print('We have no item for author ' + item['creatorName'] + ', will set up a new item.') creatorqid = lwb.newitemwithlabel("Q5", "en", item['creatorName']) creatorPrefLabel = item['creatorName'] lwb.updateclaim(creatorqid, "P40", item['firstName'], "string") lwb.updateclaim(creatorqid, "P41", item['lastName'], "string") claim = lwb.getclaimfromstatement(oldStatement) if "P39" in claim: newprop = "P12" listpos = claim["P39"][0]['qualifiers']["P33"][0]['datavalue'][ 'value'] elif "P42" in claim: newprop = "P13" listpos = claim["P42"][0]['qualifiers']["P33"][0]['datavalue'][ 'value'] else: print( '*** Something is wrong with this supposed creator literal statement' ) time.sleep(10)
# Fragment of a Wikidata->LexBib enrichment loop over `props` (defined before
# this view, as is `lwbitems`, a SPARQL binding list pairing LexBib items with
# their Wikidata Qids). "en.wiki" copies the English Wikipedia page URL to P66
# via lwb.get_wikipedia_url_from_wikidata_id; "en.label" starts a retry loop
# to fetch the English label — that loop's body continues past this view.
for prop in props: if prop == "en.wiki": # get en.wikipedia url and write it to LWB using P66 itemcount = 1 for item in lwbitems: print('\nItem [' + str(itemcount) + '], ' + str(len(lwbitems) - itemcount) + ' items left.') wdqid = item['wdqid']['value'].replace( "http://www.wikidata.org/entity/", "") lwbqid = item['item']['value'].replace( "http://data.lexbib.org/entity/", "") print('Will now get en.wikipedia page url for LWB item: ' + lwbqid + ' from wdItem: ' + wdqid) enwikiurl = lwb.get_wikipedia_url_from_wikidata_id( wdqid, lang='en') #, debug=True) lwb.updateclaim(lwbqid, "P66", enwikiurl, "url") itemcount += 1 elif prop == "en.label": # get label (English), and write it to LWB itemcount = 1 for item in lwbitems: print('\nItem [' + str(itemcount) + '], ' + str(len(lwbitems) - itemcount) + ' items left.') wdqid = item['wdqid']['value'].replace( "http://www.wikidata.org/entity/", "") lwbqid = item['item']['value'].replace( "http://data.lexbib.org/entity/", "") print('Will now get label (English) for LWB item: ' + lwbqid + ' from wdItem: ' + wdqid) done = False while (not done):
url = "https://data.lexbib.org/query/sparql?format=json&query=PREFIX%20lwb%3A%20%3Chttp%3A%2F%2Fdata.lexbib.org%2Fentity%2F%3E%0APREFIX%20ldp%3A%20%3Chttp%3A%2F%2Fdata.lexbib.org%2Fprop%2Fdirect%2F%3E%0APREFIX%20lp%3A%20%3Chttp%3A%2F%2Fdata.lexbib.org%2Fprop%2F%3E%0APREFIX%20lps%3A%20%3Chttp%3A%2F%2Fdata.lexbib.org%2Fprop%2Fstatement%2F%3E%0APREFIX%20lpq%3A%20%3Chttp%3A%2F%2Fdata.lexbib.org%2Fprop%2Fqualifier%2F%3E%0A%23%20%28group_concat%28%28strafter%28%3Fbibitem%2C%22http%3A%2F%2Fdata.lexbib.org%2Fentity%2F%22%29%29%3B%20separator%20%3D%20%22%40%22%29%20as%20%3Fbibitems%29%0Aselect%20distinct%20%3Fissue%20%3Fissuelabel%20%3Fissn%20%3Fjournal%20%3Fjournallabel%20%28group_concat%28%28strafter%28str%28%3Fbibitem%29%2C%22http%3A%2F%2Fdata.lexbib.org%2Fentity%2F%22%29%29%3B%20separator%20%3D%20%22%40%22%29%20as%20%3Fbibitems%29%20where%0A%7B%3Fissue%20ldp%3AP5%20lwb%3AQ1907%20.%0A%20%3Fissue%20rdfs%3Alabel%20%3Fissuelabel%20.%0A%20%3Fbibitem%20ldp%3AP9%20%3Fissue%20.%0A%20%3Fbibitem%20ldp%3AP20%20%3Fissn%20.%0A%20%3Fjournal%20ldp%3AP5%20lwb%3AQ20%20.%0A%20%3Fjournal%20ldp%3AP20%20%3Fissn%20.%0A%20%3Fjournal%20rdfs%3Alabel%20%3Fjournallabel%20.%0A%0A%20%20%7D%0AGROUP%20BY%20%3Fissue%20%3Fissuelabel%20%3Fissn%20%3Fjournal%20%3Fjournallabel%20%3Fbibitems" done = False while (not done): try: r = requests.get(url) bindings = r.json()['results']['bindings'] except Exception as ex: print('Error: SPARQL request failed: '+str(ex)) time.sleep(2) continue done = True #print(str(bindings)) print('Found '+str(len(bindings))+' journal issues...\n') time.sleep(3) count = 0 for item in bindings: count +=1 issueqid = item['issue']['value'].replace("http://data.lexbib.org/entity/","") issn = item['issn']['value'] lwb.updateclaim(issueqid,"P20",issn,"string") journalqid = item['journal']['value'].replace("http://data.lexbib.org/entity/","") lwb.updateclaim(issueqid,"P46",journalqid,"item") for bibitem in item['bibitems']['value'].split('@'): 
lwb.updateclaim(bibitem,"P46",journalqid,"item") print('OK. '+str(len(bindings)-count)+' items left.\n')
# get csv (part of google spreadsheet used for manual BabelID annotation)
# For every unprocessed term row with a non-empty status: store the BabelNet id
# (P86) with the curation status as qualifier (P87) and the term URI as
# reference (P3); a status of "0" with no id is recorded as an explicit
# "novalue" claim.
with open('D:/LexBib/terms/term_bnid_status_labels.csv') as csvfile:
    termlist = list(csv.DictReader(csvfile))
print(str(termlist))
totalrows = len(termlist)
#print(str(termdict))

processed = []
for count, row in enumerate(termlist, start=1):
    print('\nNow processing term ' + str(count) + ' of ' + str(totalrows) +
          ': ' + row["term"])
    lwbqid = lwb.getqid("Q7", row['term'])
    if row['term'] not in processed and row["status"] != "":
        if row['bnid'].startswith("bn:"):
            statement = lwb.updateclaim(lwbqid, "P86", row['bnid'], "string")
            qualifier = lwb.setqualifier(lwbqid, "P86", statement, "P87",
                                         row['status'], "string")
            reference = lwb.setref(statement, "P3", row['term'], "url")
        elif row['bnid'] == "" and row['status'] == "0":
            statement = lwb.updateclaim(lwbqid, "P86", "novalue", "novalue")
            qualifier = lwb.setqualifier(lwbqid, "P86", statement, "P87",
                                         "0", "string")
            reference = lwb.setref(statement, "P3", row['term'], "url")
        processed.append(row['term'])
# Re-import corrected SKOS definitions: for every row in SKOS_defs_fix.csv,
# resolve the concept (class Q7) by its SKOS URI, write the definition as a
# P80 string claim, and reference it with the source URI (P3).
import json

import lwb
import csv

# SKOS property URI -> LexBib property id. Only skos:definition is active in
# this run; the other mappings are kept commented for reference.
propmap = {
    # "http://www.w3.org/2004/02/skos/core#broader": "P72",
    # "http://www.w3.org/2004/02/skos/core#inScheme": "P74",
    # "http://www.w3.org/2004/02/skos/core#narrower": "P73",
    # "http://www.w3.org/2004/02/skos/core#topConceptOf": "P75",
    # "http://www.w3.org/2004/02/skos/core#note": "P81",
    # "http://www.w3.org/2004/02/skos/core#related": "P76",
    # "http://www.w3.org/2004/02/skos/core#closeMatch": "P77",
    # "http://www.w3.org/2004/02/skos/core#exactMatch": "P78",
    # "http://www.w3.org/2004/02/skos/core#relatedMatch": "P79",
    "http://www.w3.org/2004/02/skos/core#definition": "P80"
}

with open('D:/LexBib/terms/SKOS_defs_fix.csv', encoding="utf-8") as f:
    for count, row in enumerate(csv.DictReader(f), start=1):
        print('\nDef [' + str(count) + ']: ' + row['subject'])
        lwbs = lwb.getqid("Q7", row['subject'])
        statement = lwb.updateclaim(lwbs, "P80", row['def'], "string")
        reference = lwb.setref(statement, "P3", row['subject'], "url")
        # lwb.setlabel(lwbs, row['o']['xml:lang'], row['o']['value'].rstrip(), type="alias")