def map_psh_to_dbpedia():
    """Store a DBpedia "exactMatch" link for every equivalent that can be mapped.

    Reads all rows of the ``ekvivalence`` table, passes each ``ekvivalent``
    to ``psh_mapper.map_to_dbpedia`` and, when that returns a truthy value,
    makes sure a matching ``Vazbydbpedia`` row exists. Progress and every
    mapped URI are printed to stdout.
    """
    print("Mapping PSH to DBPedia...")
    hesla = list(query_to_dicts("""SELECT * FROM ekvivalence"""))
    count = len(hesla)
    for i, heslo in enumerate(hesla, 1):
        print("%s/%s" % (i, count))
        dbpedia = psh_mapper.map_to_dbpedia(heslo["ekvivalent"])
        if not dbpedia:
            # A falsy result is treated as "no mapping found".
            continue
        # get_or_create() already saves a newly created row, so no extra
        # save() call is needed afterwards.
        Vazbydbpedia.objects.get_or_create(
            id_heslo=heslo["id_heslo"],
            heslo_dbpedia=heslo["ekvivalent"].capitalize(),
            uri_dbpedia=dbpedia,
            typ_vazby="exactMatch",
        )
        print(dbpedia)
def _sum_counts_over_hierarchy(own_counts, subject2broader):
    """Return {subject: own count + own counts of all its descendants}.

    ``own_counts`` maps subject id -> its own record count and
    ``subject2broader`` maps subject id -> id of its broader subject.
    Only subjects present in ``own_counts`` appear in the result.
    """
    totals = dict(own_counts)
    for subject, own in own_counts.items():
        current = subject
        # Guards against looping forever if the hierarchy contains a cycle.
        seen = set([subject])
        while current in subject2broader:
            current = subject2broader[current]
            if current in seen:
                break
            seen.add(current)
            # Ancestors without a count row have nowhere to store a total.
            if current in totals:
                totals[current] += own
    return totals


def calculate_hierarchy_record_count():
    """Recompute ``pocet_hierarchie`` for every row of ``psh_pocetzaznamu``.

    The stored value is the subject's own ``pocet`` plus the ``pocet`` of
    every subject below it in ``Hierarchie``. Each subject's own count is
    pushed to all of its ancestors, so every descendant is counted exactly
    once regardless of the order in which subjects are processed.

    Returns None; results are written through ``PocetZaznamu``.
    """
    own_counts = {}
    for row in query_to_dicts("""SELECT * FROM psh_pocetzaznamu"""):
        own_counts[row["id_heslo"]] = row["pocet"]

    subject2broader = {}
    for link in Hierarchie.objects.all():
        subject2broader[link.podrazeny] = link.nadrazeny

    totals = _sum_counts_over_hierarchy(own_counts, subject2broader)

    for id_heslo, total in totals.items():
        subject = PocetZaznamu.objects.get(id_heslo=id_heslo)
        subject.pocet_hierarchie = total
        subject.save()
def _write_skos_concept(skos_file, heslo):
    """Write one concept dict (see get_concept_as_dict) as a <skos:Concept> element."""
    write = skos_file.write
    write("".join(['<skos:Concept rdf:about="http://psh.ntkcz.cz/skos/', heslo["id_heslo"], '">\n']))
    write('<skos:inScheme rdf:resource="http://psh.ntkcz.cz/skos/"/>\n')
    write("".join(['<dc:identifier>', heslo["id_heslo"], '</dc:identifier>\n']))
    write("".join(['<skos:prefLabel xml:lang="cs">', heslo["heslo"], '</skos:prefLabel>\n']).encode("utf8"))
    # "ekvivalent" is None when the concept has no English equivalent;
    # joining None would raise TypeError and abort the export.
    if heslo["ekvivalent"] is not None:
        write("".join(['<skos:prefLabel xml:lang="en">', heslo["ekvivalent"], '</skos:prefLabel>\n']).encode("utf8"))
    for varianta in heslo["varianty"]:
        write("".join(['<skos:altLabel xml:lang="', varianta["jazyk"], '">', varianta["varianta"], '</skos:altLabel>\n']).encode("utf8"))
    for podrazeny in heslo["podrazeny"]:
        write("".join(['<skos:narrower rdf:resource="http://psh.ntkcz.cz/skos/', podrazeny, '"/>\n']))
    for pribuzny in heslo["pribuzny"]:
        write("".join(['<skos:related rdf:resource="http://psh.ntkcz.cz/skos/', pribuzny, '"/>\n']))
    if heslo["nadrazeny"]:
        write("".join(['<skos:broader rdf:resource="http://psh.ntkcz.cz/skos/', heslo["nadrazeny"], '"/>\n']))
    if heslo["vazba_wikipedia"]:
        write("".join(['<skos:exactMatch rdf:resource="', heslo["vazba_wikipedia"], '" />\n']).encode("utf-8"))
    write("</skos:Concept>\n\n")


def make_skos():
    """Export all concepts as SKOS RDF/XML and zip the result.

    Writes ``static/skos/psh-skos.rdf`` under ``settings.ROOT``: a fixed
    ConceptScheme header (stamped with today's date) followed by one
    <skos:Concept> per row of the ``hesla`` table. Afterwards the file is
    packed into ``psh-skos.zip`` in the same directory with the external
    ``zip`` tool. Progress is printed to stdout.
    """
    # Local import so this block does not depend on the file's import list.
    import subprocess

    # NOTE(review): the "[email protected]" mailto values look like a
    # scrubbed placeholder rather than a real address - confirm and restore.
    # NOTE(review): labels are written without XML escaping; verify that the
    # database never holds "&" or "<" in labels.
    header = """<?xml version="1.0" encoding="utf-8"?> <rdf:RDF xmlns:cc="http://creativecommons.org/ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:xsd="http://www.w3.org/2001/XMLSchema#"> <skos:ConceptScheme rdf:about="http://psh.ntkcz.cz/skos/"> <cc:attributionName xml:lang="en">National Technical Library</cc:attributionName> <cc:attributionName xml:lang="cs">Národní technická knihovna</cc:attributionName> <cc:attributionURL rdf:resource="http://www.techlib.cz/cs/katalogy-a-databaze/psh/"/> <cc:legalcode rdf:resource="http://creativecommons.org/licenses/by-nc-sa/3.0/cz/"/> <cc:license rdf:resource="http://creativecommons.org/licenses/by-nc-sa/3.0/cz/"/> <cc:morePermissions rdf:resource="http://www.techlib.cz/cs/katalogy-a-databaze/psh/"/> <dc:creator> <rdf:Description> <foaf:mbox rdf:resource="mailto:[email protected]"/> <foaf:name xml:lang="en">National Technical Library</foaf:name> <foaf:name xml:lang="cs">Národní technická knihovna</foaf:name> </rdf:Description> </dc:creator> <dc:description xml:lang="cs">Polytematický strukturovaný heslář je česko-anglický řízený a měnitelný slovník lexikálních jednotek. Slouží k vyjádření věcného obsahu dokumentů a ke zpětnému vyhledání dokumentů na základě věcných kritérií a je určen především pro knihovny s polytematickými fondy.</dc:description> <dc:description xml:lang="en">Polythematic Structured Subject Heading System (PSH) is as a tool to organize and search for documents by subject. It is a set of subject headings which can be used to describe the document by subject. In its latest version (2.1) PSH is bilingual (Czech-English). Subject headings in both languages are interconnected. 
PSH contains over 13 000 subject headings and is divided into 44 thematic sections which have been prepared by experts in the respective disciplines in cooperation with librarians. Each subject heading is included in a hierarchy of six (or - under special circumstances - seven) levels according to its semantic content and specificity. The whole system is a tree structure and it represents various concepts from the most general to the more specific ones.</dc:description> <dc:language rdf:resource="http://lexvo.org/id/iso639-3/ces"/> <dc:language rdf:resource="http://lexvo.org/id/iso639-3/eng"/> <dc:language rdf:datatype="http://purl.org/dc/terms/ISO639-2">cze</dc:language> <dc:language rdf:datatype="http://purl.org/dc/terms/ISO639-2">eng</dc:language> <dc:publisher> <rdf:Description> <foaf:mbox rdf:resource="mailto:[email protected]"/> <foaf:name xml:lang="en">National Technical Library</foaf:name> <foaf:name xml:lang="cs">Národní technická knihovna</foaf:name> </rdf:Description> </dc:publisher> <dc:subject rdf:datatype="http://purl.org/dc/terms/LCC">025.43</dc:subject> <dc:subject rdf:datatype="http://purl.org/dc/terms/LCC">Z696.P65</dc:subject> <dc:subject xml:lang="cs">předmětová hesla</dc:subject> <dc:subject xml:lang="en">subject heading system</dc:subject> <dc:subject xml:lang="en">systematic retrieval language</dc:subject> <dc:subject xml:lang="cs">systematický selekční jazyk</dc:subject> <dc:title xml:lang="cs">Polytematický strukturovaný heslář</dc:title> <dc:title xml:lang="en">Polythematic Structured Subject Heading System</dc:title> <dc:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/> <dcterms:created rdf:datatype="http://www.w3.org/2001/XMLSchema#year">1993</dcterms:created> <dcterms:modified>%s</dcterms:modified> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH1"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH10067"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH10355"/> <skos:hasTopConcept 
rdf:resource="http://psh.ntkcz.cz/skos/PSH1038"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH10652"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH11322"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH11453"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH11591"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH116"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH11939"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH12008"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH12156"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH1217"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH12314"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH12577"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH13220"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH1781"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH2086"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH2395"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH2596"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH2910"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH320"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH3768"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH4231"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH4439"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH5042"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH5176"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH5450"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH573"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH6445"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH6548"/> <skos:hasTopConcept 
rdf:resource="http://psh.ntkcz.cz/skos/PSH6641"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH6914"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH7093"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH7769"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH7979"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH8126"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH8308"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH8613"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH8808"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH9194"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH9508"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH9759"/> <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH9899"/> <foaf:homepage rdf:resource="http://www.techlib.cz/cs/katalogy-a-databaze/psh/"/> </skos:ConceptScheme>\n\n""" % datetime.date.today()

    skos_dir = os.path.join(settings.ROOT, "static/skos")
    rdf_path = "%s/psh-skos.rdf" % skos_dir
    zip_path = "%s/psh-skos.zip" % skos_dir

    hesla = list(query_to_dicts("""SELECT id_heslo FROM hesla"""))
    print(len(hesla))

    # "with" closes the file even if a concept fails to serialise.
    with open(rdf_path, "w") as skos_file:
        skos_file.write(header)
        for row in hesla:
            id_heslo = row["id_heslo"]
            print(id_heslo)
            heslo = get_concept_as_dict(id_heslo)
            if heslo is None:
                # get_concept_as_dict returns None when the id is not found.
                continue
            _write_skos_concept(skos_file, heslo)
        skos_file.write("</rdf:RDF>")

    # An argument list avoids shell parsing, so paths containing spaces or
    # shell metacharacters are passed to zip unchanged.
    try:
        subprocess.call(["zip", "-j", zip_path, rdf_path])
    except OSError as error:
        # Zipping stays best-effort, as it was with os.system.
        print("zip could not be run: %s" % error)
def get_concept_as_dict(subject_id):
    """Get concept as dict from database according to its PSH ID

    Returns None when no row of ``hesla`` has the given id. Otherwise
    returns the row (``id_heslo``, ``heslo``, ``ekvivalent``) extended with:

    - ``nadrazeny``: id of the broader concept, or "" when there is none
    - ``zkratka``: abbreviation, or "" when there is none
    - ``podrazeny``: list of narrower concept ids
    - ``pribuzny``: list of related concept ids
    - ``varianty``: list of {"varianta": ..., "jazyk": ...} dicts
    - ``vazba_wikipedia``: Wikipedia URI, or "" when there is none
    """
    # NOTE(review): subject_id is interpolated straight into the SQL text.
    # If query_to_dicts accepts bind parameters, pass subject_id that way
    # instead - confirm against its definition.
    def fetch_column(sql, column):
        # Run one single-column lookup for this subject and collect the values.
        return [row[column] for row in query_to_dicts(sql % subject_id)]

    hesla = list(query_to_dicts("""SELECT hesla.id_heslo, hesla.heslo, ekvivalence.ekvivalent FROM hesla LEFT JOIN ekvivalence ON ekvivalence.id_heslo = hesla.id_heslo WHERE hesla.id_heslo = '%s'""" % subject_id))
    if not hesla:
        return None
    heslo = hesla[0]

    # When several rows come back the last one wins.
    nadrazeny = fetch_column("""SELECT nadrazeny FROM hierarchie WHERE podrazeny = '%s'""", "nadrazeny")
    heslo["nadrazeny"] = nadrazeny[-1] if nadrazeny else ""

    # A concept without an abbreviation row gets "" instead of raising
    # IndexError.
    zkratka = fetch_column("""SELECT zkratka FROM psh_zkratka WHERE psh_zkratka.id_heslo = '%s'""", "zkratka")
    heslo["zkratka"] = zkratka[0] if zkratka else ""

    heslo["podrazeny"] = fetch_column("""SELECT podrazeny FROM hierarchie WHERE nadrazeny = '%s'""", "podrazeny")
    heslo["pribuzny"] = fetch_column("""SELECT pribuzny FROM pribuznost WHERE pribuznost.id_heslo = '%s'""", "pribuzny")
    heslo["varianty"] = [
        {"varianta": row["varianta"], "jazyk": row["jazyk"]}
        for row in query_to_dicts("""SELECT varianta, jazyk FROM varianta WHERE id_heslo = '%s'""" % subject_id)
    ]

    # When several rows come back the last one wins.
    vazba_wikipedia = fetch_column("""SELECT uri_wikipedia FROM vazbywikipedia WHERE vazbywikipedia.id_heslo = '%s'""", "uri_wikipedia")
    heslo["vazba_wikipedia"] = vazba_wikipedia[-1] if vazba_wikipedia else ""

    return heslo