Example #1
0
def map_psh_to_dbpedia():
    """Link PSH subject headings to DBpedia resources.

    Reads every row of the ``ekvivalence`` table, passes the row's
    ``ekvivalent`` value to ``psh_mapper.map_to_dbpedia`` and, when that
    returns a truthy value, makes sure a ``Vazbydbpedia`` row of type
    ``exactMatch`` exists with that value as ``uri_dbpedia``.

    Progress (``position/total``) and every mapped value are printed to
    stdout.  Returns nothing.
    """
    print("Mapping PSH to DBPedia...")
    hesla = list(query_to_dicts("""SELECT * FROM ekvivalence"""))
    count = len(hesla)

    for position, heslo in enumerate(hesla, 1):
        print("%s/%s" % (position, count))
        dbpedia = psh_mapper.map_to_dbpedia(heslo["ekvivalent"])
        if not dbpedia:
            continue

        # get_or_create() already persists a newly created row, so no
        # additional save() call is needed afterwards.
        Vazbydbpedia.objects.get_or_create(
            id_heslo=heslo["id_heslo"],
            heslo_dbpedia=heslo["ekvivalent"].capitalize(),
            uri_dbpedia=dbpedia,
            typ_vazby="exactMatch",
        )
        print(dbpedia)
Example #2
0
def _subtree_totals(own_counts, parent_of):
    """Return ``{subject: own count + own counts of all its descendants}``.

    ``own_counts`` maps a subject id to its own record count and
    ``parent_of`` maps a subject id to the id of its broader subject.
    Only subjects present in ``own_counts`` appear in the result; an
    ancestor without an entry there is skipped because there is no count
    to add to.
    """
    totals = dict(own_counts)
    for subject, own in own_counts.items():
        # Add this subject's own count to every ancestor exactly once.
        visited = set([subject])
        current = subject
        while current in parent_of:
            current = parent_of[current]
            if current in visited:
                # Defensive: a cyclic hierarchy would otherwise loop forever.
                break
            visited.add(current)
            if current in totals:
                totals[current] += own
    return totals


def calculate_hierarchy_record_count():
    """Store, for every subject, the record count of its whole subtree.

    Own counts are read from ``psh_pocetzaznamu`` (``id_heslo`` ->
    ``pocet``) and the narrower -> broader links from ``Hierarchie``
    (``podrazeny`` -> ``nadrazeny``).  For each subject with a count row
    the sum of its own count and the own counts of all its descendants is
    written to ``PocetZaznamu.pocet_hierarchie``.

    Every subject contributes its own count to each of its ancestors, so
    the totals do not depend on the order in which subjects are processed.
    Returns nothing.
    """
    counts = query_to_dicts("""SELECT * FROM psh_pocetzaznamu""")
    own_counts = dict((c["id_heslo"], c["pocet"]) for c in counts)
    parent_of = dict(
        (h.podrazeny, h.nadrazeny) for h in Hierarchie.objects.all()
    )

    totals = _subtree_totals(own_counts, parent_of)
    for id_heslo, total in totals.items():
        subject = PocetZaznamu.objects.get(id_heslo=id_heslo)
        subject.pocet_hierarchie = total
        subject.save()
Example #3
0
def _skos_concept(heslo):
    """Render one concept dict as a ``<skos:Concept>`` RDF/XML fragment.

    ``heslo`` is a dict with the keys ``id_heslo``, ``heslo``,
    ``ekvivalent``, ``varianty``, ``podrazeny``, ``pribuzny``,
    ``nadrazeny`` and ``vazba_wikipedia``.  All values placed into the
    markup are XML-escaped.  Returns the fragment as one string.
    """
    from xml.sax.saxutils import escape

    def attr(value):
        # Attribute values are wrapped in double quotes below, so quotes
        # must be escaped in addition to &, < and >.
        return escape(value, {'"': "&quot;"})

    base = "http://psh.ntkcz.cz/skos/"
    parts = [
        '<skos:Concept rdf:about="', base, attr(heslo["id_heslo"]), '">\n',
        '<skos:inScheme rdf:resource="', base, '"/>\n',
        '<dc:identifier>', escape(heslo["id_heslo"]), '</dc:identifier>\n',
        '<skos:prefLabel xml:lang="cs">', escape(heslo["heslo"]),
        '</skos:prefLabel>\n',
    ]

    # A concept without an English equivalent has no value here; emit no
    # English prefLabel instead of failing on None.
    if heslo["ekvivalent"] is not None:
        parts += ['<skos:prefLabel xml:lang="en">',
                  escape(heslo["ekvivalent"]), '</skos:prefLabel>\n']

    for varianta in heslo["varianty"]:
        parts += ['<skos:altLabel xml:lang="', attr(varianta["jazyk"]), '">',
                  escape(varianta["varianta"]), '</skos:altLabel>\n']

    for podrazeny in heslo["podrazeny"]:
        parts += ['<skos:narrower rdf:resource="', base, attr(podrazeny),
                  '"/>\n']

    for pribuzny in heslo["pribuzny"]:
        parts += ['<skos:related rdf:resource="', base, attr(pribuzny),
                  '"/>\n']

    if heslo["nadrazeny"]:
        parts += ['<skos:broader rdf:resource="', base,
                  attr(heslo["nadrazeny"]), '"/>\n']

    if heslo["vazba_wikipedia"]:
        parts += ['<skos:exactMatch rdf:resource="',
                  attr(heslo["vazba_wikipedia"]), '" />\n']

    parts.append("</skos:Concept>\n\n")
    return "".join(parts)


def make_skos():
    """Export all concepts as SKOS RDF/XML and zip the result.

    Writes ``psh-skos.rdf`` into ``<settings.ROOT>/static/skos``: a fixed
    ConceptScheme header (stamped with today's date) followed by one
    ``<skos:Concept>`` element per row of the ``hesla`` table, then packs
    that file into ``psh-skos.zip`` in the same directory.  The number of
    concepts and each processed id are printed to stdout.  Returns nothing.
    """
    import zipfile

    # NOTE(review): both foaf:mbox values below read "mailto:[email protected]",
    # which looks like an obfuscation placeholder rather than a real
    # address -- confirm against the authoritative header.
    header = """<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
   xmlns:cc="http://creativecommons.org/ns#"
   xmlns:dc="http://purl.org/dc/elements/1.1/"
   xmlns:dcterms="http://purl.org/dc/terms/"
   xmlns:dctype="http://purl.org/dc/dcmitype/"
   xmlns:foaf="http://xmlns.com/foaf/0.1/"
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xmlns:skos="http://www.w3.org/2004/02/skos/core#"
   xmlns:xsd="http://www.w3.org/2001/XMLSchema#">
  <skos:ConceptScheme rdf:about="http://psh.ntkcz.cz/skos/">
    <cc:attributionName xml:lang="en">National Technical Library</cc:attributionName>
    <cc:attributionName xml:lang="cs">Národní technická knihovna</cc:attributionName>
    <cc:attributionURL rdf:resource="http://www.techlib.cz/cs/katalogy-a-databaze/psh/"/>
    <cc:legalcode rdf:resource="http://creativecommons.org/licenses/by-nc-sa/3.0/cz/"/>
    <cc:license rdf:resource="http://creativecommons.org/licenses/by-nc-sa/3.0/cz/"/>
    <cc:morePermissions rdf:resource="http://www.techlib.cz/cs/katalogy-a-databaze/psh/"/>
    <dc:creator>
      <rdf:Description>
        <foaf:mbox rdf:resource="mailto:[email protected]"/>
        <foaf:name xml:lang="en">National Technical Library</foaf:name>
        <foaf:name xml:lang="cs">Národní technická knihovna</foaf:name>
      </rdf:Description>
    </dc:creator>
    <dc:description xml:lang="cs">Polytematický strukturovaný heslář je česko-anglický řízený a měnitelný slovník lexikálních jednotek. Slouží k vyjádření věcného obsahu dokumentů a ke zpětnému vyhledání dokumentů na základě věcných kritérií a je určen především pro knihovny s polytematickými fondy.</dc:description>
    <dc:description xml:lang="en">Polythematic Structured Subject Heading System (PSH) is as a tool to organize and search for documents by subject. It is a set of subject headings which can be used to describe the document by subject. In its latest version (2.1) PSH is bilingual (Czech-English). Subject headings in both languages are interconnected. PSH contains over 13 000 subject headings and is divided into 44 thematic sections which have been prepared by experts in the respective disciplines in cooperation with librarians. Each subject heading is included in a hierarchy of six (or - under special circumstances - seven) levels according to its semantic content and specificity. The whole system is a tree structure and it represents various concepts from the most general to the more specific ones.</dc:description>
    <dc:language rdf:resource="http://lexvo.org/id/iso639-3/ces"/>
    <dc:language rdf:resource="http://lexvo.org/id/iso639-3/eng"/>
    <dc:language rdf:datatype="http://purl.org/dc/terms/ISO639-2">cze</dc:language>
    <dc:language rdf:datatype="http://purl.org/dc/terms/ISO639-2">eng</dc:language>
    <dc:publisher>
      <rdf:Description>
        <foaf:mbox rdf:resource="mailto:[email protected]"/>
        <foaf:name xml:lang="en">National Technical Library</foaf:name>
        <foaf:name xml:lang="cs">Národní technická knihovna</foaf:name>
      </rdf:Description>
    </dc:publisher>
    <dc:subject rdf:datatype="http://purl.org/dc/terms/LCC">025.43</dc:subject>
    <dc:subject rdf:datatype="http://purl.org/dc/terms/LCC">Z696.P65</dc:subject>
    <dc:subject xml:lang="cs">předmětová hesla</dc:subject>
    <dc:subject xml:lang="en">subject heading system</dc:subject>
    <dc:subject xml:lang="en">systematic retrieval language</dc:subject>
    <dc:subject xml:lang="cs">systematický selekční jazyk</dc:subject>
    <dc:title xml:lang="cs">Polytematický strukturovaný heslář</dc:title>
    <dc:title xml:lang="en">Polythematic Structured Subject Heading System</dc:title>
    <dc:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dcterms:created rdf:datatype="http://www.w3.org/2001/XMLSchema#year">1993</dcterms:created>
    <dcterms:modified>%s</dcterms:modified>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH1"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH10067"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH10355"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH1038"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH10652"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH11322"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH11453"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH11591"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH116"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH11939"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH12008"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH12156"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH1217"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH12314"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH12577"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH13220"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH1781"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH2086"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH2395"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH2596"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH2910"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH320"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH3768"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH4231"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH4439"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH5042"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH5176"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH5450"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH573"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH6445"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH6548"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH6641"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH6914"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH7093"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH7769"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH7979"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH8126"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH8308"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH8613"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH8808"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH9194"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH9508"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH9759"/>
    <skos:hasTopConcept rdf:resource="http://psh.ntkcz.cz/skos/PSH9899"/>
    <foaf:homepage rdf:resource="http://www.techlib.cz/cs/katalogy-a-databaze/psh/"/>
  </skos:ConceptScheme>\n\n"""% datetime.date.today()

    skos_dir = os.path.join(settings.ROOT, "static/skos")
    rdf_path = os.path.join(skos_dir, "psh-skos.rdf")
    zip_path = os.path.join(skos_dir, "psh-skos.zip")

    hesla = list(query_to_dicts("""SELECT id_heslo
        FROM hesla"""))
    print(len(hesla))

    # The with-block guarantees the export file is closed (and flushed
    # before zipping) even if serialising a concept raises.
    with open(rdf_path, "w") as skos_file:
        skos_file.write(header)
        for row in hesla:
            id_heslo = row["id_heslo"]
            print(id_heslo)
            heslo = get_concept_as_dict(id_heslo)
            if heslo is None:
                # get_concept_as_dict returns None when the id has no row.
                continue
            skos_file.write(_skos_concept(heslo).encode("utf8"))
        skos_file.write("</rdf:RDF>")

    # Pack with the standard library: no shell, no external zip binary.
    archive = zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED)
    try:
        archive.write(rdf_path, os.path.basename(rdf_path))
    finally:
        archive.close()
Example #4
0
def get_concept_as_dict(subject_id):
    """Get concept as dict from database according to its PSH ID.

    Returns ``None`` when ``hesla`` has no row for ``subject_id``.
    Otherwise returns the first matching row (columns ``id_heslo``,
    ``heslo``, ``ekvivalent``) extended with these keys:

    * ``nadrazeny`` -- value from ``hierarchie`` where this id is the
      ``podrazeny``; ``""`` when there is none (last row wins if several).
    * ``zkratka`` -- value from ``psh_zkratka``; ``""`` when there is none.
    * ``podrazeny`` -- list of ``podrazeny`` values from ``hierarchie``.
    * ``pribuzny`` -- list of ``pribuzny`` values from ``pribuznost``.
    * ``varianty`` -- list of ``{"varianta": ..., "jazyk": ...}`` dicts.
    * ``vazba_wikipedia`` -- ``uri_wikipedia`` from ``vazbywikipedia``;
      ``""`` when there is none (last row wins if several).
    """
    def rows(sql):
        # NOTE(review): the id is interpolated straight into the SQL text.
        # If subject_id can come from user input this is injectable;
        # switch to bound parameters if query_to_dicts supports them.
        return list(query_to_dicts(sql % subject_id))

    hesla = rows("""SELECT hesla.id_heslo,
        hesla.heslo,
        ekvivalence.ekvivalent
        FROM hesla
        LEFT JOIN ekvivalence ON ekvivalence.id_heslo = hesla.id_heslo
        WHERE hesla.id_heslo = '%s'""")
    if not hesla:
        return None
    heslo = hesla[0]

    nadrazeny = rows("""SELECT nadrazeny
        FROM hierarchie
        WHERE podrazeny = '%s'""")
    heslo["nadrazeny"] = nadrazeny[-1]["nadrazeny"] if nadrazeny else ""

    zkratka = rows("""SELECT zkratka
        FROM psh_zkratka
        WHERE psh_zkratka.id_heslo = '%s'""")
    # Fall back to "" (like the other optional fields) instead of failing
    # with IndexError when the concept has no abbreviation row.
    heslo["zkratka"] = zkratka[0]["zkratka"] if zkratka else ""

    heslo["podrazeny"] = [p["podrazeny"] for p in rows("""SELECT podrazeny
        FROM hierarchie
        WHERE nadrazeny = '%s'""")]

    heslo["pribuzny"] = [p["pribuzny"] for p in rows("""SELECT pribuzny
        FROM pribuznost
        WHERE pribuznost.id_heslo = '%s'""")]

    heslo["varianty"] = [
        {"varianta": v["varianta"], "jazyk": v["jazyk"]}
        for v in rows("""SELECT varianta,
        jazyk
        FROM varianta
        WHERE id_heslo = '%s'""")
    ]

    vazba_wikipedia = rows("""SELECT uri_wikipedia
        FROM vazbywikipedia
        WHERE vazbywikipedia.id_heslo = '%s'""")
    heslo["vazba_wikipedia"] = (
        vazba_wikipedia[-1]["uri_wikipedia"] if vazba_wikipedia else ""
    )
    return heslo