def create_fake_thesaurus(self, name):
    """Create and save a small fake thesaurus for use in tests.

    The thesaurus gets ``name`` as its identifier, three keywords
    ('aaa', 'bbb', 'ccc') and, for each keyword, one label per language
    in ('it', 'en', 'es'). Every object is saved as soon as it is built.

    Args:
        name: identifier of the thesaurus; also embedded in the title and
            in every generated keyword label.
    """
    thesaurus = Thesaurus()
    thesaurus.identifier = name
    thesaurus.title = f"Title: {name}"
    thesaurus.description = "SAMPLE FAKE THESAURUS USED FOR TESTING"
    thesaurus.date = "2016-10-01"
    thesaurus.save()

    for keyword in ['aaa', 'bbb', 'ccc']:
        tk = ThesaurusKeyword()
        tk.thesaurus = thesaurus
        tk.about = f"{keyword}_about"
        tk.alt_label = f"{keyword}_alt"
        # The keyword is saved before its labels are created, since each
        # label is attached to it.
        tk.save()

        for lang in ['it', 'en', 'es']:
            tkl = ThesaurusKeywordLabel()
            tkl.keyword = tk
            tkl.lang = lang
            tkl.label = f"{keyword}_l_{lang}_t_{name}"
            tkl.save()
def create_fake_thesaurus(self, name):
    """Build and persist a sample thesaurus identified by ``name``.

    Three keywords are attached to the thesaurus, and each keyword
    receives one label for each of the languages 'it', 'en' and 'es'.
    """
    fake = Thesaurus()
    fake.identifier = name
    fake.title = f"Title: {name}"
    fake.description = "SAMPLE FAKE THESAURUS USED FOR TESTING"
    fake.date = "2016-10-01"
    fake.save()

    for term in ('aaa', 'bbb', 'ccc'):
        entry = ThesaurusKeyword()
        entry.thesaurus = fake
        entry.about = f"{term}_about"
        entry.alt_label = f"{term}_alt"
        entry.save()

        # One label per language for the keyword just saved.
        for code in ('it', 'en', 'es'):
            translation = ThesaurusKeywordLabel()
            translation.keyword = entry
            translation.lang = code
            translation.label = f"{term}_l_{code}_t_{name}"
            translation.save()
def load_thesaurus(self, input_file, name, store):
    """Read a SKOS thesaurus from an XML file and optionally save it.

    The file is parsed with ``etree.parse``. The ``skos:ConceptScheme``
    child of the root supplies the title, description and issue date of
    the thesaurus; each ``skos:Concept`` child of the root becomes a
    keyword, and each of its ``skos:prefLabel`` children becomes a label
    of that keyword. Progress is printed to standard output.

    Args:
        input_file: source handed unchanged to ``etree.parse``.
        name: identifier assigned to the thesaurus.
        store: when true, every created object is saved; when false the
            file is only parsed and reported.

    Raises:
        CommandError: if the root has no ``skos:ConceptScheme`` child.
    """
    RDF_URI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
    XML_URI = 'http://www.w3.org/XML/1998/namespace'

    # Namespace-qualified attribute names in the "{uri}local" form.
    ABOUT_ATTRIB = '{' + RDF_URI + '}about'
    LANG_ATTRIB = '{' + XML_URI + '}lang'

    ns = {
        'rdf': RDF_URI,
        'foaf': 'http://xmlns.com/foaf/0.1/',
        'dc': 'http://purl.org/dc/elements/1.1/',
        'dcterms': 'http://purl.org/dc/terms/',
        'skos': 'http://www.w3.org/2004/02/skos/core#'
    }

    tfile = etree.parse(input_file)
    root = tfile.getroot()

    scheme = root.find('skos:ConceptScheme', ns)
    # Compare with None explicitly: the truth value of an element reflects
    # whether it has children, not whether find() located it.
    if scheme is None:
        raise CommandError("ConceptScheme not found in file")

    # find() returns None for a missing child, so a scheme lacking any of
    # these three elements fails here with AttributeError.
    title = scheme.find('dc:title', ns).text
    descr = scheme.find('dc:description', ns).text
    date_issued = scheme.find('dcterms:issued', ns).text

    print(f'Thesaurus "{title}" issued on {date_issued}')

    thesaurus = Thesaurus()
    thesaurus.identifier = name
    thesaurus.title = title
    thesaurus.description = descr
    thesaurus.date = date_issued

    if store:
        thesaurus.save()

    for concept in root.findall('skos:Concept', ns):
        about = concept.attrib.get(ABOUT_ATTRIB)
        alt_label = concept.find('skos:altLabel', ns).text

        print(f'Concept {alt_label} ({about})')

        tk = ThesaurusKeyword()
        tk.thesaurus = thesaurus
        tk.about = about
        tk.alt_label = alt_label

        if store:
            tk.save()

        for pref_label in concept.findall('skos:prefLabel', ns):
            lang = pref_label.attrib.get(LANG_ATTRIB)
            label = pref_label.text

            print(f' Label {lang}: {label}')

            tkl = ThesaurusKeywordLabel()
            tkl.keyword = tk
            tkl.lang = lang
            tkl.label = label

            if store:
                tkl.save()
def load_thesaurus(self, input_file, name, store):
    """Load a SKOS thesaurus from an RDF source and optionally save it.

    The source is parsed into a ``Graph``. The single ``skos:ConceptScheme``
    found in it supplies the title, description and issue date of the
    thesaurus; every ``skos:Concept`` becomes a keyword with one label per
    preferred label. Progress messages are written to ``self.stderr``.

    Args:
        input_file: source passed to ``Graph.parse``; when it is an
            ``UploadedFile`` the RDF format is guessed from its name.
        name: identifier assigned to the thesaurus.
        store: when true, every created object is saved; when false the
            source is only parsed and reported.

    Raises:
        CommandError: if no ``skos:ConceptScheme`` is found.
    """
    g = Graph()

    # If input_file is an UploadedFile object rather than a file path, the
    # Graph.parse() method may not have enough info to correctly guess the
    # type; in this case supply the name, which should include the
    # extension, to guess_format manually.
    rdf_format = None
    if isinstance(input_file, UploadedFile):
        self.stderr.write(self.style.WARNING(f"Guessing RDF format from {input_file.name}..."))
        rdf_format = guess_format(input_file.name)

    g.parse(input_file, format=rdf_format)

    # An error will be thrown here if there is more than one scheme in the file.
    scheme = g.value(None, RDF.type, SKOS.ConceptScheme, any=False)
    if scheme is None:
        raise CommandError("ConceptScheme not found in file")

    default_lang = getattr(settings, 'THESAURUS_DEFAULT_LANG', None)

    available_titles = [t for t in g.objects(scheme, DC.title) if isinstance(t, Literal)]
    thesaurus_title = value_for_language(available_titles, default_lang)
    # The description falls back to the title, the issue date to "".
    description = g.value(scheme, DC.description, None, default=thesaurus_title)
    date_issued = g.value(scheme, DCTERMS.issued, None, default="")

    self.stderr.write(self.style.SUCCESS(f'Thesaurus "{thesaurus_title}", desc: {description} issued at {date_issued}'))

    thesaurus = Thesaurus()
    thesaurus.identifier = name
    thesaurus.description = description
    thesaurus.title = thesaurus_title
    thesaurus.about = str(scheme)
    thesaurus.date = date_issued

    if store:
        thesaurus.save()

    # One thesaurus label per title literal that carries a language tag.
    for title_literal in available_titles:
        if title_literal.language is not None:
            thesaurus_label = ThesaurusLabel()
            thesaurus_label.lang = title_literal.language
            thesaurus_label.label = title_literal.value
            thesaurus_label.thesaurus = thesaurus

            if store:
                thesaurus_label.save()

    for concept in g.subjects(RDF.type, SKOS.Concept):
        # The preferred label is only used in the log line below, so a
        # concept without one in the default language must not abort the
        # whole import with an IndexError.
        default_lang_labels = g.preferredLabel(concept, default_lang)
        pref = default_lang_labels[0][1] if default_lang_labels else None

        about = str(concept)
        alt_label = g.value(concept, SKOS.altLabel, object=None, default=None)
        if alt_label is not None:
            alt_label = str(alt_label)
        else:
            # No altLabel: derive one from the concept's prefLabel literals.
            available_labels = [t for t in g.objects(concept, SKOS.prefLabel) if isinstance(t, Literal)]
            alt_label = value_for_language(available_labels, default_lang)

        self.stderr.write(self.style.SUCCESS(f'Concept {str(pref)}: {alt_label} ({about})'))

        tk = ThesaurusKeyword()
        tk.thesaurus = thesaurus
        tk.about = about
        tk.alt_label = alt_label

        if store:
            tk.save()

        for _, pref_label in g.preferredLabel(concept):
            lang = pref_label.language
            label = str(pref_label)

            self.stderr.write(self.style.SUCCESS(f' Label {lang}: {label}'))

            tkl = ThesaurusKeywordLabel()
            tkl.keyword = tk
            tkl.lang = lang
            tkl.label = label

            if store:
                tkl.save()
def load_thesaurus(self, input_file, name, store):
    """Read a SKOS thesaurus from an XML file and optionally save it.

    The file is parsed with ``dlxml.parse``. The ``skos:ConceptScheme``
    child of the root supplies the titles, description, issue date and
    ``rdf:about`` of the thesaurus; each ``skos:Concept`` child of the
    root becomes a keyword, and each of its ``skos:prefLabel`` children
    becomes a label of that keyword. Progress is printed to standard
    output.

    Args:
        input_file: source handed unchanged to ``dlxml.parse``.
        name: identifier assigned to the thesaurus.
        store: when true, every created object is saved; when false the
            file is only parsed and reported and nothing is written.

    Raises:
        CommandError: if the root has no ``skos:ConceptScheme`` child.
    """
    RDF_URI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
    XML_URI = 'http://www.w3.org/XML/1998/namespace'

    # Namespace-qualified attribute names in the "{uri}local" form.
    ABOUT_ATTRIB = f"{{{RDF_URI}}}about"
    LANG_ATTRIB = f"{{{XML_URI}}}lang"

    ns = {
        'rdf': RDF_URI,
        'foaf': 'http://xmlns.com/foaf/0.1/',
        'dc': 'http://purl.org/dc/elements/1.1/',
        'dcterms': 'http://purl.org/dc/terms/',
        'skos': 'http://www.w3.org/2004/02/skos/core#'
    }

    tfile = dlxml.parse(input_file)
    root = tfile.getroot()

    scheme = root.find('skos:ConceptScheme', ns)
    # Compare with None explicitly: the truth value of an element reflects
    # whether it has children, not whether find() located it.
    if scheme is None:
        raise CommandError("ConceptScheme not found in file")

    titles = scheme.findall('dc:title', ns)

    default_lang = getattr(settings, 'THESAURUS_DEFAULT_LANG', None)
    # NOTE(review): judging from the lang[0] / lang[1] accesses below, the
    # helper yields (language, text) pairs - confirm against its definition.
    available_lang = get_all_lang_available_with_title(titles, LANG_ATTRIB)
    thesaurus_title = determinate_value(available_lang, default_lang)

    # A text-only element has no children and is therefore falsy, so the
    # presence check must be "is not None"; otherwise an existing
    # description would be discarded in favour of the title.
    descr_node = scheme.find('dc:description', ns)
    descr = descr_node.text if descr_node is not None else thesaurus_title

    # A missing issue date defaults to an empty string instead of failing
    # with AttributeError on None.
    issued_node = scheme.find('dcterms:issued', ns)
    date_issued = issued_node.text if issued_node is not None else ""

    about = scheme.attrib.get(ABOUT_ATTRIB)

    print(f'Thesaurus "{thesaurus_title}" issued at {date_issued}')

    thesaurus = Thesaurus()
    thesaurus.identifier = name
    thesaurus.title = thesaurus_title
    thesaurus.description = descr
    thesaurus.about = about
    thesaurus.date = date_issued

    if store:
        thesaurus.save()

    for lang in available_lang:
        if lang[0] is not None:
            thesaurus_label = ThesaurusLabel()
            thesaurus_label.lang = lang[0]
            thesaurus_label.label = lang[1]
            thesaurus_label.thesaurus = thesaurus

            # Honour the dry-run mode like every other save in this method.
            if store:
                thesaurus_label.save()

    for concept in root.findall('skos:Concept', ns):
        about = concept.attrib.get(ABOUT_ATTRIB)
        alt_label = concept.find('skos:altLabel', ns)
        if alt_label is not None:
            alt_label = alt_label.text
        else:
            # No altLabel: derive one from the concept's prefLabel elements.
            pref_labels = concept.findall('skos:prefLabel', ns)
            available_lang = get_all_lang_available_with_title(
                pref_labels, LANG_ATTRIB)
            alt_label = determinate_value(available_lang, default_lang)

        print(f'Concept {alt_label} ({about})')

        tk = ThesaurusKeyword()
        tk.thesaurus = thesaurus
        tk.about = about
        tk.alt_label = alt_label

        if store:
            tk.save()

        for pref_label in concept.findall('skos:prefLabel', ns):
            lang = pref_label.attrib.get(LANG_ATTRIB)
            label = pref_label.text

            print(f' Label {lang}: {label}')

            tkl = ThesaurusKeywordLabel()
            tkl.keyword = tk
            tkl.lang = lang
            tkl.label = label

            if store:
                tkl.save()