def save_keywords(self, graph):
    """Store the alternative labels and keywords found in ``graph``.

    Every ``SKOS.altLabel`` whose subject is typed as a ``SKOS.Concept`` is
    collected per keyword id, after which the keywords themselves are saved.

    ``bulk_mode`` is hard-coded to ``True``, so only the bulk path runs:
    labels are inserted with ``bulk_create`` and the keywords and their
    label relations are delegated to the ``*_in_bulk`` helpers. The
    syncher-based path is kept for the case where the flag is flipped.

    The method asserts that no keyword exists yet for ``self.data_source``.
    """
    if self.verbosity >= 2:
        print("Saving data")
    data_source = DataSource.objects.get(pk='yso')
    existing_keywords = Keyword.objects.filter(data_source=self.data_source)
    assert not existing_keywords.exists()

    bulk_mode = True
    if not bulk_mode:
        def delete_func(obj):
            return obj.delete()

        label_syncher = ModelSyncher(
            KeywordLabel.objects.filter(data_source=self.data_source),
            lambda obj: (obj.name, obj.language_id),
            delete_func=delete_func)

    keyword_labels = {}
    labels_to_create = set()
    for subject, label in graph.subject_objects(SKOS.altLabel):
        if (subject, RDF.type, SKOS.Concept) not in graph:
            continue
        yid = self.yso_id(subject)

        if not bulk_mode:
            saved_label = self.save_alt_label(
                label_syncher, graph, label, data_source)
            if saved_label:
                keyword_labels.setdefault(yid, []).append(saved_label)
            continue

        # Labels without a language are skipped in bulk mode.
        if label.language is None:
            continue
        # YSO doesn't contain se, assume an error.
        language = 'sv' if label.language == 'se' else label.language
        labels_to_create.add((str(label), language))
        keyword_labels.setdefault(yid, []).append(label)

    if bulk_mode:
        KeywordLabel.objects.bulk_create([
            KeywordLabel(name=label_name, language_id=language_id)
            for label_name, language_id in labels_to_create
        ])
        self.save_keywords_in_bulk(graph, data_source)
        self.save_keyword_label_relationships_in_bulk(keyword_labels)
        return

    label_syncher.finish()
    keyword_syncher = ModelSyncher(
        Keyword.objects.filter(data_source=self.data_source),
        lambda obj: obj.url,
        delete_func=delete_func)
    save_set = set()
    for subject in graph.subjects(RDF.type, SKOS.Concept):
        self.save_keyword(keyword_syncher, graph, subject, data_source,
                          keyword_labels, save_set)
    keyword_syncher.finish()
def save_alt_label(self, syncher, graph, label):
    """Return the ``KeywordLabel`` matching ``label``, creating it if needed.

    The label is looked up in ``syncher`` by ``(text, language)``. When the
    syncher has no such object a new ``KeywordLabel`` is built, flagged with
    ``_changed``/``_created`` and saved; an object the syncher already knows
    only gets ``_created = False``. In both cases the object is marked in
    the syncher unless it already carries a truthy ``_found`` attribute.

    Returns ``None`` (after printing an error) when the label has no
    language. ``graph`` is accepted but not used here.
    """
    text = str(label)
    if label.language is None:
        print('Error:', text, 'has no language')
        return None

    lookup_key = (text, str(label.language))
    keyword_label = syncher.get(lookup_key)
    if keyword_label is not None:
        keyword_label._created = False
    else:
        keyword_label = KeywordLabel(
            name=text,
            language=Language.objects.get(id=label.language))
        keyword_label._changed = True
        keyword_label._created = True
        # Since there are duplicates, only save & mark them once.
        keyword_label.save()

    already_marked = getattr(keyword_label, '_found', False)
    if not already_marked:
        syncher.mark(keyword_label)
    return keyword_label
def save_alt_label(self, syncher, graph, label, data_source):
    """Return the ``KeywordLabel`` matching ``label``, creating it if needed.

    The label is looked up in ``syncher`` by ``(text, language)``. When the
    syncher has no such object a new ``KeywordLabel`` bound to
    ``data_source`` is built, flagged with ``_changed``/``_created`` and
    saved; an object the syncher already knows only gets
    ``_created = False``. In both cases the object is marked in the syncher
    unless it already carries a truthy ``_found`` attribute.

    Returns ``None`` (after printing an error) when the label has no
    language. ``graph`` is accepted but not used here.
    """
    text = str(label)
    if label.language is None:
        print('Error:', text, 'has no language')
        return None

    lookup_key = (text, str(label.language))
    keyword_label = syncher.get(lookup_key)
    if keyword_label is not None:
        keyword_label._created = False
    else:
        keyword_label = KeywordLabel(
            name=text,
            language=Language.objects.get(id=label.language),
            data_source=data_source)
        keyword_label._changed = True
        keyword_label._created = True
        # Since there are duplicates, only save & mark them once.
        keyword_label.save()

    already_marked = getattr(keyword_label, '_found', False)
    if not already_marked:
        syncher.mark(keyword_label)
    return keyword_label
def save_keywords(self, graph):
    """Synchronise the alternative labels and keywords found in ``graph``.

    Every ``SKOS.altLabel`` whose subject is typed as a ``SKOS.Concept`` is
    collected per keyword id, after which the keywords themselves are saved.

    ``bulk_mode`` is hard-coded to ``False``, so only the syncher path runs:
    labels go through ``save_alt_label``, events of the keywords listed in
    ``YSO_DEPRECATED_MAPS`` are added to their replacements, and every
    concept is passed to ``save_keyword`` with a syncher over the
    non-deprecated keywords of ``self.data_source``. The bulk path is kept
    for the case where the flag is flipped.
    """
    if self.verbosity >= 2:
        print("Saving data")

    bulk_mode = False
    if bulk_mode:
        assert not Keyword.objects.filter(
            data_source=self.data_source).exists()
    else:
        label_syncher = ModelSyncher(
            KeywordLabel.objects.all(),
            lambda obj: (obj.name, obj.language_id),
            delete_func=lambda obj: obj.delete())

    keyword_labels = {}
    labels_to_create = set()
    for subject, label in graph.subject_objects(SKOS.altLabel):
        if (subject, RDF.type, SKOS.Concept) not in graph:
            continue
        yid = get_yso_id(subject)

        if not bulk_mode:
            saved_label = self.save_alt_label(label_syncher, graph, label)
            if saved_label:
                keyword_labels.setdefault(yid, []).append(saved_label)
            continue

        # Labels without a language are skipped in bulk mode.
        if label.language is None:
            continue
        # YSO doesn't contain se, assume an error.
        language = 'sv' if label.language == 'se' else label.language
        labels_to_create.add((str(label), language))
        keyword_labels.setdefault(yid, []).append(label)

    if bulk_mode:
        KeywordLabel.objects.bulk_create([
            KeywordLabel(name=label_name, language_id=language_id)
            for label_name, language_id in labels_to_create
        ])
        self.save_keywords_in_bulk(graph)
        self.save_keyword_label_relationships_in_bulk(keyword_labels)
        return

    label_syncher.finish()

    # Manually add new keywords to deprecated ones.
    for old_id, new_id in YSO_DEPRECATED_MAPS.items():
        try:
            old_keyword = Keyword.objects.get(id=old_id)
            new_keyword = Keyword.objects.get(id=new_id)
        except ObjectDoesNotExist:
            # Either end of the mapping is missing; nothing to move.
            continue
        print('Manually mapping events with %s to %s' %
              (str(old_keyword), str(new_keyword)))
        new_keyword.events.add(*old_keyword.events.all())
        new_keyword.audience_events.add(*old_keyword.audience_events.all())

    active_keywords = Keyword.objects.filter(
        data_source=self.data_source, deprecated=False)
    keyword_syncher = ModelSyncher(
        active_keywords,
        lambda keyword: keyword.id,
        delete_func=lambda obj: deprecate_and_replace(graph, obj),
        check_deleted_func=lambda obj: obj.deprecated)
    save_set = set()
    for subject in graph.subjects(RDF.type, SKOS.Concept):
        self.save_keyword(keyword_syncher, graph, subject,
                          keyword_labels, save_set)
    keyword_syncher.finish()