Esempio n. 1
0
    def save_keywords(self, graph):
        """Store the concepts and alternative labels found in ``graph``.

        Alternative labels (``SKOS.altLabel``) whose subject is typed as
        ``SKOS.Concept`` are grouped per keyword id first. The hard-coded
        ``bulk_mode`` switch then selects between bulk inserts and the
        row-by-row path driven by ``ModelSyncher``.

        Args:
            graph: RDF graph offering ``subject_objects``, ``subjects`` and
                triple membership tests.

        Raises:
            AssertionError: if keywords for ``self.data_source`` already
                exist (only while assertions are enabled).
        """
        if self.verbosity >= 2:
            print("Saving data")
        data_source = DataSource.objects.get(pk='yso')

        assert not Keyword.objects.filter(
            data_source=self.data_source,
        ).exists()

        bulk_mode = True

        if not bulk_mode:
            def remove(instance):
                return instance.delete()

            stored_labels = KeywordLabel.objects.filter(
                data_source=self.data_source)
            label_syncher = ModelSyncher(
                stored_labels,
                lambda entry: (entry.name, entry.language_id),
                delete_func=remove)

        labels_to_create = set()
        keyword_labels = {}
        for subject, label in graph.subject_objects(SKOS.altLabel):
            # Only labels attached to actual concepts are of interest.
            if (subject, RDF.type, SKOS.Concept) not in graph:
                continue
            yid = self.yso_id(subject)

            if not bulk_mode:
                saved_label = self.save_alt_label(
                    label_syncher, graph, label, data_source)
                if saved_label:
                    keyword_labels.setdefault(yid, []).append(saved_label)
                continue

            if label.language is None:
                continue
            # YSO doesn't contain se, assume an error.
            language = 'sv' if label.language == 'se' else label.language
            labels_to_create.add((str(label), language))
            keyword_labels.setdefault(yid, []).append(label)

        if bulk_mode:
            new_labels = [
                KeywordLabel(name=text, language_id=language_id)
                for text, language_id in labels_to_create
            ]
            KeywordLabel.objects.bulk_create(new_labels)
            self.save_keywords_in_bulk(graph, data_source)
            self.save_keyword_label_relationships_in_bulk(keyword_labels)
            return

        label_syncher.finish()

        stored_keywords = Keyword.objects.filter(
            data_source=self.data_source)
        syncher = ModelSyncher(
            stored_keywords,
            lambda keyword: keyword.url,
            delete_func=remove)
        save_set = set()
        for concept in graph.subjects(RDF.type, SKOS.Concept):
            self.save_keyword(
                syncher, graph, concept, data_source, keyword_labels,
                save_set)
        syncher.finish()
Esempio n. 2
0
    def save_alt_label(self, syncher, graph, label):
        """Return the ``KeywordLabel`` matching ``label``, creating it if new.

        The label is looked up in ``syncher`` under the key
        ``(text, language)``. When the syncher does not know it, a new
        ``KeywordLabel`` is built and saved immediately. The result is
        marked in the syncher unless it already carries a truthy ``_found``
        attribute.

        Args:
            syncher: object offering ``get(key)`` and ``mark(obj)``.
            graph: not used by this method.
            label: label whose ``str()`` is the name and which exposes a
                ``language`` attribute.

        Returns:
            The label object, or ``None`` when ``label`` has no language
            (an error line is printed in that case).
        """
        name = str(label)
        if label.language is None:
            print('Error:', str(label), 'has no language')
            return None

        known = syncher.get((name, str(label.language)))
        is_new = known is None
        if is_new:
            language = Language.objects.get(id=label.language)
            result = KeywordLabel(name=name, language=language)
            result._changed = True
        else:
            result = known
        result._created = is_new

        if result._created:
            # Since there are duplicates, only save & mark them once.
            result.save()

        already_marked = getattr(result, '_found', False)
        if not already_marked:
            syncher.mark(result)
        return result
Esempio n. 3
0
    def save_alt_label(self, syncher, graph, label, data_source):
        """Return the ``KeywordLabel`` matching ``label``, creating it if new.

        The label is looked up in ``syncher`` under the key
        ``(text, language)``. When the syncher does not know it, a new
        ``KeywordLabel`` tied to ``data_source`` is built and saved
        immediately. The result is marked in the syncher unless it already
        carries a truthy ``_found`` attribute.

        Args:
            syncher: object offering ``get(key)`` and ``mark(obj)``.
            graph: not used by this method.
            label: label whose ``str()`` is the name and which exposes a
                ``language`` attribute.
            data_source: data source assigned to newly created labels.

        Returns:
            The label object, or ``None`` when ``label`` has no language
            (an error line is printed in that case).
        """
        name = str(label)
        if label.language is None:
            print('Error:', str(label), 'has no language')
            return None

        known = syncher.get((name, str(label.language)))
        is_new = known is None
        if is_new:
            language = Language.objects.get(id=label.language)
            result = KeywordLabel(
                name=name,
                language=language,
                data_source=data_source,
            )
            result._changed = True
        else:
            result = known
        result._created = is_new

        if result._created:
            # Since there are duplicates, only save & mark them once.
            result.save()

        already_marked = getattr(result, '_found', False)
        if not already_marked:
            syncher.mark(result)
        return result
Esempio n. 4
0
    def save_keywords(self, graph):
        """Synchronise keywords and their alternative labels with ``graph``.

        Alternative labels (``SKOS.altLabel``) whose subject is typed as
        ``SKOS.Concept`` are grouped per keyword id first. The hard-coded
        ``bulk_mode`` switch then selects between bulk inserts and the
        row-by-row path driven by ``ModelSyncher``. The row-by-row path also
        copies the events of the keywords listed in ``YSO_DEPRECATED_MAPS``
        onto their replacements before syncing the keywords themselves.

        Args:
            graph: RDF graph offering ``subject_objects``, ``subjects`` and
                triple membership tests.
        """
        if self.verbosity >= 2:
            print("Saving data")

        bulk_mode = False
        if bulk_mode:
            assert not Keyword.objects.filter(
                data_source=self.data_source,
            ).exists()
        else:
            def remove(instance):
                return instance.delete()

            def label_key(entry):
                return (entry.name, entry.language_id)

            all_labels = KeywordLabel.objects.all()
            label_syncher = ModelSyncher(
                all_labels, label_key, delete_func=remove)

        labels_to_create = set()
        keyword_labels = {}
        for subject, label in graph.subject_objects(SKOS.altLabel):
            # Only labels attached to actual concepts are of interest.
            if (subject, RDF.type, SKOS.Concept) not in graph:
                continue
            yid = get_yso_id(subject)

            if not bulk_mode:
                saved_label = self.save_alt_label(label_syncher, graph, label)
                if saved_label:
                    keyword_labels.setdefault(yid, []).append(saved_label)
                continue

            if label.language is None:
                continue
            # YSO doesn't contain se, assume an error.
            language = 'sv' if label.language == 'se' else label.language
            labels_to_create.add((str(label), language))
            keyword_labels.setdefault(yid, []).append(label)

        if bulk_mode:
            new_labels = [
                KeywordLabel(name=text, language_id=language_id)
                for text, language_id in labels_to_create
            ]
            KeywordLabel.objects.bulk_create(new_labels)
            self.save_keywords_in_bulk(graph)
            self.save_keyword_label_relationships_in_bulk(keyword_labels)
            return

        label_syncher.finish()

        # manually add new keywords to deprecated ones
        for deprecated_id, replacement_id in YSO_DEPRECATED_MAPS.items():
            try:
                deprecated = Keyword.objects.get(id=deprecated_id)
                replacement = Keyword.objects.get(id=replacement_id)
            except ObjectDoesNotExist:
                # Nothing to map unless both keywords exist.
                continue
            names = (str(deprecated), str(replacement))
            print('Manually mapping events with %s to %s' % names)
            replacement.events.add(*deprecated.events.all())
            replacement.audience_events.add(
                *deprecated.audience_events.all())

        def keyword_key(keyword):
            return keyword.id

        def retire(keyword):
            return deprecate_and_replace(graph, keyword)

        def is_retired(keyword):
            return keyword.deprecated

        active_keywords = Keyword.objects.filter(
            data_source=self.data_source, deprecated=False)
        syncher = ModelSyncher(
            active_keywords,
            keyword_key,
            delete_func=retire,
            check_deleted_func=is_retired)
        save_set = set()
        for concept in graph.subjects(RDF.type, SKOS.Concept):
            self.save_keyword(
                syncher, graph, concept, keyword_labels, save_set)
        syncher.finish()