Example #1
def remove_indexed_files(model, project_slug, version_slug=None, build_id=None):
    """
    Remove files from `version_slug` of `project_slug` from the search index.

    :param model: Class of the model to be deleted.
    :param project_slug: Project slug.
    :param version_slug: Version slug. If not given, all indexed files
                    from `project_slug` are deleted.
    :param build_id: Build id. If not given, all indexed files from `version_slug`
                    are deleted; otherwise only files from other builds are removed.
    """

    # structlog's bind() returns a new bound logger; keep the bound one.
    log = log.bind(
        project_slug=project_slug,
        version_slug=version_slug,
    )

    if not DEDConfig.autosync_enabled():
        log.info('Autosync disabled, skipping removal from the search index.')
        return

    try:
        document = list(registry.get_documents(models=[model]))[0]
        log.info('Deleting old files from search index.')
        documents = (
            document().search()
            .filter('term', project=project_slug)
        )
        if version_slug:
            documents = documents.filter('term', version=version_slug)
        if build_id:
            documents = documents.exclude('term', build=build_id)
        documents.delete()
    except Exception:
        log.exception('Unable to delete a subset of files. Continuing.')
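A hedged usage sketch for the helper above: drop a version's stale search entries while keeping documents from the current build. The HTMLFile model and all literal values below are assumed for illustration.

# HTMLFile and the slug/build values are assumed, not taken from the source.
remove_indexed_files(
    model=HTMLFile,
    project_slug='my-project',
    version_slug='latest',
    build_id=1234,  # documents from this build are kept (they are excluded)
)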
Example #2
    def handle_populate(self, models, options, connection):
        """
        Insert all given models in their respective indices.

        Args:
            models: An iterable with model classes.
            options: A dict with command line options.
            connection: The Elasticsearch connection.
        """
        for doc in registry.get_documents(models):
            qs = doc().get_queryset()

            if not options['current']:
                # We want to find and populate the latest index.
                # Wrap in list() so that .sort() below works on Python 3.
                indices = list(connection.indices.get('{}.*'.format(doc._doc_type.index)).keys())

                if not indices:
                    raise AttributeError('The index \'{}\' does not exist.'.format(doc._doc_type.index))

                indices.sort()
                doc._doc_type.index = indices[-1]  # Dirty hack to override the doctype meta, but it works.
            elif not connection.indices.exists(doc._doc_type.index):
                # We want to use the current index, which means we can use the
                # alias. If so, we do need to check the alias/index exists, or
                # we risk creating an index implicitly (which is a mess to
                # clean up).
                raise AttributeError('The index \'{}\' does not exist.'.format(doc._doc_type.index))

            self.stdout.write("Indexing {} '{}' objects to '{}'".format(
                qs.count(), doc._doc_type.model.__name__, doc._doc_type.index)
            )

            doc().update(qs)
Example #3
def search_bulk_index(model: Type[Model], qs: QuerySet, **kwargs):
    """Django orm bulk functions such as `bulk_create`, `bulk_index` and
    `update`do not send signals for the modified objects and therefore do not
    automatically update the elasticsearch index. This function therefore
    bulk-reindexes the changed objects."""
    [current_doc] = registry.get_documents([model])
    return current_doc().update(qs, **kwargs)
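Because `bulk_create` bypasses post_save signals, the caller has to refresh the index explicitly afterwards. A minimal sketch, assuming an Article model with a registered document:

# Article is an assumed model; no signals fire for bulk_create, so we
# reindex the affected rows by hand.
titles = ['intro', 'advanced']
Article.objects.bulk_create([Article(title=t) for t in titles])
search_bulk_index(Article, Article.objects.filter(title__in=titles))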
Example #4
def remove_indexed_files(model, version, build):
    """
    Remove files from the version from the search index.

    This excludes files from the current build.
    """

    if not DEDConfig.autosync_enabled():
        log.info(
            'Autosync disabled, skipping removal from the search index for: %s:%s',
            version.project.slug,
            version.slug,
        )
        return

    try:
        document = list(registry.get_documents(models=[model]))[0]
        log.info(
            'Deleting old files from search index for: %s:%s',
            version.project.slug,
            version.slug,
        )
        (document().search().filter(
            'term', project=version.project.slug).filter(
                'term', version=version.slug).exclude('term',
                                                      build=build).delete())
    except Exception:
        log.exception('Unable to delete a subset of files. Continuing.')
Example #5
def remove_indexed_file(sender, instance_list, **kwargs):
    """Remove deleted files from the build process."""

    if not instance_list:
        return

    model = sender
    document = list(registry.get_documents(models=[model]))[0]
    version = kwargs.get('version')
    commit = kwargs.get('commit')

    index_kwargs = {
        'app_label': model._meta.app_label,
        'model_name': model.__name__,
        'document_class': str(document),
        'objects_id': [obj.id for obj in instance_list],
    }

    # Do not index if autosync is disabled globally
    if DEDConfig.autosync_enabled():
        delete_objects_in_es(**index_kwargs)

        if version and commit:
            # Sanity check by deleting all old files not in this commit
            log.info('Deleting old commits from search index')
            document().search().filter(
                'term',
                version=version.slug,
            ).filter(
                'term',
                project=version.project.slug,
            ).exclude(
                'term',
                commit=commit,
            ).delete()
Example #6
def index_new_files(model, version, build):
    """Index new files from the version into the search index."""

    if not DEDConfig.autosync_enabled():
        log.info(
            'Autosync disabled, skipping indexing into the search index for: %s:%s',
            version.project.slug,
            version.slug,
        )
        return

    try:
        document = list(registry.get_documents(models=[model]))[0]
        doc_obj = document()
        queryset = (
            doc_obj.get_queryset()
            .filter(project=version.project, version=version, build=build)
        )
        log.info(
            'Indexing new objects into search index for: %s:%s',
            version.project.slug,
            version.slug,
        )
        doc_obj.update(queryset.iterator())
    except Exception:
        log.exception('Unable to index a subset of files. Continuing.')
Example #7
def update_hierarchy_labels(apps, schema_editor):
    region = apps.get_model('regions', 'Region')
    placeholder = PlaceholderApi()
    all_regions = region.objects.all()
    regions_ids = list(all_regions.values_list('region_id', flat=True))
    reg_data = placeholder.find_by_id(regions_ids)
    placeholder.add_hierarchy_labels(reg_data)
    for reg in all_regions:
        try:
            reg.i18n.update({
                'hierarchy_label_pl':
                reg_data[str(reg.region_id)]['hierarchy_label_pl'],
                'hierarchy_label_en':
                reg_data[str(reg.region_id)]['hierarchy_label_en']
            })
            reg.hierarchy_label = reg_data[str(
                reg.region_id)]['hierarchy_label_pl']
            reg.save()
        except KeyError:
            logger.debug(
                f'Couldn\'t update hierarchy label for region with id {reg.region_id}'
            )
    docs = registry.get_documents((region, ))
    default_reg = region.objects.get(region_id=settings.DEFAULT_REGION_ID)
    default_reg.i18n.update({
        'hierarchy_label_pl': 'Polska',
        'hierarchy_label_en': 'Poland'
    })
    default_reg.hierarchy_label = 'Polska'
    default_reg.save()
    for doc in docs:
        doc().update(all_regions)
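The function above has the (apps, schema_editor) signature of a Django data migration, so it would plausibly be wired up with RunPython; the dependency below is an assumed placeholder.

from django.db import migrations

class Migration(migrations.Migration):

    dependencies = [
        ('regions', '0002_previous_migration'),  # assumed dependency
    ]

    operations = [
        # Reverse is a no-op: labels are recomputed forward, never reverted.
        migrations.RunPython(update_hierarchy_labels, migrations.RunPython.noop),
    ]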
Example #8
def save_regions_data(values):
    main_regions = []
    additional_regions = []
    if values:
        placeholder = PlaceholderApi()
        reg_data, all_regions_ids = placeholder.get_all_regions_details(values)
        existing_regions = Region.objects.filter(region_id__in=all_regions_ids)
        existing_regions_ids = list(
            existing_regions.values_list('region_id', flat=True))
        to_create_regions_ids = set(all_regions_ids).difference(
            set(existing_regions_ids))
        to_create_regions = {
            reg_id: reg_data[str(reg_id)]
            for reg_id in to_create_regions_ids
        }
        created_regions = Region.objects.create_new_regions(to_create_regions)
        if created_regions:
            docs = registry.get_documents((Region, ))
            for doc in docs:
                doc().update(created_regions)
        all_regions = list(existing_regions) + created_regions
        for reg in all_regions:
            if str(reg.region_id) in values:
                main_regions.append(reg)
            else:
                additional_regions.append(reg)
    return main_regions, additional_regions
Example #9
    def _populate(self, models, options):
        parallel = options['parallel']
        for doc in registry.get_documents(models):
            self.stdout.write("Indexing {} '{}' objects {}".format(
                doc().get_queryset().count() if options['count'] else "all",
                doc.django.model.__name__, "(parallel)" if parallel else ""))
            qs = doc().get_indexing_queryset()
            doc().update(qs, parallel=parallel)
Example #10
def update_index():

    models = registry.get_models()
    for doc in registry.get_documents(models):
        qs = doc().get_queryset()
        log.info('indexing {} "{}" objects'.format(
            qs.count(), doc._doc_type.model.__name__))
        doc().update(qs)
Example #11
    def _run_reindex_tasks(self, models, queue):
        apply_async_kwargs = {'priority': 0}
        if queue:
            log.info('Adding indexing tasks to queue %s', queue)
            apply_async_kwargs['queue'] = queue
        else:
            log.info('Adding indexing tasks to default queue')

        index_time = timezone.now()
        timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')

        for doc in registry.get_documents(models):
            queryset = doc().get_queryset()
            # Get latest object from the queryset

            app_label = queryset.model._meta.app_label
            model_name = queryset.model.__name__

            index_name = doc._doc_type.index
            new_index_name = "{}_{}".format(index_name, timestamp)
            # Set the index temporarily for indexing;
            # it is only overridden while this command is running.
            doc._doc_type.index = new_index_name

            pre_index_task = create_new_es_index.si(app_label=app_label,
                                                    model_name=model_name,
                                                    index_name=index_name,
                                                    new_index_name=new_index_name)

            indexing_tasks = self._get_indexing_tasks(app_label=app_label, model_name=model_name,
                                                      queryset=queryset,
                                                      index_name=new_index_name,
                                                      document_class=str(doc))

            post_index_task = switch_es_index.si(app_label=app_label, model_name=model_name,
                                                 index_name=index_name,
                                                 new_index_name=new_index_name)

            # Task to run in order to add the objects
            # that have been inserted into the database while indexing_tasks was running.
            # We pass the index generation time, so it's possible to index later items.
            missed_index_task = index_missing_objects.si(app_label=app_label,
                                                         model_name=model_name,
                                                         document_class=str(doc),
                                                         index_generation_time=index_time)

            # http://celery.readthedocs.io/en/latest/userguide/canvas.html#chords
            chord_tasks = chord(header=indexing_tasks, body=post_index_task)
            if queue:
                pre_index_task.set(queue=queue)
                chord_tasks.set(queue=queue)
                missed_index_task.set(queue=queue)
            # http://celery.readthedocs.io/en/latest/userguide/canvas.html#chain
            chain(pre_index_task, chord_tasks, missed_index_task).apply_async(**apply_async_kwargs)

            message = ("Successfully issued tasks for {}.{}, total {} items"
                       .format(app_label, model_name, queryset.count()))
            log.info(message)
Example #13
    def setUpTestData(cls):
        super(ElasticsearchApiTestCase, cls).setUpTestData()

        # Check if this model has Elasticsearch mappings and take one of them
        # to test with (unless a custom es_doc_type has been defined).
        if not cls.es_doc_type:
            doc_types = registry.get_documents([cls.model_cls])
            if doc_types:
                cls.es_doc_type = next(iter(doc_types))
Example #15
    def teardown(self):
        # Stop listening to model saves.
        for doc in registry.get_documents():
            if getattr(doc, 'special_signals', False):
                continue
            model = doc._doc_type.model
            models.signals.post_save.disconnect(self.handle_save, sender=model)
            models.signals.post_delete.disconnect(self.handle_delete, sender=model)
            models.signals.m2m_changed.disconnect(self.handle_m2m_changed, sender=model)
            models.signals.pre_delete.disconnect(self.handle_pre_delete, sender=model)
Example #16
    def setup(self):
        for doc in registry.get_documents():
            if getattr(doc, 'special_signals', False):
                continue
            model = doc._doc_type.model
            models.signals.post_save.connect(self.handle_save, sender=model)
            models.signals.post_delete.connect(self.handle_delete, sender=model)

            models.signals.m2m_changed.connect(self.handle_m2m_changed, sender=model)
            models.signals.pre_delete.connect(self.handle_pre_delete, sender=model)
Example #17
    def setup(self):
        for doc in registry.get_documents():
            if getattr(doc, 'special_signals', False):
                continue
            model = doc.django.model
            models.signals.post_save.connect(self.handle_save, sender=model)
            models.signals.post_delete.connect(self.handle_delete, sender=model)

            models.signals.m2m_changed.connect(self.handle_m2m_changed, sender=model)
            models.signals.pre_delete.connect(self.handle_pre_delete, sender=model)
Example #18
    def teardown(self):
        # Stop listening to model saves.
        for doc in registry.get_documents():
            if getattr(doc, 'special_signals', False):
                continue
            model = doc.django.model
            models.signals.post_save.disconnect(self.handle_save, sender=model)
            models.signals.post_delete.disconnect(self.handle_delete, sender=model)
            models.signals.m2m_changed.disconnect(self.handle_m2m_changed, sender=model)
            models.signals.pre_delete.disconnect(self.handle_pre_delete, sender=model)
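All four setup/teardown variants above skip documents with a truthy special_signals attribute. A sketch of how a document might opt out of the generic wiring; the document, index, and model names are assumed, not from the original projects.

from django_elasticsearch_dsl import Document
from django_elasticsearch_dsl.registries import registry

@registry.register_document
class PageDocument(Document):
    # Skipped by the generic post_save/post_delete handlers above;
    # presumably indexed through custom build-time signals instead.
    special_signals = True

    class Index:
        name = 'pages'  # assumed index name

    class Django:
        model = Page  # assumed model (import not shown)
        fields = ['title']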
Example #19
def update_related_task(app_label, object_name, pk_set):
    model = apps.get_model(app_label, object_name)
    docs = registry.get_documents((model,))
    for doc in docs:
        qs = model.objects.filter(pk__in=pk_set)
        doc().update(qs.iterator())
    return {
        'app': model._meta.app_label,
        'model': model._meta.object_name,
        'instance_id': pk_set
    }
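Presumably this function is registered as a Celery task (the decorator is not shown in the snippet). A hypothetical call site; all argument values are illustrative only.

# app_label, object_name, and pk_set values are assumed for illustration.
update_related_task.delay('library', 'Book', [42, 43])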
Example #20
    def __init__(self, model=None, query=None, using=None, hints=None, search=None):
        super(ElasticQuerySet, self).__init__(model=model, query=query, using=using, hints=hints)
        if search is None:
            doc_types = registry.get_documents([model])

            search = Search(
                index=[doc_type._doc_type.index for doc_type in doc_types],
                doc_type=list(doc_types)
            ).source(include=[])[0:10000]
        self.search = search
        self._total = None
Example #21
    def _update(self, models, options):
        """
        Update indices with sanity check.

        A new index is created and populated with data, and the alias is only
        switched to the new index after a sanity check, to prevent missing data.
        """

        alias_mappings = []
        for document in registry.get_documents(models):
            # pylint: disable=protected-access
            index = document._index
            record_count = self.get_record_count(document)
            alias, new_index_name = self.prepare_backend_index(index)
            alias_mappings.append(
                AliasMapper(document, index, new_index_name, alias,
                            record_count))
        # Set the alias (from settings) to the timestamped catalog.
        run_attempts = 0
        indexes_pending = {
            key: ''
            for key in [x.new_index_name for x in alias_mappings]
        }
        conn = get_connection()
        while indexes_pending and run_attempts < 1:  # Only try once, as retries gave buggy results. See VAN-391
            run_attempts += 1
            self._populate(models, options)
            for doc, __, new_index_name, alias, record_count in alias_mappings:
                # Run a sanity check to ensure we aren't drastically changing the
                # index, which could be indicative of a bug.
                if new_index_name in indexes_pending and not options.get(
                        'disable_change_limit', False):
                    record_count_is_sane, index_info_string = self.sanity_check_new_index(
                        run_attempts, doc, new_index_name, record_count)
                    if record_count_is_sane:
                        ElasticsearchUtils.set_alias(conn, alias,
                                                     new_index_name)
                        indexes_pending.pop(new_index_name, None)
                    else:
                        indexes_pending[new_index_name] = index_info_string
                else:
                    ElasticsearchUtils.set_alias(conn, alias, new_index_name)
                    indexes_pending.pop(new_index_name, None)

        for index_alias_mapper in alias_mappings:
            index_alias_mapper.registered_index._name = index_alias_mapper.alias  # pylint: disable=protected-access

        if indexes_pending:
            raise CommandError(
                'Sanity check failed for the new index(es): {}'.format(
                    indexes_pending))

        return True
Example #22
def get_document_for_model(model):
    """Get the document for the given model.

    :param model: Model to get document index for.
    :type model: Subclass of `django.db.models.Model`.
    :return: Document index for the given model.
    :rtype: Subclass of `django_elasticsearch_dsl.Document`.
    """
    documents = registry.get_documents()
    for document in documents:
        if model == document.Django.model:
            return document
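A hedged usage sketch, with Book as an assumed model: resolve the registered document and reindex a queryset through it.

document = get_document_for_model(Book)  # Book is an assumed model
if document is not None:  # None when no document is registered for the model
    document().update(Book.objects.all())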
Example #23
def null_field_in_related_task(app_label, object_name, instance_id):
    instance = _instance(app_label, object_name, instance_id)
    for rel in instance._meta.related_objects:
        field_name = rel.field.name
        model = rel.field.model
        rel_instances = model.objects.filter(**{
            rel.field.name: instance
        })
        doc = list(registry.get_documents(models=[model, ]))[0]
        for rel_inst in rel_instances:
            setattr(rel_inst, field_name, None)
            doc_instance = doc(related_instance_to_ignore=instance)
            doc_instance.update(rel_inst)
Example #24
def _get_document(model, document_class):
    """
    Get DED document class object from the model and name of document class.

    :param model: The model class to find the document for.
    :param document_class: The name of the document class.
    :return: DED DocType object
    """
    documents = registry.get_documents(models=[model])

    for document in documents:
        if str(document) == document_class:
            return document
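Document classes are passed to tasks as strings because task kwargs must be serializable (Examples #5 and #27 send document_class=str(document)), and the worker resolves them back roughly like this. HTMLFile and PageDocument are assumed names.

# str(PageDocument) matches because the producer serialized the class
# with str() the same way before enqueueing the task.
document = _get_document(model=HTMLFile, document_class=str(PageDocument))
if document is not None:
    document().update(HTMLFile.objects.filter(pk__in=[1, 2, 3]))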
Example #25
    def handle(self, *args, **options):
        """This command is based off of the 'populate' command of Django ES DSL:

        https://github.com/sabricot/django-elasticsearch-dsl/blob/f6b2e0694e4ed69826c824196ccec5863874c856/django_elasticsearch_dsl/management/commands/search_index.py#L86

        We have updated it so that it will do incremental updates
        rather than looping over the full queryset every time.
        """
        models = set(registry.get_models())

        for doc in registry.get_documents(models):
            start_time = timezone.now() - UPDATE_WINDOW
            qs = doc().get_queryset().filter(last_modified__gt=start_time).order_by("id")
            self.stdout.write("Indexing {} '{}' objects".format(qs.count(), qs.model.__name__))
            doc().update(qs)
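For the incremental window to be safe, UPDATE_WINDOW presumably overlaps the interval at which the command is scheduled; a sketch of an assumed definition:

from datetime import timedelta

# Assumed: the window is larger than the scheduling interval, so consecutive
# runs overlap slightly instead of leaving unindexed gaps between them.
UPDATE_WINDOW = timedelta(minutes=15)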
Example #26
    def update_search_index(self):
        """
        Update the Document inside the Elasticsearch index after changing relevant parts
        of the product.
        """
        documents = elasticsearch_registry.get_documents([ProductModel])
        if settings.USE_I18N:
            for language, _ in settings.LANGUAGES:
                try:
                    document = next(doc for doc in documents if doc._language == language)
                except StopIteration:
                    document = next(doc for doc in documents if doc._language is None)
                document().update(self)
        else:
            document = next(iter(documents))
            document().update(self)
Example #27
def index_indexed_file(sender, instance_list, **kwargs):
    """Handle indexing from the build process."""

    if not instance_list:
        return

    model = sender
    document = list(registry.get_documents(models=[model]))[0]
    index_kwargs = {
        'app_label': model._meta.app_label,
        'model_name': model.__name__,
        'document_class': str(document),
        'objects_id': [obj.id for obj in instance_list],
    }

    # Do not index if autosync is disabled globally
    if DEDConfig.autosync_enabled():
        index_objects_to_es(**index_kwargs)
Example #28
    def __init__(self,
                 model=None,
                 query=None,
                 using=None,
                 hints=None,
                 search=None):
        super(ElasticQuerySet, self).__init__(model=model,
                                              query=query,
                                              using=using,
                                              hints=hints)
        if search is None:
            doc_types = registry.get_documents([model])

            search = Search(
                index=[doc_type._doc_type.index for doc_type in doc_types],
                doc_type=list(doc_types)).source(include=[])[0:10000]
        self.search = search
        self._total = None
Example #29
    def _populate(self, models, options):
        docs = {
            doc._doc_type.index: doc
            for doc in registry.get_documents()
            if doc._doc_type.model in models
        }
        for index in settings.ELASTICSEARCH_INDEX_NAMES.values():
            try:
                doc = docs[index]
                qs = doc().get_queryset()
                self.stdout.write("Indexing {} '{}' objects".format(
                    qs.count(), doc._doc_type.model.__name__))
                start = datetime.now()
                doc().update(qs.iterator(), chunk_size=1000)
                finish = datetime.now()
                print('Time: {}'.format(finish - start))
            except KeyError:
                pass
Example #30
    def _reindex_files_from(self, days_ago, queue=None):
        """Reindex HTML files from versions with recent builds."""
        chunk_size = settings.ES_TASK_CHUNK_SIZE
        since = datetime.now() - timedelta(days=days_ago)
        queryset = Version.objects.filter(builds__date__gte=since).distinct()
        app_label = HTMLFile._meta.app_label
        model_name = HTMLFile.__name__
        apply_async_kwargs = {
            'kwargs': {
                'app_label': app_label,
                'model_name': model_name,
            },
        }
        if queue:
            apply_async_kwargs['queue'] = queue

        for doc in registry.get_documents(models=[HTMLFile]):
            apply_async_kwargs['kwargs']['document_class'] = str(doc)
            for version in queryset.iterator():
                project = version.project
                files_qs = (HTMLFile.objects.filter(
                    version=version).values_list('pk', flat=True).iterator())
                current = 0
                while True:
                    objects_id = list(itertools.islice(files_qs, chunk_size))
                    if not objects_id:
                        break
                    current += len(objects_id)
                    log.info(
                        'Re-indexing files. version=%s:%s total=%s',
                        project.slug,
                        version.slug,
                        current,
                    )
                    apply_async_kwargs['kwargs']['objects_id'] = objects_id
                    index_objects_to_es.apply_async(**apply_async_kwargs)

                log.info(
                    "Tasks issued successfully. version=%s:%s items=%s",
                    project.slug,
                    version.slug,
                    str(current),
                )
Example #31
    def _get_docs(self, models):
        _last_doc = None
        _logentries_doc = None
        _docs = []

        for doc in registry.get_documents(models):
            # Move history index
            if doc.Index.name == 'histories':
                _last_doc = doc
            elif doc.Index.name == 'logentries':
                _logentries_doc = doc
            else:
                _docs.append(doc)

        if _last_doc:
            _docs.append(_last_doc)
        if _logentries_doc:
            _docs.append(_logentries_doc)
        return _docs
Example #32
    def _run_reindex_tasks(self, models, queue):
        apply_async_kwargs = {'queue': queue}
        log.info('Adding indexing tasks to queue.', queue=queue)

        timestamp = datetime.now().strftime('%Y%m%d%H%M%S')

        for doc in registry.get_documents(models):
            queryset = doc().get_queryset()

            app_label = queryset.model._meta.app_label
            model_name = queryset.model.__name__

            index_name = doc._index._name
            new_index_name = "{}_{}".format(index_name, timestamp)

            # Set and create a temporary index for indexing.
            create_new_es_index(
                app_label=app_label,
                model_name=model_name,
                index_name=index_name,
                new_index_name=new_index_name,
            )
            doc._index._name = new_index_name
            log.info('Temporary index created.', index_name=new_index_name)

            indexing_tasks = self._get_indexing_tasks(
                app_label=app_label,
                model_name=model_name,
                queryset=queryset,
                index_name=new_index_name,
                document_class=str(doc),
            )
            for task in indexing_tasks:
                task.apply_async(**apply_async_kwargs)

            log.info(
                "Tasks issued successfully.",
                model_name=model_name,
                app_label=app_label,
                items=queryset.count(),
            )
        return timestamp
Example #33
    def _reindex_projects_from(self, days_ago, queue):
        """Reindex projects with recent changes."""
        since = datetime.now() - timedelta(days=days_ago)
        queryset = Project.objects.filter(modified_date__gte=since).distinct()
        app_label = Project._meta.app_label
        model_name = Project.__name__
        apply_async_kwargs = {'queue': queue}

        for doc in registry.get_documents(models=[Project]):
            indexing_tasks = self._get_indexing_tasks(
                app_label=app_label,
                model_name=model_name,
                queryset=queryset,
                index_name=doc._index._name,
                document_class=str(doc),
            )
            for task in indexing_tasks:
                task.apply_async(**apply_async_kwargs)
            log.info("Tasks issued successfully. model=%s.%s items=%s",
                     app_label, model_name, str(queryset.count()))
Example #34
    def _change_index(self, models, timestamp):
        for doc in registry.get_documents(models):
            queryset = doc().get_queryset()
            app_label = queryset.model._meta.app_label
            model_name = queryset.model.__name__
            index_name = doc._index._name
            new_index_name = "{}_{}".format(index_name, timestamp)
            switch_es_index(
                app_label=app_label,
                model_name=model_name,
                index_name=index_name,
                new_index_name=new_index_name,
            )
            log.info(
                "Index name changed. model=%s.%s from=%s to=%s",
                app_label,
                model_name,
                new_index_name,
                index_name,
            )
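Examples #32 and #34 appear to be two halves of one zero-downtime flow: reindex into a timestamped index, then switch the alias over. A sketch of how they would plausibly be combined (method context and queue name assumed):

# Inside the same management command (assumed): build first, switch after.
timestamp = self._run_reindex_tasks(models, queue='indexing')  # Example #32
self._change_index(models, timestamp)                          # Example #34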
Example #35
    def fix_resources_links_protocol(self):
        self.stdout.write('Reading resource data from https_protocol_report')
        latest_report = None
        try:
            latest_report = Report.objects.filter(
                file__contains='http_protocol_resources').latest('created')
        except Report.DoesNotExist:
            self.stdout.write(
                'No http_protocol_resources report,'
                ' you need to generate a report first with: manage.py create_https_protocol_report.'
            )
        if latest_report:
            file_path = latest_report.file
            self.stdout.write(f'Reading data from report: {file_path}')
            full_path = str(settings.ROOT_DIR) + file_path
            with open(full_path) as csvfile:
                report_data = csv.reader(csvfile, delimiter=',')
                next(report_data, None)  # skip the header row
                resources_ids = [
                    row[0] for row in report_data
                    if 'Wymagana poprawa' in row[2]
                ]
            self.stdout.write(
                f'Found {len(resources_ids)} resources to update link protocol.'
            )
            edited_resources = []
            resources = Resource.objects.filter(pk__in=resources_ids,
                                                link__contains='http://')
            edited_ids = []
            for res in resources:
                old_link = res.link
                res.link = old_link.replace('http://', 'https://')
                edited_resources.append(res)
                edited_ids.append(res.pk)
            self.stdout.write('Attempting to update resources in db and ES.')
            Resource.objects.bulk_update(edited_resources, ['link'])
            docs = registry.get_documents((Resource, ))
            for doc in docs:
                self.stdout.write(f'Updating document {doc} in ES')
                doc().update(Resource.objects.filter(pk__in=edited_ids))
            self.stdout.write(f'Updated {resources.count()} resources.')
Example #36
    def get_model_object_by_instance(self, instance):
        """
        Return the model object for an Elasticsearch response instance.
        """
        document = None
        _object = None
        index_or_alias_name = ElasticsearchUtils.get_alias_by_index_name(
            instance.meta.index)
        for doc in registry.get_documents():
            if index_or_alias_name == doc._index._name:  # pylint: disable=protected-access
                document = doc
                break
        hit = self._build_hit(instance)
        es_pk = hit['_source'].get('pk')
        if document and es_pk:
            try:
                _object = document(hit).get_queryset().get(pk=es_pk)
            except ObjectDoesNotExist:
                log.error(
                    "Object could not be found in database for SearchResult '%r'.",
                    self)

        return _object