Beispiel #1
0
def remove_indexed_files(model, version, build):
    """
    Delete a version's stale files from the search index.

    Index entries whose ``project`` and ``version`` terms match the slugs of
    ``version`` are deleted, except those whose ``build`` term equals
    ``build``.

    Nothing is deleted when autosync is disabled. Any exception raised while
    looking up the document or running the delete is logged and swallowed.

    :param model: Model class used to look up its registered search document.
    :param version: Object exposing ``slug`` and ``project.slug``.
    :param build: Value of the ``build`` term that must stay in the index.
    """

    if DEDConfig.autosync_enabled():
        try:
            # Only the first document registered for the model is used.
            document_class = list(registry.get_documents(models=[model]))[0]
            log.info(
                'Deleting old files from search index for: %s:%s',
                version.project.slug,
                version.slug,
            )
            stale = document_class().search()
            stale = stale.filter('term', project=version.project.slug)
            stale = stale.filter('term', version=version.slug)
            stale = stale.exclude('term', build=build)
            stale.delete()
        except Exception:
            # Best effort: a failed cleanup must not break the caller.
            log.exception('Unable to delete a subset of files. Continuing.')
    else:
        log.info(
            'Autosync disabled, skipping removal from the search index for: %s:%s',
            version.project.slug,
            version.slug,
        )
    def get_data_of_related_instances(self, instance):
        """
        Describe the model instances related to ``instance``.

        Each document class returned by ``self._get_related_doc(instance)`` is
        asked for its related instances; the answer may be a single model
        instance or an iterable of them. Lookups that raise
        ``ObjectDoesNotExist`` are skipped. Objects flagged ``is_removed`` or
        ``is_permanently_removed`` are left out of the result.

        :param instance: Instance whose related objects are collected.
        :returns: List of dicts with ``app_label``, ``object_name`` and
            ``instance_id`` keys; an empty list when autosync is disabled.
        """
        if not DEDConfig.autosync_enabled():
            return []

        # A set de-duplicates objects reported by more than one document.
        collected = set()
        for doc_class in self._get_related_doc(instance):
            document = doc_class()
            try:
                found = document.get_instances_from_related(instance)
                if not isinstance(found, models.Model):
                    collected.update(found)
                else:
                    collected.add(found)
            except ObjectDoesNotExist:
                continue

        return [
            {
                'app_label': obj._meta.app_label,
                'object_name': obj._meta.concrete_model._meta.object_name,
                'instance_id': obj.id,
            }
            for obj in collected
            if not (obj.is_removed or obj.is_permanently_removed)
        ]
Beispiel #3
0
def remove_indexed_files(model, project_slug, version_slug=None, build_id=None):
    """
    Remove files from `version_slug` of `project_slug` from the search index.

    Nothing is deleted when autosync is disabled. Any exception raised while
    looking up the document or running the delete is logged and swallowed.

    :param model: Class of the model to be deleted.
    :param project_slug: Project slug.
    :param version_slug: Version slug. If not given (or falsy),
                    all index entries from `project_slug` are deleted.
    :param build_id: Build id to keep. If not given (or falsy), all index
                    entries from the selected version are deleted.
    """

    # A structlog-style ``bind`` returns a new logger instead of mutating
    # ``log``; keep the result so the slugs are actually attached to the
    # messages emitted below.
    bound_log = log.bind(
        project_slug=project_slug,
        version_slug=version_slug,
    )

    if not DEDConfig.autosync_enabled():
        bound_log.info('Autosync disabled, skipping removal from the search index.')
        return

    try:
        # Only the first document registered for the model is used.
        document = list(registry.get_documents(models=[model]))[0]
        bound_log.info('Deleting old files from search index.')
        documents = (
            document().search()
            .filter('term', project=project_slug)
        )
        if version_slug:
            documents = documents.filter('term', version=version_slug)
        if build_id:
            # Keep entries of the given build; everything else is removed.
            documents = documents.exclude('term', build=build_id)
        documents.delete()
    except Exception:
        # Best effort: a failed cleanup must not break the caller.
        bound_log.exception('Unable to delete a subset of files. Continuing.')
Beispiel #4
0
    def _handle_related(self, instance) -> None:
        """
        Dispatch a group of sync tasks for objects related to ``instance``.

        Assumes the ``get_instances_from_related`` document method always
        returns a list of ids or ``None``. Documents whose model is not
        allowed to sync, or for which ``instance`` is not one of the declared
        related models, are skipped. Does nothing when autosync is disabled.

        The group runs immediately outside an atomic block; inside one it is
        deferred with ``transaction.on_commit``.
        """

        if not DEDConfig.autosync_enabled():
            return

        signatures = []
        for doc_class in registry._get_related_doc(instance):
            document = doc_class(related_instance_to_ignore=instance)
            target_model = document.Django.model
            syncable = self._is_sync_allowed(target_model) and isinstance(
                instance, document.Django.related_models)
            if not syncable:
                continue
            related_ids = document.get_instances_from_related(instance)
            if related_ids is None:
                continue
            sync_task = target_model.get_sync_task()
            signatures.extend([sync_task.s(obj_id) for obj_id in related_ids])

        if signatures:
            task_group = group(signatures)
            connection = transaction.get_connection()
            if connection.in_atomic_block:
                # Wait for the surrounding transaction to commit first.
                transaction.on_commit(lambda: task_group())
            else:
                task_group()
Beispiel #5
0
def remove_indexed_file(sender, instance_list, **kwargs):
    """
    Remove deleted files from the build process.

    The objects in ``instance_list`` are deleted from the search index. When
    both ``version`` and ``commit`` keyword arguments are supplied, every
    entry of that version and project whose ``commit`` term differs from
    ``commit`` is deleted as well.

    Returns early on an empty ``instance_list``; the index is left untouched
    when autosync is disabled.

    :param sender: Model class; used to look up its registered document.
    :param instance_list: Objects (exposing ``id``) to delete.
    """

    if not instance_list:
        return

    # Only the first document registered for the model is used.
    document_class = list(registry.get_documents(models=[sender]))[0]
    version = kwargs.get('version')
    commit = kwargs.get('commit')

    delete_kwargs = {
        'app_label': sender._meta.app_label,
        'model_name': sender.__name__,
        'document_class': str(document_class),
        'objects_id': [obj.id for obj in instance_list],
    }

    # Do not touch the index if autosync is disabled globally
    if not DEDConfig.autosync_enabled():
        return

    delete_objects_in_es(**delete_kwargs)

    if not (version and commit):
        return

    # Sanity check by deleting all old files not in this commit
    log.info('Deleting old commits from search index')
    stale = document_class().search()
    stale = stale.filter('term', version=version.slug)
    stale = stale.filter('term', project=version.project.slug)
    stale = stale.exclude('term', commit=commit)
    stale.delete()
Beispiel #6
0
def index_new_files(model, version, build):
    """
    Index new files from the version into the search index.

    The queryset of the document registered for ``model`` is filtered by the
    version's project, the version itself and ``build``, and the matching
    objects are passed to the document's ``update`` method.

    Nothing is indexed when autosync is disabled. Any exception raised while
    looking up the document or indexing is logged and swallowed.

    :param model: Model class used to look up its registered search document.
    :param version: Object exposing ``slug``, ``project`` and ``project.slug``.
    :param build: Value used for the queryset's ``build`` filter.
    """

    if not DEDConfig.autosync_enabled():
        log.info(
            'Autosync disabled, skipping indexing into the search index for: %s:%s',
            version.project.slug,
            version.slug,
        )
        return

    try:
        # Only the first document registered for the model is used.
        document = list(registry.get_documents(models=[model]))[0]
        doc_obj = document()
        queryset = (
            doc_obj.get_queryset()
            .filter(project=version.project, version=version, build=build)
        )
        log.info(
            'Indexing new objects into search index for: %s:%s',
            version.project.slug,
            version.slug,
        )
        # Hand over an iterator rather than a materialized list of objects.
        doc_obj.update(queryset.iterator())
    except Exception:
        # Best effort: a failed indexing run must not break the caller.
        log.exception('Unable to index a subset of files. Continuing.')
    def delete_documents_by_model_and_id(self, model, _id, **kwargs):
        """
        Delete the entry with id ``_id`` from every document of ``model``.

        Does nothing when autosync is disabled or when ``model`` has no entry
        in ``self._models``. Documents whose ``django.ignore_signals`` flag is
        set are skipped. Extra keyword arguments are forwarded to
        ``delete_by_id``.
        """
        if not DEDConfig.autosync_enabled() or model not in self._models:
            return

        for doc_class in self._models[model]:
            if doc_class.django.ignore_signals:
                continue
            doc_class().delete_by_id(_id, **kwargs)
Beispiel #8
0
def remove_project_delete(instance, *args, **kwargs):
    """
    Delete the search index entry for ``instance``.

    The deletion is skipped when autosync is disabled. Positional and keyword
    arguments other than ``instance`` are ignored.
    """
    from readthedocs.search.documents import ProjectDocument

    task_kwargs = {
        'app_label': Project._meta.app_label,
        'model_name': Project.__name__,
        'document_class': str(ProjectDocument),
        'objects_id': [instance.id],
    }

    if not DEDConfig.autosync_enabled():
        return

    # Called directly instead of through `delay`, because the objects will
    # be deleted already.
    delete_objects_in_es(**task_kwargs)
Beispiel #9
0
def index_html_file(instance_list, **_):
    """
    Index the given ``HTMLFile`` objects through ``PageDocument``.

    Indexing is skipped when autosync is disabled. Extra keyword arguments
    are ignored.

    :param instance_list: Objects (exposing ``id``) to index.
    """
    task_kwargs = {
        'app_label': HTMLFile._meta.app_label,
        'model_name': HTMLFile.__name__,
        'document_class': str(PageDocument),
        'index_name': None,  # No need to change the index name
        'objects_id': [obj.id for obj in instance_list],
    }

    # Do not index if autosync is disabled globally
    if not DEDConfig.autosync_enabled():
        return

    index_objects_to_es(**task_kwargs)
Beispiel #10
0
def index_project(instance, *args, **kwargs):
    """
    Index ``instance`` through ``ProjectDocument``.

    The work is handed to ``index_objects_to_es`` via its ``delay`` method
    and is skipped when autosync is disabled. Positional and keyword
    arguments other than ``instance`` are ignored.
    """
    task_kwargs = {
        'app_label': Project._meta.app_label,
        'model_name': Project.__name__,
        'document_class': str(ProjectDocument),
        'index_name': None,  # No need to change the index name
        'objects_id': [instance.id],
    }

    # Do not index if autosync is disabled globally
    if not DEDConfig.autosync_enabled():
        return

    index_objects_to_es.delay(**task_kwargs)
Beispiel #11
0
def remove_project_delete(instance, *args, **kwargs):
    """
    Delete the search index entry for ``instance``.

    The work is handed to ``delete_objects_in_es`` via its ``delay`` method
    and is skipped when autosync is disabled. Positional and keyword
    arguments other than ``instance`` are ignored.
    """
    from readthedocs.search.documents import ProjectDocument

    task_kwargs = {
        'app_label': Project._meta.app_label,
        'model_name': Project.__name__,
        'document_class': str(ProjectDocument),
        'objects_id': [instance.id],
    }

    # Do not touch the index if autosync is disabled globally
    if not DEDConfig.autosync_enabled():
        return

    delete_objects_in_es.delay(**task_kwargs)
Beispiel #12
0
def remove_html_file(instance_list, **_):
    """
    Remove deleted files from the build process.

    The given ``HTMLFile`` objects are deleted from the search index through
    ``PageDocument``. Skipped when autosync is disabled; extra keyword
    arguments are ignored.

    :param instance_list: Objects (exposing ``id``) to delete.
    """
    from readthedocs.search.documents import PageDocument

    task_kwargs = {
        'app_label': HTMLFile._meta.app_label,
        'model_name': HTMLFile.__name__,
        'document_class': str(PageDocument),
        'objects_id': [obj.id for obj in instance_list],
    }

    # Do not touch the index if autosync is disabled globally
    if not DEDConfig.autosync_enabled():
        return

    delete_objects_in_es(**task_kwargs)
Beispiel #13
0
def index_indexed_file(sender, instance_list, **kwargs):
    """
    Handle indexing from the build process.

    The objects in ``instance_list`` are indexed through the document
    registered for ``sender``. Returns early on an empty ``instance_list``;
    indexing is skipped when autosync is disabled.

    :param sender: Model class; used to look up its registered document.
    :param instance_list: Objects (exposing ``id``) to index.
    """

    if not instance_list:
        return

    # Only the first document registered for the model is used.
    document_class = list(registry.get_documents(models=[sender]))[0]
    task_kwargs = {
        'app_label': sender._meta.app_label,
        'model_name': sender.__name__,
        'document_class': str(document_class),
        'objects_id': [obj.id for obj in instance_list],
    }

    # Do not index if autosync is disabled globally
    if not DEDConfig.autosync_enabled():
        return

    index_objects_to_es(**task_kwargs)
Beispiel #14
0
def index_html_file_save(instance, *args, **kwargs):
    """
    Index a saved HTMLFile instance in response to the post_save signal.

    This uses Celery to do it async, replacing how django-elasticsearch-dsl
    does it. Indexing is skipped when autosync is disabled; positional and
    keyword arguments other than ``instance`` are ignored.
    """
    from readthedocs.search.documents import PageDocument

    task_kwargs = {
        'app_label': HTMLFile._meta.app_label,
        'model_name': HTMLFile.__name__,
        'document_class': str(PageDocument),
        'objects_id': [instance.id],
    }

    # Do not index if autosync is disabled globally
    if not DEDConfig.autosync_enabled():
        return

    index_objects_to_es.delay(**task_kwargs)
Beispiel #15
0
 def _is_sync_allowed(sender) -> bool:
     """Tell whether autosync is enabled and ``sender`` subclasses ``SyncTaskMixin``."""
     autosync_on = DEDConfig.autosync_enabled()
     return autosync_on and issubclass(sender, SyncTaskMixin)
Beispiel #16
0
def remove_html_file(instance_list, **_):
    """
    Delete ``instance_list`` from the search index via the registry.

    Skipped when autosync is disabled; extra keyword arguments are ignored.
    """
    # Do not touch the index if autosync is disabled globally
    if not DEDConfig.autosync_enabled():
        return

    registry.delete(instance_list)
Beispiel #17
0
def remove_project(instance, *args, **kwargs):
    """
    Delete ``instance`` from the search index via the registry.

    Skipped when autosync is disabled; positional and keyword arguments other
    than ``instance`` are ignored.
    """
    # Do not touch the index if autosync is disabled globally
    if not DEDConfig.autosync_enabled():
        return

    registry.delete(instance)