コード例 #1
0
    def _get_active_jobs(self):
        """
        Query the database for active snapshot jobs.

        Each DocumentsLanguages record corresponds to a snapshot "job." Each job
        may have zero or more Snapshot records which are queried and returned
        in a separate method.

        Django QuerySets are lazy: building the filter does not touch the
        database. The QuerySet is therefore evaluated inside the try block so
        that database errors are reported through
        command_utils.raise_command_error, and so that "success" is only
        written after the query has actually run. The fetched rows are cached
        on the returned QuerySet, so iterating it does not query again.

        Returns:
            iterable: A QuerySet filtered on is_enabled=True. When not empty,
            elements are DocumentsLanguages model instances.

        Raises:
            django.core.management.base.CommandError: If exception is raised by
                underlying database library.

        See:
            https://docs.djangoproject.com/en/dev/ref/models/querysets/#when-querysets-are-evaluated

        """
        self.stdout.write('Querying enabled snapshot jobs: ', ending='')

        try:
            docsnaps_set = (
                django_docsnaps.models.DocumentsLanguages.objects
                .filter(is_enabled=True))
            # Force evaluation while still inside the try block; filter()
            # alone never executes SQL, so without this the except clause
            # below could not be reached.
            len(docsnaps_set)
        except django.db.Error as exception:
            command_utils.raise_command_error(
                self.stdout, 'A database error occurred: ' + str(exception))

        self.stdout.write(self.style.SUCCESS('success'))
        return docsnaps_set
コード例 #2
0
    def _import_job_module(self, job):
        """
        Attempt to import the job's module.

        Each job (DocumentsLanguages model instance) is associated with a Python
        module. This module is responsible for, at the very least, deciding if
        a new document snapshot needs to be saved and if any transformations to
        the document text need to be applied.

        Args:
            job (django_docsnaps.models.DocumentsLanguages): A
                DocumentsLanguages model instance. This model class represents
                a snapshot job on which is_enabled=True.

        Returns:
            module: The Python module object returned by importlib.

        Raises:
            django.core.management.base.CommandError: If module cannot be
                imported.

        """
        try:
            module = importlib.import_module(job.document_id.module)
        except ImportError as exception:
            exception_message = (
                'The module "{!s}" for snapshot job "{!s}" could not be '
                'imported.')
            exception_message = exception_message.format(
                job.document_id.module, job.document_id.name)
            command_utils.raise_command_error(self.stdout, exception_message)

        return module
コード例 #3
0
    def _validate_module_interface(self, module):
        """
        Check module interface of imported module.

        The goal in validating the module is not to attempt to circumvent
        Pythonic duck typing, but to generate helpful error messages for plugin
        developers.

        Args:
            module: The imported plugin module as returned by importlib.

        Raises:
            django.core.management.base.CommandError: If module is invalid.

        """
        self.stdout.write('Validating module interface: ', ending='')

        necessary_callables = ['get_models', 'transform']
        error_message = None
        for callable_name in necessary_callables:
            if not hasattr(module, callable_name):
                error_message = 'No attribute "{!s}" in {!s}.'
            elif not callable(getattr(module, callable_name)):
                error_message = '"{!s}" is not callable in {!s}.'
            if error_message:
                error_message = error_message.format(callable_name,
                                                     module.__name__)
                command_utils.raise_command_error(self.stdout, error_message)

        self.stdout.write(self.style.SUCCESS('success'))
コード例 #4
0
    def _load_models(self, module, disabled=False):
        """
        Load the module's data.

        Every model returned by module.get_models() is handed to ModelLoader
        inside a single django.db.transaction.atomic() block, so all of the
        module's data is loaded in one transaction.

        Only one exception can be in a thread at a given time. Defer raising
        django.core.management.base.CommandError until after exiting try:except
        block.

        Warnings are collected from every ModelLoader instance, not only the
        last one, and are written after the success message. When
        get_models() yields nothing, no warnings are written.

        Args:
            module: The imported plugin module as returned by importlib.
            disabled (bool): The value of the subcommand's "disabled" flag.

        Raises:
            django.core.management.base.CommandError: If loading fails.

        """
        self.stdout.write('Attempting to load module data: ', ending='')
        command_error_message = None
        loader_warnings = []
        try:
            with django.db.transaction.atomic():
                for model in module.get_models():
                    model_loader = ModelLoader(model, disabled=disabled)
                    # Gather per-loader warnings as we go so that none are
                    # lost and an empty get_models() result is harmless.
                    loader_warnings.extend(model_loader.warnings)
        except django.core.exceptions.MultipleObjectsReturned:
            command_error_message = (
                'Multiple identical records returned for this module\'s data. '
                'This indicates a data integrity violation in the database.')
        except django.db.Error as exception:
            command_error_message = ('A database error occurred: ' +
                                     str(exception))

        if command_error_message:
            command_utils.raise_command_error(self.stdout,
                                              command_error_message)

        self.stdout.write(self.style.SUCCESS('success'))
        for warning in loader_warnings:
            self.stdout.write(self.style.WARNING('warning: ') + warning)
コード例 #5
0
    def _import_module(self, module_name):
        """
        Attempt to load the module passed as an argument to the subcommand.

        Args:
            module_name (string): The fully-qualified, absolute name of the
                plugin module.

        Returns:
            module: The module returned by importlib.import_module if
                successful.

        Raises:
            django.core.management.base.CommandError: If import fails.

        """
        self.stdout.write('Attempting to load module: ', ending='')
        try:
            module = importlib.import_module(module_name)
        except ImportError as import_error:
            command_utils.raise_command_error(self.stdout, import_error)
        else:
            self.stdout.write(self.style.SUCCESS('success'))
            return module
コード例 #6
0
    async def _get_latest_snapshots(self):
        """
        Get the latest document snapshot for each active job.

        As the docsnaps system runs and accumulates potentially hundreds or even
        thousands of document snapshots, it becomes infeasible and detrimental
        to attempt to load all snapshot records. The results must therefore be
        limited to the latest snapshot per job. In-memory sorting is not ideal
        inside a web application; the database layer is the appropriate and
        most efficient place to perform sorting and limiting, but a more
        complex query is required.

        A self-join finds the latest Snapshot record for each
        DocumentsLanguages record: a row is kept only when no other snapshot
        of the same documents_languages_id has a greater datetime. The query
        is written as a raw SQL string rather than through the ORM. Only the
        table names, taken from the models' _meta.db_table, are interpolated
        into the SQL.

        Note the added "raw_documents_languages_id" field in the SELECT query.
        Attempting to get the documents_languages_id from the Snapshot field
        causes the ORM to issue a separate query for each
        documents_languages_id since the field is a DocumentsLanguages
        instance. By using the ORM's annotation feature with raw(), the simple
        integer documents_languages_id is available without loading an entire
        DocumentsLanguages instance from the database.

        raw() is lazy: the SQL only executes when the result is iterated. The
        result dictionary is therefore built inside the try block so that
        database errors are reported through
        command_utils.raise_command_error instead of escaping unhandled.

        NOTE(review): this coroutine contains no await; confirm that callers
        await it and that running the ORM here is intended.

        Returns:
            dict: The latest Snapshot model instance for each enabled
            DocumentsLanguages record, keyed by the snapshot's
            documents_languages_id.

        Raises:
            django.core.management.base.CommandError: If exception is raised by
                underlying database library.

        See:
            https://docs.djangoproject.com/en/dev/topics/db/sql/#adding-annotations

        """
        snapshot_sql = '''
            SELECT
                {Snapshot}.snapshot_id
                ,{Snapshot}.documents_languages_id
                ,{Snapshot}.date
                ,{Snapshot}.time
                ,{Snapshot}.datetime
                ,{Snapshot}.text
                ,{Snapshot}.documents_languages_id AS raw_documents_languages_id
            FROM
                {Snapshot}
                LEFT JOIN {Snapshot} as snapshot_2
                    ON snapshot_2.documents_languages_id = {Snapshot}.documents_languages_id
                    AND snapshot_2.datetime > {Snapshot}.datetime
                INNER JOIN {DocumentsLanguages}
                    ON {DocumentsLanguages}.documents_languages_id = {Snapshot}.documents_languages_id
                    AND {DocumentsLanguages}.is_enabled IS TRUE
            WHERE
                snapshot_2.snapshot_id IS NULL'''
        dl_db_table = django_docsnaps.models.DocumentsLanguages._meta.db_table
        s_db_table = django_docsnaps.models.Snapshot._meta.db_table
        snapshot_sql = snapshot_sql.format(DocumentsLanguages=dl_db_table,
                                           Snapshot=s_db_table)

        try:
            snapshot_set = django_docsnaps.models.Snapshot.objects.raw(
                snapshot_sql)
            # Iterating the RawQuerySet is what actually runs the query, so
            # the comprehension must live inside the try block.
            snapshot_dict = {
                snapshot.raw_documents_languages_id: snapshot
                for snapshot in snapshot_set}
        except django.db.Error as exception:
            command_utils.raise_command_error(
                self.stdout, 'A database error occurred: ' + str(exception))

        return snapshot_dict
コード例 #7
0
    def _validate_module_models(self, module):
        """
        Validate that the module returns complete model hierarchy.

        Checks for the full model relationship tree. The plugin module must
        provide an iterable of DocumentsLanguages instances, the relationship
        fields of which will be traversed.

        The goal in validating the models is not to attempt to circumvent
        Pythonic duck typing, but to generate helpful error messages for plugin
        developers.

        It is best to call this after validating module interface in order to
        avoid potential AttributeErrors.

        The checks run in this order:

        1. The value returned by get_models() must be iterable.
        2. It must contain at least one element. The iterable is materialized
           into a list first so that empty generators/iterators, which are
           always truthy, are detected as well.
        3. Every element must be a DocumentsLanguages instance whose flattened
           model graph contains Document, Language and DocumentsLanguages
           instances.

        Args:
            module: The imported plugin module as returned by importlib.

        Raises:
            django.core.management.base.CommandError: With descriptive messages
                if module is invalid.

        """
        self.stdout.write('Validating module models: ', ending='')

        module_models = module.get_models()

        # Ensure returned models value is an iterable. This is checked before
        # emptiness so that a falsy non-iterable (e.g. None) is reported as
        # not iterable rather than as "an empty iterable".
        try:
            models_iterator = iter(module_models)
        except TypeError as type_error:
            command_utils.raise_command_error(self.stdout, type_error)

        # Ensure returned models value is not empty. Materializing makes the
        # truthiness test reliable for one-shot iterators too.
        module_models = list(models_iterator)
        if not module_models:
            command_utils.raise_command_error(
                self.stdout,
                module.__name__ + '.get_models() returned an empty iterable.')

        # Ensure returned models value is an iterable of correct types and
        # minimum necessary model relationship trees are provided.
        required_classes = {
            django_docsnaps.models.Document, django_docsnaps.models.Language,
            django_docsnaps.models.DocumentsLanguages
        }
        for model in module_models:
            if isinstance(model, django_docsnaps.models.DocumentsLanguages):
                returned_classes = {
                    m.__class__
                    for m in command_utils.flatten_model_graph(model)
                }
                diff = required_classes.difference(returned_classes)
                if diff:
                    # Sort the names so the message is deterministic.
                    command_utils.raise_command_error(
                        self.stdout,
                        module.__name__ + '.get_models() returned instance ' +
                        str(model) + ', the model relationships under which '
                        'are missing at least one instance of the following '
                        'models: ' + ', '.join(sorted(map(str, diff))))
            else:
                command_utils.raise_command_error(
                    self.stdout, module.__name__ + '.get_models() '
                    'returned incorrect type "' + str(type(model)) + '" '
                    'in iterable.')

        self.stdout.write(self.style.SUCCESS('success'))