Ejemplo n.º 1
0
def pop_batch_queue_item(batch_job):
    """
    Pop the next batch queue item of the given batch job.

    The queue item with the lowest id belonging to `batch_job` is looked
    up, deleted from the database (the deletion is committed immediately)
    and its fields are returned as a tuple `item`, `flags`.

    Return `None` if there is no batch queue item for this batch job.

    .. note:: An earlier implementation selected the item with a more
        complicated query::

            SELECT QueueID, Input, Flags
            FROM BatchQueue
            WHERE QueueID = (
                SELECT MIN(QueueID)
                FROM BatchQueue
                GROUP BY JobID
                HAVING JobID = {batch_job.id}
            );

        Benchmarks showed no significant performance difference, so the
        more obvious query is used for now.
    """
    # Build the query step by step: restrict to this job, lowest id first.
    items_of_job = BatchQueueItem.query.filter_by(batch_job=batch_job)
    lowest_id_first = items_of_job.order_by(BatchQueueItem.id.asc())
    head = lowest_id_first.first()

    if head is not None:
        # Capture the fields before the row is removed.
        popped = (head.item, head.flags)

        session.delete(head)
        session.commit()

        return popped

    return None
Ejemplo n.º 2
0
def pop_batch_queue_item(batch_job):
    """
    Fetch and remove the first queue item of a batch job.

    Queue items of `batch_job` are ordered by ascending id; the first one
    is deleted from the database and the deletion is committed. Its fields
    are returned as a tuple `item`, `flags`.

    When the batch job has no queue items, `None` is returned and nothing
    is deleted or committed.

    .. note:: Finding the next batch queue item used to be done with a
        more complicated query::

            SELECT QueueID, Input, Flags
            FROM BatchQueue
            WHERE QueueID = (
                SELECT MIN(QueueID)
                FROM BatchQueue
                GROUP BY JobID
                HAVING JobID = {batch_job.id}
            );

        No significant performance difference showed up in benchmarks, so
        we stick with the more obvious query for now.
    """
    query = BatchQueueItem.query.filter_by(batch_job=batch_job)
    next_item = query.order_by(BatchQueueItem.id.asc()).first()

    # Empty queue for this job: nothing to pop.
    if next_item is None:
        return None

    # Remember the fields we hand back, then drop the row.
    fields = (next_item.item, next_item.flags)
    session.delete(next_item)
    session.commit()

    return fields
Ejemplo n.º 3
0
    def process(self):
        """
        Start the mutalyzer Batch Processing.

        All batch jobs are queried from the database and processed in a
        round-robin fashion: in each round, one queue item is popped for
        every job and handed to the processing method that matches the job
        type. After each round the jobs are queried again, so jobs added
        during the last round are picked up. This continues until no jobs
        are left to process.

        If during this process the {stop} method is called, the current
        job item is completed and we return.

        This method uses two database tables, BatchJob and BatchQueue.

        Job types and the methods they are dispatched to:
          - name-checker        ;   _processNameBatch
          - syntax-checker      ;   _processSyntaxCheck
          - position-converter  ;   _processConversion
          - snp-converter       ;   _processSNP

        Queue items of a job with any other type are popped but not
        processed.

        When no queue item is left for a job, the job is finished: a
        message is printed, `__sendMail` is called with the email address
        and download url of the job, and the job is deleted from the
        database.

        #Flags
        Popping a queue item returns both the input for the batch and the
        flags for the job. A job can be flagged in three ways:
          - A       ;   Altered - this means that the input is altered
                        before execution. This could be the case if an
                        entry uses an accession number without a version.
                        If a version is retrieved from the NCBI, all
                        further occurrences of that accession will be
                        replaced by the accession with version number.
          - S       ;   Skipped - this means that this batch entry will be
                        skipped by the batch process. This could be the
                        case if the user made a mistake that could not be
                        auto fixed and henceforth all occurrences of the
                        mistake will be skipped.
          - C       ;   Continue - this means the input does not end the
                        current row, so no new row in the output should
                        be started.

        A Flag consists of either an A, S or C followed by a digit, which
        refers to the reason of alteration / skip.
        """
        while not self.stopped():
            all_jobs = BatchJob.query

            # No jobs in the database anymore, so we are done.
            if not all_jobs.count():
                break

            for batch_job in all_jobs:
                if self.stopped():
                    break

                popped = queries.pop_batch_queue_item(batch_job)

                if popped is None:
                    # Queue of this job is exhausted: report, notify and
                    # remove the job itself.
                    message = ('Job %s finished, email %s file %s'
                               % (batch_job.id, batch_job.email,
                                  batch_job.id))
                    print(message)
                    self.__sendMail(batch_job.email, batch_job.download_url)
                    session.delete(batch_job)
                    session.commit()
                    continue

                item, flags = popped
                job_type = batch_job.job_type

                if job_type == 'name-checker':
                    self._processNameBatch(batch_job, item, flags)
                elif job_type == 'syntax-checker':
                    self._processSyntaxCheck(batch_job, item, flags)
                elif job_type == 'position-converter':
                    self._processConversion(batch_job, item, flags)
                elif job_type == 'snp-converter':
                    self._processSNP(batch_job, item, flags)
                # Any other job type should never happen and is ignored.
                # Todo: Log some screaming message.
Ejemplo n.º 4
0
    def process(self):
        """
        Run the mutalyzer batch processing loop.

        Every round, all jobs are retrieved from the database and for each
        of them the first queue item is popped and processed (round-robin).
        After each round the jobs are retrieved again, so jobs added during
        the last processing round are included. The loop ends when no jobs
        are left to process.

        If during this process the {stop} method is called, the current
        job item is completed and we return.

        This method uses two database tables, BatchJob and BatchQueue.

        The processing method is selected by the type of the job:
          - name-checker        ;   _processNameBatch
          - syntax-checker      ;   _processSyntaxCheck
          - position-converter  ;   _processConversion
          - snp-converter       ;   _processSNP

        An item of a job with an unknown type is popped from the queue but
        otherwise ignored.

        A job without remaining queue items is considered finished: a
        message is printed, `__sendMail` is called with the email address
        and download url of the job, and the job is deleted from the
        database.

        #Flags
        Each popped queue item comes with the input for the batch and the
        flags for the job. A job can be flagged in three ways:
          - A       ;   Altered - this means that the input is altered
                        before execution. This could be the case if an
                        entry uses an accession number without a version.
                        If a version is retrieved from the NCBI, all
                        further occurrences of that accession will be
                        replaced by the accession with version number.
          - S       ;   Skipped - this means that this batch entry will be
                        skipped by the batch process. This could be the
                        case if the user made a mistake that could not be
                        auto fixed and henceforth all occurrences of the
                        mistake will be skipped.
          - C       ;   Continue - this means the input does not end the
                        current row, so no new row in the output should
                        be started.

        A Flag consists of either an A, S or C followed by a digit, which
        refers to the reason of alteration / skip.
        """
        # Job type -> name of the method processing items of that type.
        # Names are resolved lazily, only for the type actually seen.
        handler_names = {
            'name-checker': '_processNameBatch',
            'syntax-checker': '_processSyntaxCheck',
            'position-converter': '_processConversion',
            'snp-converter': '_processSNP',
        }

        while not self.stopped():
            jobs = BatchJob.query

            if jobs.count() == 0:
                # Nothing left to process.
                break

            for batch_job in jobs:
                if self.stopped():
                    break

                next_entry = queries.pop_batch_queue_item(batch_job)

                if next_entry is None:
                    # No queue items left, the job is finished.
                    print('Job %s finished, email %s file %s' %
                          (batch_job.id, batch_job.email, batch_job.id))
                    self.__sendMail(batch_job.email, batch_job.download_url)
                    session.delete(batch_job)
                    session.commit()
                    continue

                item, flags = next_entry

                handler_name = handler_names.get(batch_job.job_type)
                if handler_name is None:
                    # Unknown job type, should never happen.
                    # Todo: Log some screaming message.
                    continue

                handle = getattr(self, handler_name)
                handle(batch_job, item, flags)