def deleteIncompleteTiles(self, params):
    """
    Remove large image records from items where the conversion job never
    finished, canceling any queued or running conversion jobs first.
    """
    result = {'removed': 0}
    while True:
        item = Item().findOne({'largeImage.expected': True})
        if not item:
            break
        job = Job().load(item['largeImage']['jobId'], force=True)
        if job and job.get('status') in (JobStatus.QUEUED,
                                         JobStatus.RUNNING):
            job = Job().cancelJob(job)
        # If the cancel request did not take effect, report it and stop
        # rather than deleting data out from under a running job.
        if job and job.get('status') in (JobStatus.QUEUED,
                                         JobStatus.RUNNING):
            result['message'] = ('The job for item %s could not be '
                                 'canceled' % (str(item['_id'])))
            break
        ImageItem().delete(item)
        result['removed'] += 1
    return result
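
As a quick illustration, here is a minimal client-side sketch of invoking a cleanup endpoint like this over Girder's REST API. The route path, server URL, and credentials are assumptions for illustration and are not taken from the code above.

import girder_client

# Connect and authenticate as an admin (placeholder credentials).
client = girder_client.GirderClient(apiUrl='http://localhost:8080/api/v1')
client.authenticate('admin', 'adminpassword')
# Hypothetical route; the actual mount point of the endpoint is not shown above.
result = client.delete('large_image/tiles/incomplete')
print('Removed %d incomplete tile sets' % result['removed'])
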
Example No. 2
    def delete(self, item, skipFileIds=None):
        deleted = False
        if 'largeImage' in item:
            job = None
            if 'jobId' in item['largeImage']:
                try:
                    job = Job().load(item['largeImage']['jobId'],
                                     force=True,
                                     exc=True)
                except ValidationException:
                    # The job has been deleted, but we still need to clean up
                    # the rest of the tile information
                    pass
            if (item['largeImage'].get('expected') and job
                    and job.get('status')
                    in (JobStatus.QUEUED, JobStatus.RUNNING)):
                # cannot cleanly remove the large image, since a conversion
                # job is currently in progress
                # TODO: cancel the job
                # TODO: return a failure error code
                return False

            # If this file was created by the worker job, delete it
            if 'jobId' in item['largeImage']:
                # To eliminate all traces of the job, add
                # if job:
                #     Job().remove(job)
                del item['largeImage']['jobId']

            if 'originalId' in item['largeImage']:
                # The large image file should not be the original file
                assert item['largeImage']['originalId'] != \
                    item['largeImage'].get('fileId')

                if ('fileId' in item['largeImage'] and
                    (not skipFileIds
                     or item['largeImage']['fileId'] not in skipFileIds)):
                    file = File().load(id=item['largeImage']['fileId'],
                                       force=True)
                    if file:
                        File().remove(file)
                del item['largeImage']['originalId']

            del item['largeImage']

            item = self.save(item)
            deleted = True
        self.removeThumbnailFiles(item)
        return deleted
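
A minimal sketch of how the delete() method above might be used, assuming Girder 3-style model imports and that ImageItem is the model class this method belongs to; itemId is a placeholder.

from girder.models.item import Item

# itemId is a placeholder for the id of an item that has a large image.
item = Item().load(itemId, force=True)
deleted = ImageItem().delete(item)
if not deleted:
    # A conversion job is still queued or running, so nothing was removed.
    print('Large image removal skipped; a conversion job is still in progress.')
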
Example No. 3

def createThumbnailsJob(job):
    """
    Create thumbnails for all of the large image items.

    :param job: the job object including kwargs which contains:
        spec: an array, each entry of which is the parameter dictionary for
            the model getThumbnail function.
        logInterval: the time in seconds between log messages.  This also
            controls how promptly a cancel request takes effect.
        concurrent: the number of threads to use.  0 uses the number of CPUs.
    """
    job = Job().updateJob(job,
                          log='Started creating large image thumbnails\n',
                          status=JobStatus.RUNNING)
    concurrency = int(job['kwargs'].get('concurrent', 0))
    concurrency = psutil.cpu_count(
        logical=True) if concurrency < 1 else concurrency
    status = {
        'checked': 0,
        'created': 0,
        'failed': 0,
    }

    spec = job['kwargs']['spec']
    logInterval = float(job['kwargs'].get('logInterval', 10))
    job = Job().updateJob(job,
                          log='Creating thumbnails (%d concurrent)\n' %
                          concurrency)
    nextLogTime = time.time() + logInterval
    tasks = []
    # This could be switched from ThreadPoolExecutor to ProcessPoolExecutor
    # without any other changes.  Doing so would probably improve parallel
    # performance, but may not work reliably under Python 2.x.
    pool = concurrent.futures.ThreadPoolExecutor(max_workers=concurrency)
    try:
        # Get a cursor with the list of images
        items = Item().find({'largeImage.fileId': {'$exists': True}})
        if hasattr(items, 'count'):
            status['items'] = items.count()
        status['specs'] = len(spec)
        nextitem = cursorNextOrNone(items)
        while len(tasks) or nextitem is not None:
            # Create more tasks than we strictly need so that if one finishes
            # before we check, another will be ready.  This is balanced against
            # not creating too many, to avoid excessive memory use.  As such,
            # we can't simply iterate over the database cursor, as it would be
            # exhausted before we are done.
            while len(tasks) < concurrency * 4 and nextitem is not None:
                tasks.append(
                    pool.submit(createThumbnailsJobTask, nextitem, spec))
                nextitem = cursorNextOrNone(items)
            # Wait a short time or until the oldest task is complete
            try:
                tasks[0].result(0.1)
            except concurrent.futures.TimeoutError:
                pass
            # Remove completed tasks from our list, adding their results to the
            # status.
            for pos in range(len(tasks) - 1, -1, -1):
                if tasks[pos].done():
                    r = tasks[pos].result()
                    status['created'] += r['created']
                    status['checked'] += r['checked']
                    status['failed'] += r['failed']
                    status['lastFailed'] = r.get('lastFailed',
                                                 status.get('lastFailed'))
                    tasks[pos:pos + 1] = []
            # Periodically, log the state of the job and check if it was
            # deleted or canceled.
            if time.time() > nextLogTime:
                job, msg = createThumbnailsJobLog(job, status)
                # Check if the job was deleted or canceled; if so, quit
                job = Job().load(id=job['_id'], force=True)
                if not job or job['status'] in (JobStatus.CANCELED,
                                                JobStatus.ERROR):
                    cause = {
                        None: 'deleted',
                        JobStatus.CANCELED: 'canceled',
                        JobStatus.ERROR: 'stopped due to error',
                    }[None if not job else job.get('status')]
                    msg = 'Large image thumbnails job %s' % cause
                    logger.info(msg)
                    # Cancel any outstanding tasks.  If they haven't started,
                    # they are discarded.  Those that have started will still
                    # run, though.
                    for task in tasks:
                        task.cancel()
                    return
                nextLogTime = time.time() + logInterval
    except Exception:
        logger.exception('Error with large image create thumbnails job')
        Job().updateJob(job,
                        log='Error creating large image thumbnails\n',
                        status=JobStatus.ERROR)
        return
    finally:
        # Clean up the task pool asynchronously
        pool.shutdown(False)
    job, msg = createThumbnailsJobLog(job, status, 'Finished: ',
                                      JobStatus.SUCCESS)
    logger.info(msg)
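
The docstring above spells out the job kwargs; the sketch below shows one way such a job might be created and scheduled with the girder_jobs Job model. The module path, job type string, thumbnail spec, and adminUser variable are assumptions for illustration, not taken from the code above.

from girder_jobs.models.job import Job

job = Job().createLocalJob(
    module='girder_large_image.rest.large_image',  # assumed module path
    function='createThumbnailsJob',
    kwargs={
        'spec': [{'width': 160, 'height': 100}],  # passed to getThumbnail
        'logInterval': 10,
        'concurrent': 0,  # 0 means one thread per CPU
    },
    title='Create large image thumbnails',
    type='large_image_create_thumbnails',  # assumed job type string
    user=adminUser,  # placeholder for an admin user document
    asynchronous=True,
)
Job().scheduleJob(job)
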
Example No. 4
def onJobUpdate(event):
    """
    Hook into the job update event so we can look for job failures and email
    the user and challenge/phase administrators accordingly. Here, an
    administrator is defined as a user with WRITE access or above.
    """
    isErrorStatus = False
    try:
        isErrorStatus = int(event.info['params'].get('status')) == JobStatus.ERROR
    except (ValueError, TypeError):
        pass

    if (event.info['job']['type'] == 'covalic_score' and isErrorStatus):
        covalicHost = posixpath.dirname(mail_utils.getEmailUrlPrefix())

        # Create minimal log that contains only Covalic errors.
        # Use full log if no Covalic-specific errors are found.
        # Fetch log from model, because log in event may not be up-to-date.
        job = Job().load(
            event.info['job']['_id'], includeLog=True, force=True)
        log = job.get('log')

        minimalLog = None
        if log:
            log = ''.join(log)
            minimalLog = '\n'.join([line[len(JOB_LOG_PREFIX):].strip()
                                    for line in log.splitlines()
                                    if line.startswith(JOB_LOG_PREFIX)])
        if not minimalLog:
            minimalLog = log

        submission = Submission().load(
            event.info['job']['covalicSubmissionId'])
        phase = Phase().load(
            submission['phaseId'], force=True)
        challenge = Challenge().load(
            phase['challengeId'], force=True)
        user = User().load(
            event.info['job']['userId'], force=True)

        rescoring = job.get('rescoring', False)

        # Mail admins, include full log
        emails = sorted(getPhaseUserEmails(
            phase, AccessType.WRITE, includeChallengeUsers=True))
        html = mail_utils.renderTemplate('covalic.submissionErrorAdmin.mako', {
            'submission': submission,
            'challenge': challenge,
            'phase': phase,
            'user': user,
            'host': covalicHost,
            'log': log
        })
        mail_utils.sendEmail(
            to=emails, subject='Submission processing error', text=html)

        # Mail user, include minimal log
        if not rescoring:
            html = mail_utils.renderTemplate('covalic.submissionErrorUser.mako', {
                'submission': submission,
                'challenge': challenge,
                'phase': phase,
                'host': covalicHost,
                'log': minimalLog
            })
            mail_utils.sendEmail(
                to=user['email'], subject='Submission processing error', text=html)
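
For context, a handler like onJobUpdate is typically registered with Girder's event system when the plugin loads. A minimal sketch, assuming the handler is bound to the 'jobs.job.update' event (whose info carries the 'job' and 'params' keys read above); the handler key 'covalic' is an assumption for illustration.

from girder import events


def load(info):
    # Watch job updates so scoring failures trigger the emails above.
    events.bind('jobs.job.update', 'covalic', onJobUpdate)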