Example #1
0
def requeue_downloader_job(last_job: DownloaderJob) -> None:
    """Queue a new downloader job to retry ``last_job``.

    The new job copies ``downloader_task`` and ``accession_code`` from
    ``last_job``, has ``num_retries`` one greater than
    ``last_job.num_retries``, and is associated with the same original files.

    If dispatching succeeds, ``last_job`` is marked as retried (and
    unsuccessful) and linked to the new job. If dispatching reports failure
    or raises, the new job is deleted and ``last_job`` is left unmodified so
    a later loop can try again.
    """
    new_job = DownloaderJob(
        num_retries=last_job.num_retries + 1,
        downloader_task=last_job.downloader_task,
        accession_code=last_job.accession_code,
    )
    new_job.save()

    # Carry every original file of the old job over to the new one.
    for original_file in last_job.original_files.all():
        DownloaderJobOriginalFileAssociation.objects.get_or_create(
            downloader_job=new_job, original_file=original_file)

    logger.debug(
        "Requeuing Downloader Job which had ID %d with a new Downloader Job with ID %d.",
        last_job.id, new_job.id)
    try:
        dispatched = send_job(Downloaders[last_job.downloader_task],
                              job=new_job,
                              is_dispatch=True)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate. logger.exception
        # records the traceback, so the cause of the failure is not lost.
        logger.exception(
            "Failed to requeue Downloader Job which had ID %d with a new Downloader Job with ID %d.",
            last_job.id, new_job.id)
        # Can't communicate with nomad just now, leave the job for a later loop.
        new_job.delete()
        return

    if dispatched:
        last_job.retried = True
        last_job.success = False
        last_job.retried_job = new_job
        last_job.save()
    else:
        # Can't communicate with nomad just now, leave the job for a later loop.
        new_job.delete()
Example #2
0
def requeue_downloader_job(last_job: DownloaderJob) -> bool:
    """Queue a new downloader job to retry ``last_job``.

    The new downloader job will have num_retries one greater than
    last_job.num_retries. Its RAM amount is stepped up (1024 -> 4096 ->
    16384) when ``last_job`` has a start time and no failure reason;
    otherwise the RAM amount is carried over unchanged.

    The job is not requeued, and ``last_job`` is marked ``no_retry`` with a
    failure reason, when:
      * it has no associated original file,
      * its first original file no longer needs processing, or
      * the first sample of that file looks dbGaP access controlled.

    Returns:
        True if the new job was dispatched successfully, False otherwise
        (including the no-retry cases above and dispatch failures, in which
        case the newly created job is deleted again).
    """
    num_retries = last_job.num_retries + 1

    ram_amount = last_job.ram_amount
    # If there's no start time then it's likely that the instance got
    # cycled which means we didn't get OOM-killed, so we don't need to
    # increase the RAM amount.
    if last_job.start_time and last_job.failure_reason is None:
        if ram_amount == 1024:
            ram_amount = 4096
        elif ram_amount == 4096:
            ram_amount = 16384

    first_file = last_job.original_files.first()

    if not first_file:
        last_job.no_retry = True
        last_job.success = False
        last_job.failure_reason = (
            "Foreman told to requeue a DownloaderJob without an OriginalFile - why?!"
        )
        last_job.save()
        logger.info(
            "Foreman told to requeue a DownloaderJob without an OriginalFile - why?!",
            last_job=str(last_job),
        )
        return False

    if not first_file.needs_processing():
        last_job.no_retry = True
        last_job.success = False
        last_job.failure_reason = "Foreman told to redownload job with prior successful processing."
        last_job.save()
        logger.info(
            "Foreman told to redownload job with prior successful processing.",
            last_job=str(last_job),
        )
        return False

    first_sample = first_file.samples.first()

    # This is a magic string that all the dbGaP studies appear to have
    if first_sample and ("in the dbGaP study" in first_sample.title):
        last_job.no_retry = True
        last_job.success = False
        last_job.failure_reason = "Sample is dbGaP access controlled."
        last_job.save()
        logger.info(
            "Avoiding requeuing for DownloaderJob for dbGaP run accession: " +
            str(first_sample.accession_code))
        return False

    new_job = DownloaderJob(
        num_retries=num_retries,
        downloader_task=last_job.downloader_task,
        ram_amount=ram_amount,
        accession_code=last_job.accession_code,
        was_recreated=last_job.was_recreated,
    )
    new_job.save()

    # Carry every original file of the old job over to the new one.
    for original_file in last_job.original_files.all():
        DownloaderJobOriginalFileAssociation.objects.get_or_create(
            downloader_job=new_job, original_file=original_file)

    logger.debug(
        "Requeuing Downloader Job which had ID %d with a new Downloader Job with ID %d.",
        last_job.id,
        new_job.id,
    )
    try:
        dispatched = send_job(Downloaders[last_job.downloader_task],
                              job=new_job,
                              is_dispatch=True)
    except Exception:
        # logger.exception keeps the traceback so the cause is not lost.
        logger.exception(
            "Failed to requeue DownloaderJob which had ID %d with a new DownloaderJob with ID %d.",
            last_job.id,
            new_job.id,
        )
        # Can't communicate with Batch just now, leave the job for a later loop.
        new_job.delete()
        return False

    if not dispatched:
        # Can't communicate with Batch just now, leave the job for a later loop.
        new_job.delete()
        return False

    last_job.retried = True
    last_job.success = False
    last_job.retried_job = new_job
    last_job.save()
    return True