def task_status(task_id):
    """
    Check and return the current state of the create_and_upload_archive celery task with task id
    of <task_id>.

    If the task is unknown, state will be PENDING. Once the task starts, it will be updated to
    STARTED and finally, once it completes, it will be either SUCCESS (no exceptions) or FAILURE.

    See update_state() within create_and_upload_archive and
    http://celery.readthedocs.org/en/latest/reference/celery.states.html for more details.

    If state is SUCCESS, it is safe to check response['s3_urls'] for the archives submitted to s3.
    """
    task = create_and_upload_archive.AsyncResult(task_id)
    task_tracker = tables.ArchiverTask.query.filter(tables.ArchiverTask.task_id == task_id).first()
    log = logger.bind(archiver_task=task_id, archiver_task_state=task.state)
    log.info("checking status of task id {}: current state {}".format(task_id, task.state))
    task_info = task.info or {}
    response = {
        'state': task.state,
    }
    if task.state != 'FAILURE':
        response['status'] = task_info.get('status', 'no status available at this point.')
        response['src_url'] = task_info.get('src_url', '')
        response['s3_urls'] = task_info.get('s3_urls', {})
    else:
        # something went wrong
        response['status'] = str(task.info)  # this is the exception raised
        response['src_url'] = ''
        response['s3_urls'] = {}

    # archiver does not create any custom states, so we can assume only the default states apply:
    # http://docs.celeryproject.org/en/latest/userguide/tasks.html#task-states
    # therefore, delete our task tracker from the db if the celery state is a final state,
    # i.e. not RETRY, STARTED, or PENDING
    if task_tracker:
        if task.state in FINISHED_STATES:
            delete_tracker(task_tracker)
        elif task.state == "PENDING" and task_tracker.pending_expires_at < now():
            log.info("Task {} has expired from pending too long. Re-creating task".format(task.id))
            renew_tracker_pending_expiry(task_tracker)  # let exceptions bubble up before moving on
            create_and_upload_archive.apply_async(args=[task_tracker.src_url, task_tracker.s3_key],
                                                  task_id=task.id)
            response['state'] = 'RETRY'
            response['status'] = 'Task has expired from pending for too long. Re-creating task.'
        elif task_tracker.state != task.state:
            update_tracker_state(task_tracker, task.state)

    return MozharnessArchiveTask(**response)
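
A minimal sketch of how a client might consume the endpoint backed by task_status, polling until the
celery task reaches a final state. The wait_for_archive_task helper, the use of the requests library,
and the assumption that the response exposes the MozharnessArchiveTask fields as top-level JSON are
all illustrative, not part of the blueprint above.

import time
import requests

def wait_for_archive_task(status_url, interval=5, timeout=300):
    # Poll the task_status endpoint until the task leaves PENDING/STARTED/RETRY.
    # status_url is assumed to be the Location header returned by the archiver's 202 response.
    deadline = time.time() + timeout
    while time.time() < deadline:
        body = requests.get(status_url).json()
        if body['state'] in ('SUCCESS', 'FAILURE'):
            return body  # on SUCCESS, body['s3_urls'] maps each region to a signed URL
        time.sleep(interval)
    raise RuntimeError("archive task did not finish within {}s".format(timeout))
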
Example #3
def get_archive(src_url, key, preferred_region):
    """
    A generic getter for retrieving an s3 location of an archive where the archive is based off a
    src_url.

    sub-dir: hg.mozilla.org supports archives of subdirectories within a repository. This
    flexibility allows for creating archives of only a portion of what would normally be an entire
    repo archive.

    logic flow:
    If there is already a key within s3, a redirect link is given for the s3 location. If the key
    does not exist, download the archive from the src url, upload it to s3 for each supported
    region, and return all uploaded s3 url locations.

    When the key does not exist, the remaining work is assigned to a celery background task, and a
    url location is returned immediately for obtaining task state updates.
    """
    buckets = current_app.config['ARCHIVER_S3_BUCKETS']
    # dict keys are not indexable on python 3, so build a list before choosing at random
    random_region = list(buckets.keys())[randint(0, len(buckets) - 1)]
    # use preferred region if available, otherwise choose a valid one at random
    region = preferred_region if preferred_region and preferred_region in buckets else random_region
    bucket = buckets[region]
    s3 = current_app.aws.connect_to('s3', region)
    session = current_app.db.session('relengapi')

    # first, see if the key exists
    if not s3.get_bucket(bucket).get_key(key):
        task_id = key.replace('/', '_')  # keep things simple and avoid slashes in task url
        # can't use unique support:
        # api.pub.build.mozilla.org/docs/development/databases/#unique-row-support-get-or-create
        # because we want to know when the row doesn't exist before creating it
        tracker = tables.ArchiverTask.query.filter(tables.ArchiverTask.task_id == task_id).first()
        if tracker and tracker.state in FINISHED_STATES:
            log = logger.bind(archiver_task=task_id, archiver_task_state=tracker.state)
            log.info('Task tracker: {} exists but finished with state: '
                     '{}'.format(task_id, tracker.state))
            # remove tracker and try celery task again
            delete_tracker(tracker)
            tracker = None
        if not tracker:
            log = logger.bind(archiver_task=task_id)
            log.info("Creating new celery task and task tracker for: {}".format(task_id))
            task = create_and_upload_archive.apply_async(args=[src_url, key], task_id=task_id)
            if task and task.id:
                pending_expires_at = now() + datetime.timedelta(seconds=PENDING_EXPIRES_IN)
                session.add(tables.ArchiverTask(task_id=task.id, s3_key=key, created_at=now(),
                                                pending_expires_at=pending_expires_at,
                                                src_url=src_url, state="PENDING"))
                session.commit()
            else:
                return {}, 500
        return {}, 202, {'Location': url_for('archiver.task_status', task_id=task_id)}

    logger.info("generating GET URL to {}, expires in {}s".format(key, GET_EXPIRES_IN))
    # return 302 pointing to s3 url with archive
    signed_url = s3.generate_url(
        method='GET', expires_in=GET_EXPIRES_IN,
        bucket=bucket, key=key
    )
    return redirect(signed_url)
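
To make the two response paths of get_archive concrete, here is a rough client sketch. The archive
URL, the requests usage, and the wait_for_archive_task helper sketched earlier are assumptions for
illustration, not part of the blueprint code.

import requests

def fetch_archive(archive_url):
    # The first response is either a 302 redirect straight to a signed s3 URL (the key already
    # exists) or a 202 whose Location header points at the task_status endpoint.
    resp = requests.get(archive_url, allow_redirects=False)
    if resp.status_code == 302:
        return requests.get(resp.headers['Location'])  # fetch the archive from s3 directly
    if resp.status_code == 202:
        result = wait_for_archive_task(resp.headers['Location'])
        if result['state'] == 'SUCCESS':
            # any region's URL will do; pick one arbitrarily
            return requests.get(next(iter(result['s3_urls'].values())))
        raise RuntimeError("archive task failed: {}".format(result['status']))
    resp.raise_for_status()
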
Example #4
def test_successful_upload_archive_response(app):
    setup_buckets(app, cfg)
    rev, repo, subdir, suffix = '203e1025a826', 'mozilla-central', 'testing/mozharness', 'tar.gz'
    key = '{repo}-{rev}.{suffix}'.format(repo=repo, rev=rev, suffix=suffix)
    if subdir:
        key += '/{}'.format(subdir)
    src_url = cfg['ARCHIVER_HGMO_URL_TEMPLATE'].format(
        repo=repo, rev=rev, suffix=suffix, subdir='testing/mozharness')
    with app.app_context():
        with mock.patch("relengapi.blueprints.archiver.tasks.requests.get") as get, \
                mock.patch("relengapi.blueprints.archiver.tasks.requests.head") as head:
            get.return_value = fake_200_response()
            head.return_value = fake_200_response()
            task = create_and_upload_archive.apply_async(args=[src_url, key],
                                                         task_id=key.replace('/', '_'))
    expected_regions = [region for region in cfg['ARCHIVER_S3_BUCKETS']]
    all_regions_have_s3_urls = [task.info.get("s3_urls", {}).get(region) for region in expected_regions]
    assert all(all_regions_have_s3_urls), "s3 urls not uploaded for each region!"
    assert task.info.get('src_url') == src_url, "src url doesn't match upload response!"
    assert task.state == "SUCCESS", "completed task's state isn't SUCCESS!"
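
fake_200_response and fake_404_response come from the archiver test utilities and are not shown in
this section; the sketch below is a hypothetical stand-in showing the kind of response-like mock the
patched requests calls would return, so the tests here read on their own.

import mock

def fake_200_response():
    # hypothetical stand-in for the real test_util helper: a mock that looks enough like a
    # requests.Response (status_code plus a streamable body) for the archiver task code
    response = mock.Mock()
    response.status_code = 200
    response.iter_content = lambda chunk_size: iter([b'fake archive bytes'])
    return response

def fake_404_response():
    response = mock.Mock()
    response.status_code = 404
    return response
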
def test_invalid_hg_url(app):
    setup_buckets(app, cfg)
    rev, repo, suffix = "fakeRev", "mozilla-central", "tar.gz"
    key = "{repo}-{rev}.{suffix}".format(repo=repo, rev=rev, suffix=suffix)
    src_url = cfg["ARCHIVER_HGMO_URL_TEMPLATE"].format(repo=repo, rev=rev, suffix=suffix, subdir="testing/mozharness")
    with app.app_context():
        with mock.patch("relengapi.blueprints.archiver.tasks.requests.head") as head:
            head.return_value = fake_404_response()
            task = create_and_upload_archive.apply_async(args=[src_url, key], task_id=key.replace("/", "_"))
    assert "Url not found." in task.info.get("status", {}), "invalid hg url was not caught!"
Example #6
def test_invalid_hg_url(app):
    setup_buckets(app, cfg)
    rev, repo, suffix = 'fakeRev', 'mozilla-central', 'tar.gz'
    key = '{repo}-{rev}.{suffix}'.format(repo=repo, rev=rev, suffix=suffix)
    src_url = cfg['ARCHIVER_HGMO_URL_TEMPLATE'].format(repo=repo, rev=rev, suffix=suffix,
                                                       subdir='testing/mozharness')
    with app.app_context():
        with mock.patch("relengapi.blueprints.archiver.tasks.requests.get") as get:
            get.return_value = fake_404_response()
            task = create_and_upload_archive.apply_async(args=[src_url, key],
                                                         task_id=key.replace('/', '_'))
    assert "Could not get a valid response from src_url" in task.info.get('status', {}), \
        "invalid hg url was not caught!"