Code Example #1
File: __init__.py  Project: Callek/build-relengapi
import datetime
import random

from flask import current_app, redirect, url_for

# tables, logger, delete_tracker, create_and_upload_archive, now(), and the
# FINISHED_STATES / *_EXPIRES_IN constants come from elsewhere in this blueprint.


def get_archive(src_url, key, preferred_region):
    """
    A generic getter for retrieving an s3 location of an archive where the archive is based off a
    src_url.

    sub-dir: hg.mozilla.org supports archives of sub directories within a repository. This
    flexibility allows for creating archives of only a portion of what would normally be an entire
    repo archive.

    logic flow:
     If their is already a key within s3, a re-direct link is given for the
    s3 location. If the key does not exist, download the archive from src url, upload it to s3
    for each region supported and return all uploaded s3 url locations.

     When the key does not exist, the remaining work will be assigned to a celery background task
    with a url location returned immediately for obtaining task state updates.
    """
    buckets = current_app.config['ARCHIVER_S3_BUCKETS']
    # list() is needed because dict views are not indexable on python 3
    random_region = random.choice(list(buckets))
    # use the preferred region if it is valid, otherwise choose a supported one at random
    region = preferred_region if preferred_region and preferred_region in buckets else random_region
    bucket = buckets[region]
    s3 = current_app.aws.connect_to('s3', region)
    session = current_app.db.session('relengapi')

    # first, see if the key exists
    if not s3.get_bucket(bucket).get_key(key):
        task_id = key.replace('/', '_')  # keep things simple and avoid slashes in task url
        # can't use the unique row support
        # (api.pub.build.mozilla.org/docs/development/databases/#unique-row-support-get-or-create)
        # because we need to know whether the row already exists before creating it
        tracker = tables.ArchiverTask.query.filter(tables.ArchiverTask.task_id == task_id).first()
        if tracker and tracker.state in FINISHED_STATES:
            log = logger.bind(archiver_task=task_id, archiver_task_state=tracker.state)
            log.info('Task tracker: {} exists but finished with state: '
                     '{}'.format(task_id, tracker.state))
            # remove tracker and try celery task again
            delete_tracker(tracker)
            tracker = None
        if not tracker:
            log = logger.bind(archiver_task=task_id)
            log.info("Creating new celery task and task tracker for: {}".format(task_id))
            task = create_and_upload_archive.apply_async(args=[src_url, key], task_id=task_id)
            if task and task.id:
                pending_expires_at = now() + datetime.timedelta(seconds=PENDING_EXPIRES_IN)
                session.add(tables.ArchiverTask(task_id=task.id, s3_key=key, created_at=now(),
                                                pending_expires_at=pending_expires_at,
                                                src_url=src_url, state="PENDING"))
                session.commit()
            else:
                return {}, 500
        return {}, 202, {'Location': url_for('archiver.task_status', task_id=task_id)}

    logger.info("generating GET URL to {}, expires in {}s".format(key, GET_EXPIRES_IN))
    # return 302 pointing to s3 url with archive
    signed_url = s3.generate_url(
        method='GET', expires_in=GET_EXPIRES_IN,
        bucket=bucket, key=key
    )
    return redirect(signed_url)
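
Taken together, the endpoint behaves like a simple poll-then-redirect contract: 202 plus a
status Location header while a celery task builds the archive, then a 302 to a signed S3 url
once the key exists. A minimal client sketch (the base url and archive path are hypothetical;
only the 202/302 behavior comes from the code above):

import time

import requests

BASE_URL = 'https://api.example.com'  # hypothetical host running the archiver blueprint


def fetch_archive(archive_path):
    """Poll the archiver endpoint until the signed S3 redirect appears, then download it."""
    while True:
        resp = requests.get(BASE_URL + archive_path, allow_redirects=False)
        if resp.status_code == 302:
            # the key exists in s3: follow the signed, time-limited GET url
            return requests.get(resp.headers['Location']).content
        resp.raise_for_status()  # 202 is fine; anything >= 400 raises here
        # 202: a celery task is building the archive; its status url is in 'Location'
        time.sleep(5)
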
def create_fake_tracker_row(app, id, s3_key='key', created_at=None, pending_expires_at=None,
                            src_url='https://foo.com', state="PENDING"):
    now = datetime.datetime(2015, 7, 14, 23, 19, 42, tzinfo=pytz.UTC)  # freeze time
    pending_expiry = now + datetime.timedelta(seconds=60)
    if not created_at:
        created_at = now
    if not pending_expires_at:
        pending_expires_at = pending_expiry
    session = app.db.session(tables.DB_DECLARATIVE_BASE)
    session.add(
        tables.ArchiverTask(task_id=id, s3_key=s3_key, created_at=created_at,
                            pending_expires_at=pending_expires_at, src_url=src_url, state=state)
    )
    session.commit()
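
A typical use of this fixture helper is seeding tracker rows in non-default states; for
instance, a PENDING tracker whose pending window has already lapsed (a hypothetical scenario
sketch; datetime and pytz are assumed imported as in the helper above):

def seed_expired_tracker(app):
    # hypothetical usage: a tracker whose pending window expired immediately,
    # e.g. for exercising stale-task cleanup paths
    created = datetime.datetime(2015, 7, 14, 23, 19, 42, tzinfo=pytz.UTC)
    create_fake_tracker_row(app, 'stale-task', created_at=created,
                            pending_expires_at=created - datetime.timedelta(seconds=1))
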
def test_tracker_is_deleted_when_task_status_shows_task_complete(app, client):
    with app.app_context():
        task_id = 'foo'
        session = app.db.session(tables.DB_DECLARATIVE_BASE)
        now = datetime.datetime(2015, 7, 14, 23, 19, 42, tzinfo=pytz.UTC)  # freeze time
        pending_expiry = now + datetime.timedelta(seconds=60)
        session.add(tables.ArchiverTask(task_id=task_id, s3_key='key', created_at=now,
                                        pending_expires_at=pending_expiry,
                                        src_url='https://foo.com', state="PENDING"))
        session.commit()
        with mock.patch("relengapi.blueprints.archiver.create_and_upload_archive") as caua:
            caua.AsyncResult.return_value = fake_successful_task_status()
            client.get('/archiver/status/{task_id}'.format(task_id=task_id))
        tracker = tables.ArchiverTask.query.filter(tables.ArchiverTask.task_id == task_id).first()
        eq_(tracker, None, "tracker was not deleted even though celery task completed.")
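
fake_successful_task_status is not shown in this excerpt; a minimal stand-in would mimic a
finished celery AsyncResult so the status endpoint sees a completed state (a sketch; the real
helper's return shape is an assumption):

def fake_successful_task_status():
    # hypothetical stand-in for the helper used above: an AsyncResult-shaped
    # mock whose state is terminal, so the status view deletes the tracker
    result = mock.Mock()
    result.state = 'SUCCESS'
    result.info = {'s3_urls': {'us-west-2': 'https://signed.example/key'},
                   'src_url': 'https://foo.com'}
    return result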