Example #1
def check_pending_upload(session, pu, _test_shim=lambda: None):
    # we can check the upload any time between the expiration of the URL
    # (after which the user can't make any more changes, but the upload
    # may yet be incomplete) and 1 day afterward (ample time for the upload
    # to complete)
    sha512 = pu.file.sha512
    size = pu.file.size

    log = logger.bind(tooltool_sha512=sha512)

    if time.now() < pu.expires:
        # URL is not expired yet
        return
    elif time.now() > pu.expires + timedelta(days=1):
        # Upload will probably never complete
        log.info(
            "Deleting abandoned pending upload for {}".format(sha512))
        session.delete(pu)
        return

    # connect to S3 and see if the file exists
    s3 = current_app.aws.connect_to('s3', pu.region)
    cfg = current_app.config.get('TOOLTOOL_REGIONS')
    if not cfg or pu.region not in cfg:
        log.warning("Pending upload for {} was to an un-configured "
                    "region".format(sha512))
        session.delete(pu)
        return

    bucket = s3.get_bucket(cfg[pu.region], validate=False)
    key = bucket.get_key(util.keyname(sha512))
    if not key:
        # not uploaded yet
        return

    # commit the session before verifying the file instance, since the
    # DB connection may otherwise go away while we're distracted.
    session.commit()
    _test_shim()

    if not verify_file_instance(sha512, size, key):
        log.warning(
            "Upload of {} was invalid; deleting key".format(sha512))
        key.delete()
        session.delete(pu)
        session.commit()
        return

    log.info("Upload of {} considered valid".format(sha512))
    # add a file instance, but it's OK if it already exists
    try:
        tables.FileInstance(file=pu.file, region=pu.region)
        session.commit()
    except sa.exc.IntegrityError:
        session.rollback()

    # and delete the pending upload
    session.delete(pu)
    session.commit()
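
In the listing above, check_pending_upload is written to be driven by a background grooming pass. The sketch below is only an illustration of how such a pass might call it for every outstanding row; the function name check_pending_uploads and its registration as a periodic task are assumptions, not code from this project.

from flask import current_app

def check_pending_uploads(job_status):
    # Hedged sketch: iterate every PendingUpload row (same `tables` module as
    # above) and let check_pending_upload decide what to do with each one.
    session = current_app.db.session('relengapi')
    for pu in tables.PendingUpload.query.all():
        check_pending_upload(session, pu)
    session.commit()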
Example #2
def check_pending_upload(session, pu, _test_shim=lambda: None):
    # we can check the upload any time between the expiration of the URL
    # (after which the user can't make any more changes, but the upload
    # may yet be incomplete) and 1 day afterward (ample time for the upload
    # to complete)
    sha512 = pu.file.sha512
    size = pu.file.size

    log = logger.bind(tooltool_sha512=sha512, mozdef=True)

    if time.now() < pu.expires:
        # URL is not expired yet
        return
    elif time.now() > pu.expires + timedelta(days=1):
        # Upload will probably never complete
        log.info(
            "Deleting abandoned pending upload for {}".format(sha512))
        session.delete(pu)
        return

    # connect to S3 and see if the file exists
    s3 = current_app.aws.connect_to('s3', pu.region)
    cfg = current_app.config.get('TOOLTOOL_REGIONS')
    if not cfg or pu.region not in cfg:
        log.warning("Pending upload for {} was to an un-configured "
                    "region".format(sha512))
        session.delete(pu)
        return

    bucket = s3.get_bucket(cfg[pu.region], validate=False)
    key = bucket.get_key(util.keyname(sha512))
    if not key:
        # not uploaded yet
        return

    # commit the session before verifying the file instance, since the
    # DB connection may otherwise go away while we're distracted.
    session.commit()
    _test_shim()

    if not verify_file_instance(sha512, size, key):
        log.warning(
            "Upload of {} was invalid; deleting key".format(sha512))
        key.delete()
        session.delete(pu)
        session.commit()
        return

    log.info("Upload of {} considered valid".format(sha512))
    # add a file instance, but it's OK if it already exists
    try:
        tables.FileInstance(file=pu.file, region=pu.region)
        session.commit()
    except sa.exc.IntegrityError:
        session.rollback()

    # and delete the pending upload
    session.delete(pu)
    session.commit()
Example #3
def upload_complete(digest):
    """Signal that a file has been uploaded and the server should begin
    validating it.  This is merely an optimization: the server also polls
    occasionally for uploads and validates them when they appear.

    Uploads cannot be safely validated until the upload URL has expired, which
    occurs a short time after the URL is generated (currently 60 seconds but
    subject to change).

    If the upload URL has expired, then the response is an HTTP 202 indicating
    that the signal has been accepted.  If the URL has not expired, then the
    response is an HTTP 409, and the ``X-Retry-After`` header gives a time,
    in seconds, that the client should wait before trying again."""
    if not is_valid_sha512(digest):
        raise BadRequest("Invalid sha512 digest")

    # if the pending upload is still valid, then we can't check this file
    # yet, so return 409 Conflict.  If there is no PU, or it's expired,
    # then we can proceed.
    file = tables.File.query.filter(tables.File.sha512 == digest).first()
    if file:
        for pu in file.pending_uploads:
            until = pu.expires - time.now()
            if until > datetime.timedelta(0):
                # add 1 second to avoid rounding / skew errors
                hdr = {'X-Retry-After': str(1 + int(until.total_seconds()))}
                return Response(status=409, headers=hdr)

    # start a celery task in the background and return immediately
    grooming.check_file_pending_uploads.delay(digest)
    return '{}', 202
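
A client honoring the contract in this docstring retries on 409 using the X-Retry-After header and stops once it sees the 202. The snippet below is a hedged client-side illustration only; the endpoint path and the use of the requests library are assumptions.

import time
import requests  # assumed client-side dependency

def notify_upload_complete(base_url, digest):
    # poll until the server accepts the completion signal with a 202
    url = '{}/tooltool/upload/complete/{}'.format(base_url, digest)  # hypothetical path
    while True:
        resp = requests.get(url)
        if resp.status_code == 409:
            # the signed URL has not expired yet; wait as instructed, then retry
            time.sleep(int(resp.headers.get('X-Retry-After', '1')))
            continue
        resp.raise_for_status()  # anything other than a 2xx is an error; expect 202
        return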
Example #4
    def run_task(self, task):
        """Actually run a task, inserting a DB row and generating the celery task."""
        job = tables.BadpennyJob(task_id=task.task_id, created_at=time.now())
        current_app.db.session('relengapi').add(job)
        current_app.db.session('relengapi').commit()

        execution.submit_job(task_name=task.name, job_id=job.id)
Example #5
def test_upload_batch_success_existing_pending_upload(client, app):
    """A successful POST to /upload updates the 'expires' column of any relevant
    pending uploads."""
    with set_time(NOW - 30):
        add_file_to_db(app, ONE, regions=[], pending_regions=['us-east-1'])
    batch = mkbatch()
    with set_time():
        with not_so_random_choice():
            resp = upload_batch(client, batch)
        result = assert_batch_response(resp,
                                       files={
                                           'one': {
                                               'algorithm': 'sha512',
                                               'size': len(ONE),
                                               'digest': ONE_DIGEST
                                           }
                                       })
        assert_signed_url(result['files']['one']['put_url'],
                          ONE_DIGEST,
                          method='PUT',
                          expires_in=60)
        assert_pending_upload(app,
                              ONE_DIGEST,
                              'us-east-1',
                              expires=relengapi_time.now() +
                              datetime.timedelta(seconds=60))
        assert_batch_row(app,
                         result['id'],
                         files=[('one', len(ONE), ONE_DIGEST, [])])
Example #6
def get_archive(src_url, key, preferred_region):
    """
    A generic getter for retrieving an s3 location of an archive where the archive is based off a
    src_url.

    sub-dir: hg.mozilla.org supports archives of sub directories within a repository. This
    flexibility allows for creating archives of only a portion of what would normally be an entire
    repo archive.

    logic flow:
     If their is already a key within s3, a re-direct link is given for the
    s3 location. If the key does not exist, download the archive from src url, upload it to s3
    for each region supported and return all uploaded s3 url locations.

     When the key does not exist, the remaining work will be assigned to a celery background task
    with a url location returned immediately for obtaining task state updates.
    """
    buckets = current_app.config['ARCHIVER_S3_BUCKETS']
    random_region = buckets.keys()[randint(0, len(buckets.keys()) - 1)]
    # use preferred region if available otherwise choose a valid one at random
    region = preferred_region if preferred_region and preferred_region in buckets else random_region
    bucket = buckets[region]
    s3 = current_app.aws.connect_to('s3', region)
    session = current_app.db.session('relengapi')

    # first, see if the key exists
    if not s3.get_bucket(bucket).get_key(key):
        task_id = key.replace('/', '_')  # keep things simple and avoid slashes in task url
        # can't use unique support:
        # api.pub.build.mozilla.org/docs/development/databases/#unique-row-support-get-or-create
        # because we want to know when the row doesn't exist before creating it
        tracker = tables.ArchiverTask.query.filter(tables.ArchiverTask.task_id == task_id).first()
        if tracker and tracker.state in FINISHED_STATES:
            log = logger.bind(archiver_task=task_id, archiver_task_state=tracker.state)
            log.info('Task tracker: {} exists but finished with state: '
                     '{}'.format(task_id, tracker.state))
            # remove tracker and try celery task again
            delete_tracker(tracker)
            tracker = None
        if not tracker:
            log = logger.bind(archiver_task=task_id)
            log.info("Creating new celery task and task tracker for: {}".format(task_id))
            task = create_and_upload_archive.apply_async(args=[src_url, key], task_id=task_id)
            if task and task.id:
                pending_expires_at = now() + datetime.timedelta(seconds=PENDING_EXPIRES_IN)
                session.add(tables.ArchiverTask(task_id=task.id, s3_key=key, created_at=now(),
                                                pending_expires_at=pending_expires_at,
                                                src_url=src_url, state="PENDING"))
                session.commit()
            else:
                return {}, 500
        return {}, 202, {'Location': url_for('archiver.task_status', task_id=task_id)}

    logger.info("generating GET URL to {}, expires in {}s".format(key, GET_EXPIRES_IN))
    # return 302 pointing to s3 url with archive
    signed_url = s3.generate_url(
        method='GET', expires_in=GET_EXPIRES_IN,
        bucket=bucket, key=key
    )
    return redirect(signed_url)
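
From the caller's perspective the docstring above describes two outcomes: a 302 whose Location is a signed S3 GET URL, or a 202 whose Location points at a task-status resource to poll. A minimal client sketch under those assumptions (the endpoint path and query parameter are guesses, not part of the source):

import requests  # assumed client-side dependency

def request_archive(base_url, key, preferred_region=None):
    # returns ('ready', s3_url) or ('pending', task_status_url)
    url = '{}/archiver/{}'.format(base_url, key)  # hypothetical path
    resp = requests.get(url, params={'preferred_region': preferred_region},
                        allow_redirects=False)
    if resp.status_code == 302:
        return 'ready', resp.headers['Location']
    resp.raise_for_status()  # otherwise expect 202 Accepted
    return 'pending', base_url + resp.headers['Location']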
Example #7
def update_tree_status(session, tree, status=None, reason=None,
                       tags=[], message_of_the_day=None):
    """Update the given tree's status; note that this does not commit
    the session.  Supply a tree object or name."""
    if status is not None:
        tree.status = status
    if reason is not None:
        tree.reason = reason
    if message_of_the_day is not None:
        tree.message_of_the_day = message_of_the_day

    # log it if the reason or status have changed
    if status or reason:
        if status is None:
            status = 'no change'
        if reason is None:
            reason = 'no change'
        l = model.DbLog(
            tree=tree.tree,
            when=relengapi_time.now(),
            who=str(current_user),
            status=status,
            reason=reason,
            tags=tags)
        session.add(l)

    tree_cache_invalidate(tree.tree)
Example #8
def renew_tracker_pending_expiry(tracker):
    pending_expires_at = now() + datetime.timedelta(seconds=PENDING_EXPIRES_IN)
    session = current_app.db.session('relengapi')
    logger.info("renewing tracker {} with pending expiry: {}".format(
                tracker.id, pending_expires_at), archiver_task=tracker.task_id)
    tracker.pending_expires_at = pending_expires_at
    session.commit()
Example #9
def cleanup_old_jobs(job_status):
    session = current_app.db.session('relengapi')
    Task = tables.BadpennyTask
    Job = tables.BadpennyJob

    old_job_days = current_app.config.get('BADPENNY_OLD_JOB_DAYS', 7)
    old = time.now() - datetime.timedelta(days=old_job_days)
    deleted = 0

    for task in Task.query.all():
        # Iterate until we find a job that's not too old.  Only
        # delete on the next iteration to avoid deleting the most
        # recent job.
        to_delete = None
        for job in Job.query.filter(Job.task_id == task.id).order_by(Job.created_at):
            if to_delete:
                for log in to_delete.logs:
                    session.delete(log)
                session.delete(to_delete)
                to_delete = None
                deleted += 1

            if job.created_at < old:
                to_delete = job
            else:
                break

    if deleted:
        logger.info("removed %d old jobs", deleted)
        session.commit()
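
To make the delayed-delete trick above concrete (illustration only): with a 7-day cutoff and a task whose jobs were created 10, 9, and 1 days ago (oldest first), the 10-day job is deleted while visiting the 9-day job, the 9-day job is deleted while visiting the 1-day job, and the loop then breaks. If every job for a task were older than the cutoff, the newest of them would still survive, because nothing follows it to trigger the deferred deletion.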
Example #10
def cleanup_old_jobs(job_status):
    session = current_app.db.session('relengapi')
    Task = tables.BadpennyTask
    Job = tables.BadpennyJob

    old_job_days = current_app.config.get('BADPENNY_OLD_JOB_DAYS', 7)
    old = time.now() - datetime.timedelta(days=old_job_days)
    deleted = 0

    for task in Task.query.all():
        # Iterate until we find a job that's not too old.  Only
        # delete on the next iteration to avoid deleting the most
        # recent job.
        to_delete = None
        for job in Job.query.filter(Job.task_id == task.id).order_by(Job.created_at):
            if to_delete:
                for log in to_delete.logs:
                    session.delete(log)
                session.delete(to_delete)
                to_delete = None
                deleted += 1

            if job.created_at < old:
                to_delete = job
            else:
                break

    if deleted:
        logger.info("removed %d old jobs", deleted)
        session.commit()
Example #11
def upload_complete(digest):
    """Signal that a file has been uploaded and the server should begin
    validating it.  This is merely an optimization: the server also polls
    occasionally for uploads and validates them when they appear.

    Uploads cannot be safely validated until the upload URL has expired, which
    occurs a short time after the URL is generated (currently 60 seconds but
    subject to change).

    If the upload URL has expired, then the response is an HTTP 202 indicating
    that the signal has been accepted.  If the URL has not expired, then the
    response is an HTTP 409, and the ``X-Retry-After`` header gives a time,
    in seconds, that the client should wait before trying again."""
    if not is_valid_sha512(digest):
        raise BadRequest("Invalid sha512 digest")

    # if the pending upload is still valid, then we can't check this file
    # yet, so return 409 Conflict.  If there is no PU, or it's expired,
    # then we can proceed.
    file = tables.File.query.filter(tables.File.sha512 == digest).first()
    if file:
        for pu in file.pending_uploads:
            until = pu.expires - time.now()
            if until > datetime.timedelta(0):
                # add 1 second to avoid rounding / skew errors
                hdr = {'X-Retry-After': str(1 + int(until.total_seconds()))}
                return Response(status=409, headers=hdr)

    # start a celery task in the background and return immediately
    grooming.check_file_pending_uploads.delay(digest)
    return '{}', 202
Example #12
def task_status(task_id):
    """
    Check and return the current state of the create_and_upload_archive celery task with task id
    of <task_id>.

    If the task is unknown, state will be PENDING. Once the task starts it will be updated to
    STARTED and finally, if it completes, it will be either SUCCESS (no exceptions), or FAILURE.

    See update_state() within create_and_upload_archive and
    http://celery.readthedocs.org/en/latest/reference/celery.states.html for more details.

    If state is SUCCESS, it is safe to check response['s3_urls'] for the archives submitted to S3.
    """
    task = create_and_upload_archive.AsyncResult(task_id)
    task_tracker = tables.ArchiverTask.query.filter(
        tables.ArchiverTask.task_id == task_id).first()
    log = logger.bind(archiver_task=task_id, archiver_task_state=task.state)
    log.info("checking status of task id {}: current state {}".format(
        task_id, task.state))
    task_info = task.info or {}
    response = {
        'state': task.state,
    }
    if task.state != 'FAILURE':
        response['status'] = task_info.get(
            'status', 'no status available at this point.')
        response['src_url'] = task_info.get('src_url', '')
        response['s3_urls'] = task_info.get('s3_urls', {})
    else:
        # something went wrong
        response['status'] = str(task.info)  # this is the exception raised
        response['src_url'] = ''
        response['s3_urls'] = {}

    # archiver does not create any custom states, so we can assume to have only the defaults:
    # http://docs.celeryproject.org/en/latest/userguide/tasks.html#task-states
    # therefore, delete our state_id tracker from the db if the celery state is in a final state:
    # e.g. not RETRY, STARTED, or PENDING
    if task_tracker:
        if task.state in FINISHED_STATES:
            delete_tracker(task_tracker)
        elif task.state == "PENDING" and task_tracker.pending_expires_at < now():
            log.info("Task {} has expired from pending too long. "
                     "Re-creating task".format(task.id))
            renew_tracker_pending_expiry(task_tracker)  # let exceptions bubble up before moving on
            create_and_upload_archive.apply_async(
                args=[task_tracker.src_url, task_tracker.s3_key], task_id=task.id)
            response['state'] = 'RETRY'
            response['status'] = 'Task has expired from pending for too long. Re-creating task.'
        elif task_tracker.state != task.state:
            update_tracker_state(task_tracker, task.state)

    return MozharnessArchiveTask(**response)
Example #13
    def run_task(self, task):
        """Actually run a task, inserting a DB row and generating the celery task."""
        job = tables.BadpennyJob(
            task_id=task.task_id,
            created_at=time.now())
        current_app.db.session('relengapi').add(job)
        current_app.db.session('relengapi').commit()

        execution.submit_job(task_name=task.name, job_id=job.id)
Example #14
def add_batch_to_db(app, author, message, files):
    with app.app_context():
        session = app.db.session("relengapi")
        batch = tables.Batch(author=author, message=message, uploaded=relengapi_time.now())
        session.add(batch)
        for filename, file in files.iteritems():
            session.add(tables.BatchFile(filename=filename, batch=batch, file=file))
        session.commit()
        return batch
Example #15
    def run(self, parser, args):
        logger.info("Synchronizing tasks into the DB")
        self.sync_tasks()

        logger.info("Creating jobs for overdue tasks")
        now = time.now()
        for task in self.runnable_tasks(now):
            logger.info("Running %r", task.name)
            self.run_task(task)
Example #16
    def run(self, parser, args):
        logger.info("Synchronizing tasks into the DB")
        self.sync_tasks()

        logger.info("Creating jobs for overdue tasks")
        now = time.now()
        for task in self.runnable_tasks(now):
            logger.info("Running %r", task.name)
            self.run_task(task)
Example #17
def add_batch_to_db(app, author, message, files):
    with app.app_context():
        session = app.db.session('relengapi')
        batch = tables.Batch(author=author, message=message,
                             uploaded=relengapi_time.now())
        session.add(batch)
        for filename, file in files.iteritems():
            session.add(tables.BatchFile(filename=filename, batch=batch, file=file))
        session.commit()
        return batch
Example #18
    def _finish(self, successful):
        session = current_app.db.session('relengapi')

        self.job.completed_at = time.now()
        self.job.successful = successful
        if self._log_output:
            content = u'\n'.join(self._log_output)
            l = tables.BadpennyJobLog(id=self.job.id, content=content)
            session.add(l)
        session.commit()
Example #19
def test_check_pending_upload_not_expired(app):
    """check_pending_upload doesn't check anything if the URL isn't expired yet"""
    with app.app_context(), set_time():
        expires = time.now() + timedelta(seconds=10)  # 10s shy
        pu_row, file_row = add_pending_upload_and_file_row(
            len(DATA), DATA_DIGEST, expires, 'us-west-2')
        session = app.db.session('relengapi')
        grooming.check_pending_upload(session, pu_row)
        session.commit()
        eq_(len(tables.PendingUpload.query.all()), 1)  # PU still exists
Example #20
def cleanup_old_tasks(job_status):
    """delete any tracker task if it is older than the time a task can live for."""
    session = current_app.db.session(tables.DB_DECLARATIVE_BASE)
    expiry_cutoff = now() - datetime.timedelta(seconds=TASK_TIME_OUT)
    table = tables.ArchiverTask
    for tracker in session.query(table).order_by(table.created_at):
        if tracker.created_at < expiry_cutoff:
            delete_tracker(tracker)
        else:
            break
Example #21
    def _finish(self, successful):
        session = current_app.db.session('relengapi')

        self.job.completed_at = time.now()
        self.job.successful = successful
        if self._log_output:
            content = u'\n'.join(self._log_output)
            l = tables.BadpennyJobLog(id=self.job.id, content=content)
            session.add(l)
        session.commit()
Example #22
def test_check_pending_upload_bad_region(app):
    """check_pending_upload deletes a pending upload with a bad region"""
    with app.app_context(), set_time():
        expires = time.now() - timedelta(seconds=90)
        pu_row, file_row = add_pending_upload_and_file_row(
            len(DATA), DATA_DIGEST, expires, 'us-west-1')
        session = app.db.session('relengapi')
        grooming.check_pending_upload(session, pu_row)
        session.commit()
        eq_(tables.PendingUpload.query.all(), [])  # PU is deleted
Example #23
def cleanup_old_tasks(job_status):
    """delete any tracker task if it is older than the time a task can live for."""
    session = current_app.db.session('relengapi')
    expiry_cutoff = now() - datetime.timedelta(seconds=TASK_TIME_OUT)
    table = tables.ArchiverTask
    for tracker in session.query(table).order_by(table.created_at):
        if tracker.created_at < expiry_cutoff:
            delete_tracker(tracker)
        else:
            break
Example #24
def test_check_pending_upload_bad_region(app):
    """check_pending_upload deletes a pending upload with a bad region"""
    with app.app_context(), set_time():
        expires = time.now() - timedelta(seconds=90)
        pu_row, file_row = add_pending_upload_and_file_row(
            len(DATA), DATA_DIGEST, expires, 'us-west-1')
        session = app.db.session('relengapi')
        grooming.check_pending_upload(session, pu_row)
        session.commit()
        eq_(tables.PendingUpload.query.all(), [])  # PU is deleted
Example #25
def test_check_pending_upload_not_expired(app):
    """check_pending_upload doesn't check anything if the URL isn't expired yet"""
    with app.app_context(), set_time():
        expires = time.now() + timedelta(seconds=10)  # 10s shy
        pu_row, file_row = add_pending_upload_and_file_row(
            len(DATA), DATA_DIGEST, expires, 'us-west-2')
        session = app.db.session('relengapi')
        grooming.check_pending_upload(session, pu_row)
        session.commit()
        eq_(len(tables.PendingUpload.query.all()), 1)  # PU still exists
Example #26
def test_check_file_pending_uploads(app):
    """check_file_pending_uploads calls check_pending_upload for each PU for the file"""
    with app.app_context(), set_time():
        expires = time.now() - timedelta(seconds=90)
        pu_row, file_row = add_pending_upload_and_file_row(
            len(DATA), DATA_DIGEST, expires, 'us-west-2')
        with mock.patch('relengapi.blueprints.tooltool.grooming.check_pending_upload') as cpu:
            pending_uploads = []
            cpu.side_effect = lambda sess, pu: pending_uploads.append(pu)
            grooming.check_file_pending_uploads(DATA_DIGEST)
            assert len(pending_uploads) == 1
Example #27
    def _finish(self, successful):
        self._update_job({
            tables.BadpennyJob.completed_at: time.now(),
            tables.BadpennyJob.successful: successful,
        })
        if self._log_output:
            session = current_app.db.session('relengapi')
            content = u'\n'.join(self._log_output)
            l = tables.BadpennyJobLog(id=self.job_id, content=content)
            session.add(l)
            session.commit()
Example #28
    def _finish(self, successful):
        self._update_job({
            tables.BadpennyJob.completed_at: time.now(),
            tables.BadpennyJob.successful: successful,
        })
        if self._log_output:
            session = current_app.db.session('relengapi')
            content = u'\n'.join(self._log_output)
            l = tables.BadpennyJobLog(id=self.job_id, content=content)
            session.add(l)
            session.commit()
Example #29
def test_check_file_pending_uploads(app):
    """check_file_pending_uploads calls check_pending_upload for each PU for the file"""
    with app.app_context(), set_time():
        expires = time.now() - timedelta(seconds=90)
        pu_row, file_row = add_pending_upload_and_file_row(
            len(DATA), DATA_DIGEST, expires, 'us-west-2')
        with mock.patch('relengapi.blueprints.tooltool.grooming.check_pending_upload') as cpu:
            pending_uploads = []
            cpu.side_effect = lambda sess, pu: pending_uploads.append(pu)
            grooming.check_file_pending_uploads(DATA_DIGEST)
            assert len(pending_uploads) == 1
Example #30
def test_check_pending_upload_no_upload(app):
    """check_pending_upload leaves the PU in place if the upload is
    not complete"""
    with app.app_context(), set_time():
        expires = time.now() - timedelta(seconds=90)
        pu_row, file_row = add_pending_upload_and_file_row(
            len(DATA), DATA_DIGEST, expires, 'us-west-2')
        session = app.db.session('relengapi')
        grooming.check_pending_upload(session, pu_row)
        session.commit()
        # PU has not been deleted
        assert tables.PendingUpload.query.first().file.sha512 == DATA_DIGEST
Example #31
def test_check_pending_upload_no_upload(app):
    """check_pending_upload leaves the PU in place if the upload is
    not complete"""
    with app.app_context(), set_time():
        expires = time.now() - timedelta(seconds=90)
        pu_row, file_row = add_pending_upload_and_file_row(
            len(DATA), DATA_DIGEST, expires, 'us-west-2')
        session = app.db.session('relengapi')
        grooming.check_pending_upload(session, pu_row)
        session.commit()
        # PU has not been deleted
        assert tables.PendingUpload.query.first().file.sha512 == DATA_DIGEST
Example #32
def update_trees(body):
    """
    Update trees' status.

    If the update indicates that the previous state should be saved, then a new
    change will be added to the stack containing the previous status and
    reason.  In this case, both reason and status must be supplied.

    The `tags` property must not be empty if `status` is `closed`.
    """
    session = current_app.db.session('relengapi')
    trees = [session.query(model.DbTree).get(t) for t in body.trees]
    if not all(trees):
        raise NotFound("one or more trees not found")

    if body.status == 'closed' and not body.tags:
        raise BadRequest("tags are required when closing a tree")

    if body.remember:
        if body.status is Unset or body.reason is Unset:
            raise BadRequest("must specify status and reason to remember the change")
        # add a new stack entry with the new and existing states
        ch = model.DbStatusChange(
            who=str(current_user),
            reason=body.reason,
            when=relengapi_time.now(),
            status=body.status)
        for tree in trees:
            stt = model.DbStatusChangeTree(
                tree=tree.tree,
                last_state=json.dumps(
                    {'status': tree.status, 'reason': tree.reason}))
            ch.trees.append(stt)
        session.add(ch)

    # update the trees as requested
    def unset_to_none(x):
        return x if x is not Unset else None
    new_status = unset_to_none(body.status)
    new_reason = unset_to_none(body.reason)
    new_motd = unset_to_none(body.message_of_the_day)
    new_tags = unset_to_none(body.tags) or []

    for tree in trees:
        update_tree_status(session, tree,
                           status=new_status,
                           reason=new_reason,
                           message_of_the_day=new_motd,
                           tags=new_tags)

    session.commit()
    return None, 204
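
For reference, the attribute accesses above imply a request body roughly like the following; the concrete wire format and the tree names are illustrative assumptions, not taken from the project's API schema.

example_body = {
    "trees": ["mozilla-central", "mozilla-inbound"],  # illustrative tree names
    "status": "closed",
    "reason": "infra outage",
    "tags": ["infra"],                # must be non-empty when status is "closed"
    "remember": True,                 # push the previous status/reason onto the stack
    "message_of_the_day": "closed for maintenance",
}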
Example #33
def test_check_pending_upload_success(app):
    """check_pending_upload deletes the PU and adds a FileInstance if valid"""
    with app.app_context(), set_time():
        expires = time.now() - timedelta(seconds=90)
        pu_row, file_row = add_pending_upload_and_file_row(
            len(DATA), DATA_DIGEST, expires, 'us-west-2')
        make_key(app, 'us-west-2', 'tt-usw2', DATA_KEY, DATA)
        session = app.db.session('relengapi')
        grooming.check_pending_upload(session, pu_row)
        session.commit()
        eq_(tables.PendingUpload.query.all(), [])  # PU is deleted
        eq_(len(tables.File.query.first().instances), 1)  # FileInstance exists
        assert key_exists(app, 'us-west-2', 'tt-usw2', DATA_KEY)
Example #34
def test_check_pending_upload_not_valid(app):
    """check_pending_upload deletes the PU and the key if the upload is
    invalid."""
    with app.app_context(), set_time():
        expires = time.now() - timedelta(seconds=90)
        pu_row, file_row = add_pending_upload_and_file_row(
            len(DATA), DATA_DIGEST, expires, 'us-west-2')
        make_key(app, 'us-west-2', 'tt-usw2', DATA_KEY, 'xxx')
        session = app.db.session('relengapi')
        grooming.check_pending_upload(session, pu_row)
        session.commit()
        eq_(tables.PendingUpload.query.all(), [])  # PU is deleted
        assert not key_exists(app, 'us-west-2', 'tt-usw2', DATA_KEY)
Example #35
def test_check_pending_upload_success(app):
    """check_pending_upload deletes the PU and adds a FileInstance if valid"""
    with app.app_context(), set_time():
        expires = time.now() - timedelta(seconds=90)
        pu_row, file_row = add_pending_upload_and_file_row(
            len(DATA), DATA_DIGEST, expires, 'us-west-2')
        make_key(app, 'us-west-2', 'tt-usw2', DATA_KEY, DATA)
        session = app.db.session('relengapi')
        grooming.check_pending_upload(session, pu_row)
        session.commit()
        eq_(tables.PendingUpload.query.all(), [])  # PU is deleted
        eq_(len(tables.File.query.first().instances), 1)  # FileInstance exists
        assert key_exists(app, 'us-west-2', 'tt-usw2', DATA_KEY)
Example #36
def test_check_pending_upload_not_valid(app):
    """check_pending_upload deletes the PU and the key if the upload is
    invalid."""
    with app.app_context(), set_time():
        expires = time.now() - timedelta(seconds=90)
        pu_row, file_row = add_pending_upload_and_file_row(
            len(DATA), DATA_DIGEST, expires, 'us-west-2')
        make_key(app, 'us-west-2', 'tt-usw2', DATA_KEY, 'xxx')
        session = app.db.session('relengapi')
        grooming.check_pending_upload(session, pu_row)
        session.commit()
        eq_(tables.PendingUpload.query.all(), [])  # PU is deleted
        assert not key_exists(app, 'us-west-2', 'tt-usw2', DATA_KEY)
Example #37
def run_task_now(task_name):
    """Force the given badpenny task to run now."""
    t = tables.BadpennyTask.query.filter(
        tables.BadpennyTask.name == task_name).first()
    if not t:
        raise NotFound

    session = current_app.db.session('relengapi')
    job = tables.BadpennyJob(task=t, created_at=time.now())
    session.add(job)
    session.commit()

    execution.submit_job(task_name=t.name, job_id=job.id)
    return job.to_jsonjob()
Example #38
def run_task_now(task_name):
    """Force the given badpenny task to run now."""
    t = tables.BadpennyTask.query.filter(
        tables.BadpennyTask.name == task_name).first()
    if not t:
        raise NotFound

    session = current_app.db.session('relengapi')
    job = tables.BadpennyJob(task=t, created_at=time.now())
    session.add(job)
    session.commit()

    execution.submit_job(task_name=t.name, job_id=job.id)
    return job.to_jsonjob()
Example #39
def task_status(task_id):
    """
    Check and return the current state of the create_and_upload_archive celery task with task id
    of <task_id>.

    If the task is unknown, state will be PENDING. Once the task starts it will be updated to
    STARTED and finally, if it completes, it will be either SUCCESS (no exceptions), or FAILURE.

    See update_state() within create_and_upload_archive and
    http://celery.readthedocs.org/en/latest/reference/celery.states.html for more details.

    If state is SUCCESS, it is safe to check response['s3_urls'] for the archives submitted to S3.
    """
    task = create_and_upload_archive.AsyncResult(task_id)
    task_tracker = tables.ArchiverTask.query.filter(tables.ArchiverTask.task_id == task_id).first()
    log = logger.bind(archiver_task=task_id, archiver_task_state=task.state)
    log.info("checking status of task id {}: current state {}".format(task_id, task.state))
    task_info = task.info or {}
    response = {
        'state': task.state,
    }
    if task.state != 'FAILURE':
        response['status'] = task_info.get('status', 'no status available at this point.')
        response['src_url'] = task_info.get('src_url', '')
        response['s3_urls'] = task_info.get('s3_urls', {})
    else:
        # something went wrong
        response['status'] = str(task.info)  # this is the exception raised
        response['src_url'] = ''
        response['s3_urls'] = {}

    # archiver does not create any custom states, so we can assume to have only the defaults:
    # http://docs.celeryproject.org/en/latest/userguide/tasks.html#task-states
    # therefore, delete our state_id tracker from the db if the celery state is in a final state:
    # e.g. not RETRY, STARTED, or PENDING
    if task_tracker:
        if task.state in FINISHED_STATES:
            delete_tracker(task_tracker)
        elif task.state == "PENDING" and task_tracker.pending_expires_at < now():
            log.info("Task {} has expired from pending too long. Re-creating task".format(task.id))
            renew_tracker_pending_expiry(task_tracker)  # let exceptions bubble up before moving on
            create_and_upload_archive.apply_async(args=[task_tracker.src_url, task_tracker.s3_key],
                                                  task_id=task.id)
            response['state'] = 'RETRY'
            response['status'] = 'Task has expired from pending for too long. Re-creating task.'
        elif task_tracker.state != task.state:
            update_tracker_state(task_tracker, task.state)

    return MozharnessArchiveTask(**response)
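
Since the archiver only uses the default Celery states plus the synthetic RETRY set above, a caller can simply branch on the returned state until it is final. A hedged client-side sketch (requests and the JSON shape of the response are assumptions):

import time
import requests  # assumed client-side dependency

def wait_for_archive(task_status_url, poll_seconds=5):
    # poll the task-status endpoint until the task reaches a final state
    while True:
        result = requests.get(task_status_url).json()
        if result['state'] == 'SUCCESS':
            return result['s3_urls']
        if result['state'] == 'FAILURE':
            raise RuntimeError(result['status'])  # the task's exception text
        time.sleep(poll_seconds)  # PENDING / STARTED / RETRY: keep waiting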
Example #40
def add_file_to_db(app, content, regions=["us-east-1"], pending_regions=[], visibility="public"):
    with app.app_context():
        session = app.db.session("relengapi")
        file_row = tables.File(size=len(content), visibility=visibility, sha512=hashlib.sha512(content).hexdigest())
        session.add(file_row)
        session.commit()
        for region in regions:
            session.add(tables.FileInstance(file_id=file_row.id, region=region))
        for region in pending_regions:
            session.add(
                tables.PendingUpload(
                    file=file_row, region=region, expires=relengapi_time.now() + datetime.timedelta(seconds=60)
                )
            )
        session.commit()

        return file_row
Example #41
def test_upload_batch_success_existing_pending_upload(client, app):
    """A successful POST to /upload updates the 'expires' column of any relevant
    pending uploads."""
    with set_time(NOW - 30):
        add_file_to_db(app, ONE, regions=[], pending_regions=["us-east-1"])
    batch = mkbatch()
    with set_time():
        with not_so_random_choice():
            resp = upload_batch(client, batch)
        result = assert_batch_response(
            resp, files={"one": {"algorithm": "sha512", "size": len(ONE), "digest": ONE_DIGEST}}
        )
        assert_signed_url(result["files"]["one"]["put_url"], ONE_DIGEST, method="PUT", expires_in=60)
        assert_pending_upload(
            app, ONE_DIGEST, "us-east-1", expires=relengapi_time.now() + datetime.timedelta(seconds=60)
        )
        assert_batch_row(app, result["id"], files=[("one", len(ONE), ONE_DIGEST, [])])
Example #42
def test_check_pending_upload_race(app):
    """If check_pending_upload fails to add a file instance because it already
    exists, as might happen when the function races with itself, the function
    still succeeds."""
    with app.app_context(), set_time():
        expires = time.now() - timedelta(seconds=90)
        pu_row, file_row = add_pending_upload_and_file_row(
            len(DATA), DATA_DIGEST, expires, 'us-west-2')
        make_key(app, 'us-west-2', 'tt-usw2', DATA_KEY, DATA)
        session = app.db.session('relengapi')

        def test_shim():
            session.add(tables.FileInstance(file=file_row, region='us-west-2'))
            session.commit()
        grooming.check_pending_upload(session, pu_row, _test_shim=test_shim)
        session.commit()
        eq_(tables.PendingUpload.query.all(), [])  # PU is deleted
        eq_(len(tables.File.query.first().instances), 1)  # FileInstance exists
        assert key_exists(app, 'us-west-2', 'tt-usw2', DATA_KEY)
Example #43
def add_file_to_db(app, content, regions=['us-east-1'],
                   pending_regions=[], visibility='public'):
    with app.app_context():
        session = app.db.session('relengapi')
        file_row = tables.File(size=len(content),
                               visibility=visibility,
                               sha512=hashlib.sha512(content).hexdigest())
        session.add(file_row)
        session.commit()
        for region in regions:
            session.add(tables.FileInstance(
                file_id=file_row.id, region=region))
        for region in pending_regions:
            session.add(tables.PendingUpload(
                file=file_row, region=region,
                expires=relengapi_time.now() + datetime.timedelta(seconds=60)))
        session.commit()

        return file_row
Example #44
def test_upload_batch_success_existing_pending_upload(client, app):
    """A successful POST to /upload updates the 'expires' column of any relevant
    pending uploads."""
    with set_time(NOW - 30):
        add_file_to_db(app, ONE, regions=[], pending_regions=['us-east-1'])
    batch = mkbatch()
    with set_time():
        with not_so_random_choice():
            resp = upload_batch(client, batch)
        result = assert_batch_response(resp, files={
            'one': {'algorithm': 'sha512',
                    'size': len(ONE),
                    'digest': ONE_DIGEST}})
        assert_signed_url(result['files']['one']['put_url'], ONE_DIGEST,
                          method='PUT', expires_in=60)
        assert_pending_upload(
            app, ONE_DIGEST, 'us-east-1',
            expires=relengapi_time.now() + datetime.timedelta(seconds=60))
        assert_batch_row(
            app, result['id'], files=[('one', len(ONE), ONE_DIGEST, [])])
Example #45
def test_check_pending_upload_race(app):
    """If check_pending_upload fails to add a file instance because it already
    exists, as might happen when the function races with itself, the function
    still succeeds."""
    with app.app_context(), set_time():
        expires = time.now() - timedelta(seconds=90)
        pu_row, file_row = add_pending_upload_and_file_row(
            len(DATA), DATA_DIGEST, expires, 'us-west-2')
        make_key(app, 'us-west-2', 'tt-usw2', DATA_KEY, DATA)
        session = app.db.session('relengapi')

        def test_shim():
            session.add(tables.FileInstance(file=file_row, region='us-west-2'))
            session.commit()

        grooming.check_pending_upload(session, pu_row, _test_shim=test_shim)
        session.commit()
        eq_(tables.PendingUpload.query.all(), [])  # PU is deleted
        eq_(len(tables.File.query.first().instances), 1)  # FileInstance exists
        assert key_exists(app, 'us-west-2', 'tt-usw2', DATA_KEY)
Example #46
def cleanup_old_jobs(job_status):
    session = current_app.db.session('relengapi')
    Task = tables.BadpennyTask

    old_job_days = current_app.config.get('BADPENNY_OLD_JOB_DAYS', 7)
    old = time.now() - datetime.timedelta(days=old_job_days)
    deleted = 0

    for task in Task.query.all():
        # consider all but the most recent job
        jobs = reversed(task.jobs[1:])
        for job in jobs:
            if job.created_at < old:
                for log in job.logs:
                    session.delete(log)
                session.delete(job)
                deleted += 1

    if deleted:
        logger.info("removed %d old jobs", deleted)
        session.commit()
Example #47
def test_file_batches_relationship(app):
    with app.app_context():
        session = app.db.session('tooltool')

        file = tables.File(size=100, sha512='abcd', visibility='internal')
        session.add(file)

        batch = tables.Batch(
            uploaded=time.now(), author="dustin", message="hi")
        session.add(batch)

        bf = tables.BatchFile(batch=batch, file=file, filename="foo.txt")
        session.add(bf)

        session.commit()

    with app.app_context():
        file = tables.File.query.first()
        eq_(file.batches['foo.txt'].message, 'hi')

    with app.app_context():
        batch = tables.Batch.query.first()
        eq_(batch.files['foo.txt'].sha512, 'abcd')
Example #48
def test_now():
    n = time.now()
    eq_(n.tzinfo, pytz.UTC)
Example #49
def upload_batch(region=None, body=None):
    """Create a new upload batch.  The response object will contain a
    ``put_url`` for each file which needs to be uploaded -- which may not be
    all!  The caller is then responsible for uploading to those URLs.  The
    resulting signed URLs are only valid for a short time (currently 60
    seconds; see below), so uploads should begin promptly.  Consider using
    Amazon's MD5-verification capabilities to ensure that the uploaded files
    are transferred correctly, although the tooltool server will verify the
    integrity anyway.  The upload must have the header
    ``Content-Type: application/octet-stream``.

    The query argument ``region=us-west-1`` indicates a preference for URLs
    in that region, although if the region is not available then URLs in
    other regions may be returned.

    The returned URLs are only valid for 60 seconds, so all upload requests
    must begin within that timeframe.  Clients should therefore perform all
    uploads in parallel, rather than sequentially.  This limitation is in
    place to prevent malicious modification of files after they have been
    verified."""
    region, bucket = get_region_and_bucket(region)

    if not body.message:
        raise BadRequest("message must be non-empty")

    if not body.files:
        raise BadRequest("a batch must include at least one file")

    if body.author:
        raise BadRequest("Author must not be specified for upload")
    try:
        body.author = current_user.authenticated_email
    except AttributeError:
        raise BadRequest("Could not determine authenticated username")

    # verify permissions based on visibilities
    visibilities = set(f.visibility for f in body.files.itervalues())
    for v in visibilities:
        prm = p.get('tooltool.upload.{}'.format(v))
        if not prm or not prm.can():
            raise Forbidden("no permission to upload {} files".format(v))

    session = g.db.session('relengapi')
    batch = tables.Batch(uploaded=time.now(),
                         author=body.author,
                         message=body.message)

    s3 = current_app.aws.connect_to('s3', region)
    for filename, info in body.files.iteritems():
        log = logger.bind(tooltool_sha512=info.digest,
                          tooltool_operation='upload',
                          tooltool_batch_id=batch.id,
                          mozdef=True)
        if info.algorithm != 'sha512':
            raise BadRequest("'sha512' is the only allowed digest algorithm")
        if not is_valid_sha512(info.digest):
            raise BadRequest("Invalid sha512 digest")
        digest = info.digest
        file = tables.File.query.filter(tables.File.sha512 == digest).first()
        if file and file.visibility != info.visibility:
            raise BadRequest("Cannot change a file's visibility level")
        if file and file.instances != []:
            if file.size != info.size:
                raise BadRequest("Size mismatch for {}".format(filename))
        else:
            if not file:
                file = tables.File(sha512=digest,
                                   visibility=info.visibility,
                                   size=info.size)
                session.add(file)
            log.info(
                "generating signed S3 PUT URL to {} for {}; expiring in {}s".
                format(info.digest[:10], current_user, UPLOAD_EXPIRES_IN))
            info.put_url = s3.generate_url(
                method='PUT',
                expires_in=UPLOAD_EXPIRES_IN,
                bucket=bucket,
                key=util.keyname(info.digest),
                headers={'Content-Type': 'application/octet-stream'})
            # The PendingUpload row needs to reflect the updated expiration
            # time, even if there's an existing pending upload that expires
            # earlier.  The `merge` method does a SELECT and then either UPDATEs
            # or INSERTs the row.  However, merge needs the file_id, rather than
            # just a reference to the file object; and for that, we need to flush
            # the inserted file.
            session.flush()
            pu = tables.PendingUpload(
                file_id=file.id,
                region=region,
                expires=time.now() +
                datetime.timedelta(seconds=UPLOAD_EXPIRES_IN))
            session.merge(pu)
        session.add(tables.BatchFile(filename=filename, file=file,
                                     batch=batch))
    session.add(batch)
    session.commit()

    body.id = batch.id
    return body
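
The docstring above is the server's half of the upload protocol; the caller's half is to POST the batch, PUT each returned put_url within the expiry window with the required Content-Type header, and then signal completion (see upload_complete earlier in this listing). A hedged client sketch; the endpoint path and the 'result' response envelope are assumptions.

import requests  # assumed client-side dependency

def upload_batch_and_files(base_url, batch, file_contents):
    # batch: dict shaped like the body accepted above; file_contents: filename -> bytes
    resp = requests.post('{}/tooltool/upload'.format(base_url), json=batch)  # hypothetical path
    resp.raise_for_status()
    result = resp.json()['result']  # assumed response envelope
    for filename, info in result['files'].items():
        put_url = info.get('put_url')
        if not put_url:
            continue  # the file is already present; nothing to upload
        put = requests.put(put_url, data=file_contents[filename],
                           headers={'Content-Type': 'application/octet-stream'})
        put.raise_for_status()
    return result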
Example #50
    def _start(self):
        self.job.started_at = time.now()
        current_app.db.session('relengapi').commit()
Example #51
def test_now():
    n = time.now()
    eq_(n.tzinfo, pytz.UTC)
Example #52
    def _start(self):
        self._update_job({tables.BadpennyJob.started_at: time.now()})
        current_app.db.session('relengapi').commit()
Example #53
    def _start(self):
        self.job.started_at = time.now()
        current_app.db.session('relengapi').commit()
Example #54
    def _start(self):
        self._update_job({tables.BadpennyJob.started_at: time.now()})
        current_app.db.session('relengapi').commit()