def check_pending_upload(session, pu, _test_shim=lambda: None):
    """Validate one pending upload and record or discard it.

    The upload can be checked any time between the expiration of the URL
    (after which the user can't make any more changes, but the upload may
    yet be incomplete) and 1 day afterward (ample time for the upload to
    complete).

    :param session: DB session used for the deletes and commits
    :param pu: the pending-upload row; ``pu.file``, ``pu.region`` and
        ``pu.expires`` are read
    :param _test_shim: hook called just before verification (used by tests
        to simulate a race)

    In the "abandoned" and "un-configured region" cases the row is deleted
    but not committed here; the caller is expected to commit.
    """
    sha512 = pu.file.sha512
    size = pu.file.size
    log = logger.bind(tooltool_sha512=sha512)

    # read the clock once so both window checks use the same instant
    now = time.now()
    if now < pu.expires:
        # URL is not expired yet
        return
    if now > pu.expires + timedelta(days=1):
        # Upload will probably never complete
        log.info(
            "Deleting abandoned pending upload for {}".format(sha512))
        session.delete(pu)
        return

    # validate the region before opening a connection for it
    cfg = current_app.config.get('TOOLTOOL_REGIONS')
    if not cfg or pu.region not in cfg:
        log.warning("Pending upload for {} was to an un-configured "
                    "region".format(sha512))
        session.delete(pu)
        return

    # connect and see if the file exists..
    s3 = current_app.aws.connect_to('s3', pu.region)
    bucket = s3.get_bucket(cfg[pu.region], validate=False)
    key = bucket.get_key(util.keyname(sha512))
    if not key:
        # not uploaded yet
        return

    # commit the session before verifying the file instance, since the
    # DB connection may otherwise go away while we're distracted.
    session.commit()
    _test_shim()

    if not verify_file_instance(sha512, size, key):
        log.warning(
            "Upload of {} was invalid; deleting key".format(sha512))
        key.delete()
        session.delete(pu)
        session.commit()
        return

    log.info("Upload of {} considered valid".format(sha512))
    # add a file instance, but it's OK if it already exists.  The instance
    # is added explicitly rather than relying on relationship cascade to
    # pull it into the session.
    try:
        session.add(tables.FileInstance(file=pu.file, region=pu.region))
        session.commit()
    except sa.exc.IntegrityError:
        session.rollback()

    # and delete the pending upload
    session.delete(pu)
    session.commit()
def check_pending_upload(session, pu, _test_shim=lambda: None):
    """Validate one pending upload and record or discard it.

    The upload can be checked any time between the expiration of the URL
    (after which the user can't make any more changes, but the upload may
    yet be incomplete) and 1 day afterward (ample time for the upload to
    complete).

    :param session: DB session used for the deletes and commits
    :param pu: the pending-upload row; ``pu.file``, ``pu.region`` and
        ``pu.expires`` are read
    :param _test_shim: hook called just before verification (used by tests
        to simulate a race)

    In the "abandoned" and "un-configured region" cases the row is deleted
    but not committed here; the caller is expected to commit.
    """
    sha512 = pu.file.sha512
    size = pu.file.size
    log = logger.bind(tooltool_sha512=sha512, mozdef=True)

    # read the clock once so both window checks use the same instant
    now = time.now()
    if now < pu.expires:
        # URL is not expired yet
        return
    if now > pu.expires + timedelta(days=1):
        # Upload will probably never complete
        log.info(
            "Deleting abandoned pending upload for {}".format(sha512))
        session.delete(pu)
        return

    # validate the region before opening a connection for it
    cfg = current_app.config.get('TOOLTOOL_REGIONS')
    if not cfg or pu.region not in cfg:
        log.warning("Pending upload for {} was to an un-configured "
                    "region".format(sha512))
        session.delete(pu)
        return

    # connect and see if the file exists..
    s3 = current_app.aws.connect_to('s3', pu.region)
    bucket = s3.get_bucket(cfg[pu.region], validate=False)
    key = bucket.get_key(util.keyname(sha512))
    if not key:
        # not uploaded yet
        return

    # commit the session before verifying the file instance, since the
    # DB connection may otherwise go away while we're distracted.
    session.commit()
    _test_shim()

    if not verify_file_instance(sha512, size, key):
        log.warning(
            "Upload of {} was invalid; deleting key".format(sha512))
        key.delete()
        session.delete(pu)
        session.commit()
        return

    log.info("Upload of {} considered valid".format(sha512))
    # add a file instance, but it's OK if it already exists.  The instance
    # is added explicitly rather than relying on relationship cascade to
    # pull it into the session.
    try:
        session.add(tables.FileInstance(file=pu.file, region=pu.region))
        session.commit()
    except sa.exc.IntegrityError:
        session.rollback()

    # and delete the pending upload
    session.delete(pu)
    session.commit()
def upload_complete(digest):
    """Signal that a file has been uploaded and the server should begin
    validating it.  This is merely an optimization: the server also polls
    occasionally for uploads and validates them when they appear.

    Uploads cannot be safely validated until the upload URL has expired, which
    occurs a short time after the URL is generated (currently 60 seconds but
    subject to change).

    If the upload URL has expired, then the response is an HTTP 202 indicating
    that the signal has been accepted.  If the URL has not expired, then the
    response is an HTTP 409, and the ``X-Retry-After`` header gives a time,
    in seconds, that the client should wait before trying again."""
    if not is_valid_sha512(digest):
        raise BadRequest("Invalid sha512 digest")

    # A pending upload whose URL is still live means the file cannot be
    # checked yet: answer 409 Conflict.  With no pending upload, or only
    # expired ones, validation can go ahead.
    file_row = tables.File.query.filter(tables.File.sha512 == digest).first()
    pending = file_row.pending_uploads if file_row else []
    for pu in pending:
        remaining = pu.expires - time.now()
        if remaining > datetime.timedelta(0):
            # add 1 second to avoid rounding / skew errors
            retry_after = 1 + int(remaining.total_seconds())
            return Response(status=409,
                            headers={'X-Retry-After': str(retry_after)})

    # start a celery task in the background and return immediately
    grooming.check_file_pending_uploads.delay(digest)
    return '{}', 202
def run_task(self, task):
    """Insert a job row for ``task``, commit it, then submit the job for
    execution."""
    new_job = tables.BadpennyJob(task_id=task.task_id, created_at=time.now())
    session = current_app.db.session('relengapi')
    session.add(new_job)
    session.commit()
    # the commit above is what gives new_job its id
    execution.submit_job(task_name=task.name, job_id=new_job.id)
def test_upload_batch_success_existing_pending_upload(client, app):
    """A successful POST to /upload updates the 'expires' column of any
    relevant pending uploads."""
    # create the file and its pending upload 30 seconds "ago"
    with set_time(NOW - 30):
        add_file_to_db(app, ONE, regions=[], pending_regions=['us-east-1'])
    batch = mkbatch()

    with set_time():
        with not_so_random_choice():
            resp = upload_batch(client, batch)

        expected_files = {
            'one': {
                'algorithm': 'sha512',
                'size': len(ONE),
                'digest': ONE_DIGEST,
            },
        }
        result = assert_batch_response(resp, files=expected_files)
        put_url = result['files']['one']['put_url']
        assert_signed_url(put_url, ONE_DIGEST, method='PUT', expires_in=60)

        # the pending upload must now expire 60s after the (frozen) present
        new_expiry = relengapi_time.now() + datetime.timedelta(seconds=60)
        assert_pending_upload(app, ONE_DIGEST, 'us-east-1', expires=new_expiry)
        assert_batch_row(app, result['id'],
                         files=[('one', len(ONE), ONE_DIGEST, [])])
def get_archive(src_url, key, preferred_region):
    """
    A generic getter for retrieving an s3 location of an archive where the
    archive is based off a src_url.

    sub-dir: hg.mozilla.org supports archives of sub directories within a
    repository. This flexibility allows for creating archives of only a
    portion of what would normally be an entire repo archive.

    logic flow:
     If there is already a key within s3, a re-direct link is given for the
    s3 location. If the key does not exist, download the archive from src url,
    upload it to s3 for each region supported and return all uploaded s3 url
    locations.

     When the key does not exist, the remaining work will be assigned to a
    celery background task with a url location returned immediately for
    obtaining task state updates.

    :param src_url: URL handed to the background task as the archive source
    :param key: s3 key of the archive; also used to derive the task id
    :param preferred_region: region to use when it is one of the configured
        buckets; otherwise a configured region is picked at random
    :returns: a redirect to a signed GET URL when the key exists, otherwise
        ``({}, 202, {'Location': ...})`` pointing at the task status URL, or
        ``({}, 500)`` when no task id was obtained from ``apply_async``
    """
    buckets = current_app.config['ARCHIVER_S3_BUCKETS']
    # use preferred region if available otherwise choose a valid one at random
    if preferred_region and preferred_region in buckets:
        region = preferred_region
    else:
        # list() keeps the indexing valid where dict.keys() is a view
        regions = list(buckets)
        region = regions[randint(0, len(regions) - 1)]
    bucket = buckets[region]
    s3 = current_app.aws.connect_to('s3', region)
    session = current_app.db.session('relengapi')

    # first, see if the key exists
    if not s3.get_bucket(bucket).get_key(key):
        # keep things simple and avoid slashes in task url
        task_id = key.replace('/', '_')
        # can't use unique support:
        # api.pub.build.mozilla.org/docs/development/databases/#unique-row-support-get-or-create
        # because we want to know when the row doesn't exist before creating it
        tracker = tables.ArchiverTask.query.filter(
            tables.ArchiverTask.task_id == task_id).first()
        if tracker and tracker.state in FINISHED_STATES:
            log = logger.bind(archiver_task=task_id,
                              archiver_task_state=tracker.state)
            log.info('Task tracker: {} exists but finished with state: '
                     '{}'.format(task_id, tracker.state))
            # remove tracker and try celery task again
            delete_tracker(tracker)
            tracker = None
        if not tracker:
            log = logger.bind(archiver_task=task_id)
            log.info("Creating new celery task and task tracker for: {}".format(task_id))
            task = create_and_upload_archive.apply_async(
                args=[src_url, key], task_id=task_id)
            if task and task.id:
                pending_expires_at = now() + datetime.timedelta(
                    seconds=PENDING_EXPIRES_IN)
                session.add(tables.ArchiverTask(
                    task_id=task.id, s3_key=key, created_at=now(),
                    pending_expires_at=pending_expires_at,
                    src_url=src_url, state="PENDING"))
                session.commit()
            else:
                return {}, 500
        return {}, 202, {'Location': url_for('archiver.task_status',
                                             task_id=task_id)}

    logger.info("generating GET URL to {}, expires in {}s".format(key, GET_EXPIRES_IN))
    # return 302 pointing to s3 url with archive
    signed_url = s3.generate_url(
        method='GET', expires_in=GET_EXPIRES_IN,
        bucket=bucket, key=key
    )
    return redirect(signed_url)
def update_tree_status(session, tree, status=None, reason=None,
                       tags=None, message_of_the_day=None):
    """Update the given tree's status; note that this does not commit the
    session.

    :param session: DB session the log row is added to
    :param tree: tree object; its ``status``, ``reason`` and
        ``message_of_the_day`` attributes are assigned and ``tree.tree`` is
        read, so a bare tree name will not work here
    :param status: new status, or None to leave it unchanged
    :param reason: new reason, or None to leave it unchanged
    :param tags: tags recorded on the log entry; defaults to an empty list
    :param message_of_the_day: new MOTD, or None to leave it unchanged
    """
    # a fresh list per call avoids sharing one default list between calls
    if tags is None:
        tags = []

    if status is not None:
        tree.status = status
    if reason is not None:
        tree.reason = reason
    if message_of_the_day is not None:
        tree.message_of_the_day = message_of_the_day

    # log it if the reason or status have changed
    if status or reason:
        if status is None:
            status = 'no change'
        if reason is None:
            reason = 'no change'
        log_entry = model.DbLog(
            tree=tree.tree,
            when=relengapi_time.now(),
            who=str(current_user),
            status=status,
            reason=reason,
            tags=tags)
        session.add(log_entry)

    tree_cache_invalidate(tree.tree)
def renew_tracker_pending_expiry(tracker):
    """Move the tracker's pending expiry to PENDING_EXPIRES_IN seconds from
    now, log the renewal, and commit."""
    new_expiry = now() + datetime.timedelta(seconds=PENDING_EXPIRES_IN)
    session = current_app.db.session('relengapi')
    message = "renewing tracker {} with pending expiry: {}".format(
        tracker.id, new_expiry)
    logger.info(message, archiver_task=tracker.task_id)
    tracker.pending_expires_at = new_expiry
    session.commit()
def cleanup_old_jobs(job_status):
    """Delete jobs (and their logs) older than BADPENNY_OLD_JOB_DAYS days
    (default 7), always keeping each task's most recent job."""
    session = current_app.db.session('relengapi')
    retention_days = current_app.config.get('BADPENNY_OLD_JOB_DAYS', 7)
    cutoff = time.now() - datetime.timedelta(days=retention_days)
    removed = 0

    for task in tables.BadpennyTask.query.all():
        oldest_first = tables.BadpennyJob.query.filter(
            tables.BadpennyJob.task_id == task.id
        ).order_by(tables.BadpennyJob.created_at)

        # Walk forward until a job that is not too old.  A job is only
        # deleted once a later job has been seen, so the most recent job
        # always survives.
        doomed = None
        for job in oldest_first:
            if doomed:
                for log in doomed.logs:
                    session.delete(log)
                session.delete(doomed)
                doomed = None
                removed += 1
            if not (job.created_at < cutoff):
                break
            doomed = job

    if removed:
        logger.info("removed %d old jobs", removed)
    session.commit()
def task_status(task_id):
    """
    Check and return the current state of the create_and_upload_archive celery
    task with task id of <task_id>.

    If the task is unknown, state will be PENDING. Once the task starts it
    will be updated to STARTED and finally, if it completes, it will be either
    SUCCESS (no exceptions), or FAILURE.

    See update_state() within create_and_upload_archive and
    http://celery.readthedocs.org/en/latest/reference/celery.states.html for
    more details.

    If state is SUCCESS, it is safe to check response['s3_urls'] for the
    archives submitted to s3
    """
    task = create_and_upload_archive.AsyncResult(task_id)
    task_tracker = tables.ArchiverTask.query.filter(
        tables.ArchiverTask.task_id == task_id).first()
    log = logger.bind(archiver_task=task_id, archiver_task_state=task.state)
    log.info("checking status of task id {}: current state {}".format(
        task_id, task.state))

    task_info = task.info or {}
    response = {'state': task.state}
    if task.state == 'FAILURE':
        # something went wrong; task.info is the exception raised
        response['status'] = str(task.info)
        response['src_url'] = ''
        response['s3_urls'] = {}
    else:
        response['status'] = task_info.get(
            'status', 'no status available at this point.')
        response['src_url'] = task_info.get('src_url', '')
        response['s3_urls'] = task_info.get('s3_urls', {})

    if not task_tracker:
        return MozharnessArchiveTask(**response)

    # archiver does not create any custom states, so we can assume to have
    # only the defaults:
    # http://docs.celeryproject.org/en/latest/userguide/tasks.html#task-states
    # therefore, delete our state_id tracker from the db if the celery state
    # is in a final state: e.g. not RETRY, STARTED, or PENDING
    if task.state in FINISHED_STATES:
        delete_tracker(task_tracker)
    elif task.state == "PENDING" and task_tracker.pending_expires_at < now():
        log.info("Task {} has expired from pending too long. Re-creating task".format(task.id))
        # let exceptions bubble up before moving on
        renew_tracker_pending_expiry(task_tracker)
        create_and_upload_archive.apply_async(
            args=[task_tracker.src_url, task_tracker.s3_key],
            task_id=task.id)
        response['state'] = 'RETRY'
        response['status'] = 'Task has expired from pending for too long. Re-creating task.'
    elif task_tracker.state != task.state:
        update_tracker_state(task_tracker, task.state)

    return MozharnessArchiveTask(**response)
def run_task(self, task):
    """Insert a job row for ``task``, commit it, then submit the job for
    execution."""
    new_job = tables.BadpennyJob(task_id=task.task_id, created_at=time.now())
    session = current_app.db.session('relengapi')
    session.add(new_job)
    session.commit()
    # the commit above is what gives new_job its id
    execution.submit_job(task_name=task.name, job_id=new_job.id)
def add_batch_to_db(app, author, message, files):
    """Create and commit a Batch uploaded "now", with one BatchFile row per
    (filename, file) entry in ``files``; return the Batch."""
    with app.app_context():
        session = app.db.session("relengapi")
        new_batch = tables.Batch(
            author=author,
            message=message,
            uploaded=relengapi_time.now(),
        )
        session.add(new_batch)
        for name, file_row in files.iteritems():
            batch_file = tables.BatchFile(
                filename=name, batch=new_batch, file=file_row)
            session.add(batch_file)
        session.commit()
        return new_batch
def run(self, parser, args):
    """Sync tasks into the DB, then run every task runnable at this moment."""
    logger.info("Synchronizing tasks into the DB")
    self.sync_tasks()

    logger.info("Creating jobs for overdue tasks")
    # one timestamp decides which tasks are runnable
    current_time = time.now()
    for overdue in self.runnable_tasks(current_time):
        logger.info("Running %r", overdue.name)
        self.run_task(overdue)
def add_batch_to_db(app, author, message, files):
    """Create and commit a Batch uploaded "now", with one BatchFile row per
    (filename, file) entry in ``files``; return the Batch."""
    with app.app_context():
        session = app.db.session('relengapi')
        new_batch = tables.Batch(
            author=author,
            message=message,
            uploaded=relengapi_time.now(),
        )
        session.add(new_batch)
        for name, file_row in files.iteritems():
            batch_file = tables.BatchFile(
                filename=name, batch=new_batch, file=file_row)
            session.add(batch_file)
        session.commit()
        return new_batch
def _finish(self, successful):
    """Stamp the job as completed, record whether it succeeded, store any
    collected log output, and commit."""
    session = current_app.db.session('relengapi')
    job = self.job
    job.completed_at = time.now()
    job.successful = successful

    output_lines = self._log_output
    if output_lines:
        # the log row shares the job's id
        joined = u'\n'.join(output_lines)
        session.add(tables.BadpennyJobLog(id=job.id, content=joined))
    session.commit()
def test_check_pending_upload_not_expired(app):
    """check_pending_upload doesn't check anything if the URL isn't expired
    yet"""
    with app.app_context():
        with set_time():
            # 10s shy of expiring
            not_yet = time.now() + timedelta(seconds=10)
            pu_row, file_row = add_pending_upload_and_file_row(
                len(DATA), DATA_DIGEST, not_yet, 'us-west-2')
            session = app.db.session('relengapi')
            grooming.check_pending_upload(session, pu_row)
            session.commit()
            remaining = tables.PendingUpload.query.all()
            eq_(len(remaining), 1)  # PU still exists
def cleanup_old_tasks(job_status):
    """delete any tracker task if it is older than the time a task can live
    for."""
    session = current_app.db.session(tables.DB_DECLARATIVE_BASE)
    tracker_cls = tables.ArchiverTask
    cutoff = now() - datetime.timedelta(seconds=TASK_TIME_OUT)
    # trackers arrive oldest first, so stop at the first one still in date
    for tracker in session.query(tracker_cls).order_by(tracker_cls.created_at):
        if not (tracker.created_at < cutoff):
            break
        delete_tracker(tracker)
def test_check_pending_upload_bad_region(app):
    """check_pending_upload deletes a pending upload with a bad region"""
    with app.app_context():
        with set_time():
            expired_at = time.now() - timedelta(seconds=90)
            pu_row, file_row = add_pending_upload_and_file_row(
                len(DATA), DATA_DIGEST, expired_at, 'us-west-1')
            session = app.db.session('relengapi')
            grooming.check_pending_upload(session, pu_row)
            session.commit()
            leftover = tables.PendingUpload.query.all()
            eq_(leftover, [])  # PU is deleted
def cleanup_old_tasks(job_status):
    """delete any tracker task if it is older than the time a task can live
    for."""
    session = current_app.db.session('relengapi')
    tracker_cls = tables.ArchiverTask
    cutoff = now() - datetime.timedelta(seconds=TASK_TIME_OUT)
    # trackers arrive oldest first, so stop at the first one still in date
    for tracker in session.query(tracker_cls).order_by(tracker_cls.created_at):
        if not (tracker.created_at < cutoff):
            break
        delete_tracker(tracker)
def test_check_file_pending_uploads(app):
    """check_file_pending_uploads calls check_pending_upload for each PU for
    the file"""
    target = 'relengapi.blueprints.tooltool.grooming.check_pending_upload'
    with app.app_context():
        with set_time():
            expired_at = time.now() - timedelta(seconds=90)
            pu_row, file_row = add_pending_upload_and_file_row(
                len(DATA), DATA_DIGEST, expired_at, 'us-west-2')
            with mock.patch(target) as cpu:
                seen = []

                def record(sess, pu):
                    # remember each PU handed to the patched checker
                    return seen.append(pu)

                cpu.side_effect = record
                grooming.check_file_pending_uploads(DATA_DIGEST)
                assert len(seen) == 1
def _finish(self, successful):
    """Record completion time and success on the job via _update_job, then
    store any collected log output."""
    changes = {
        tables.BadpennyJob.completed_at: time.now(),
        tables.BadpennyJob.successful: successful,
    }
    self._update_job(changes)

    if not self._log_output:
        return

    # the log row shares the job's id
    session = current_app.db.session('relengapi')
    joined = u'\n'.join(self._log_output)
    session.add(tables.BadpennyJobLog(id=self.job_id, content=joined))
    session.commit()
def test_check_pending_upload_no_upload(app):
    """check_pending_upload leaves the PU in place if the upload is not
    complete"""
    with app.app_context():
        with set_time():
            expired_at = time.now() - timedelta(seconds=90)
            pu_row, file_row = add_pending_upload_and_file_row(
                len(DATA), DATA_DIGEST, expired_at, 'us-west-2')
            session = app.db.session('relengapi')
            grooming.check_pending_upload(session, pu_row)
            session.commit()
            # PU has not been deleted
            surviving = tables.PendingUpload.query.first()
            assert surviving.file.sha512 == DATA_DIGEST
def update_trees(body):
    """
    Update trees' status.

    If the update indicates that the previous state should be saved, then a new
    change will be added to the stack containing the previous status and
    reason.  In this case, both reason and status must be supplied.

    The `tags` property must not be empty if `status` is `closed`.
    """
    session = current_app.db.session('relengapi')
    trees = [session.query(model.DbTree).get(name) for name in body.trees]
    if not all(trees):
        raise NotFound("one or more trees not found")

    if body.status == 'closed' and not body.tags:
        raise BadRequest("tags are required when closing a tree")

    if body.remember:
        if body.status is Unset or body.reason is Unset:
            raise BadRequest("must specify status and reason to remember the change")
        # add a new stack entry with the new and existing states
        change = model.DbStatusChange(
            who=str(current_user),
            reason=body.reason,
            when=relengapi_time.now(),
            status=body.status)
        for tree in trees:
            previous = json.dumps(
                {'status': tree.status, 'reason': tree.reason})
            change.trees.append(
                model.DbStatusChangeTree(tree=tree.tree, last_state=previous))
        session.add(change)

    # update the trees as requested; Unset fields become None ("no change"),
    # and missing tags become an empty list
    updates = {
        'status': None if body.status is Unset else body.status,
        'reason': None if body.reason is Unset else body.reason,
        'message_of_the_day': (None if body.message_of_the_day is Unset
                               else body.message_of_the_day),
        'tags': (None if body.tags is Unset else body.tags) or [],
    }
    for tree in trees:
        update_tree_status(session, tree, **updates)

    session.commit()
    return None, 204
def test_check_pending_upload_success(app):
    """check_pending_upload deletes the PU and adds a FileInstance if valid"""
    with app.app_context():
        with set_time():
            expired_at = time.now() - timedelta(seconds=90)
            pu_row, file_row = add_pending_upload_and_file_row(
                len(DATA), DATA_DIGEST, expired_at, 'us-west-2')
            # the key holds exactly the expected content
            make_key(app, 'us-west-2', 'tt-usw2', DATA_KEY, DATA)
            session = app.db.session('relengapi')
            grooming.check_pending_upload(session, pu_row)
            session.commit()
            leftover = tables.PendingUpload.query.all()
            eq_(leftover, [])  # PU is deleted
            instances = tables.File.query.first().instances
            eq_(len(instances), 1)  # FileInstance exists
            assert key_exists(app, 'us-west-2', 'tt-usw2', DATA_KEY)
def test_check_pending_upload_not_valid(app):
    """check_pending_upload deletes the PU and the key if the upload is
    invalid."""
    with app.app_context():
        with set_time():
            expired_at = time.now() - timedelta(seconds=90)
            pu_row, file_row = add_pending_upload_and_file_row(
                len(DATA), DATA_DIGEST, expired_at, 'us-west-2')
            # the key's content does not match the recorded file
            make_key(app, 'us-west-2', 'tt-usw2', DATA_KEY, 'xxx')
            session = app.db.session('relengapi')
            grooming.check_pending_upload(session, pu_row)
            session.commit()
            leftover = tables.PendingUpload.query.all()
            eq_(leftover, [])  # PU is deleted
            assert not key_exists(app, 'us-west-2', 'tt-usw2', DATA_KEY)
def run_task_now(task_name):
    """Force the given badpenny task to run now."""
    by_name = tables.BadpennyTask.name == task_name
    task_row = tables.BadpennyTask.query.filter(by_name).first()
    if not task_row:
        raise NotFound

    session = current_app.db.session('relengapi')
    new_job = tables.BadpennyJob(task=task_row, created_at=time.now())
    session.add(new_job)
    session.commit()

    execution.submit_job(task_name=task_row.name, job_id=new_job.id)
    return new_job.to_jsonjob()
def task_status(task_id):
    """
    Check and return the current state of the create_and_upload_archive celery
    task with task id of <task_id>.

    If the task is unknown, state will be PENDING. Once the task starts it
    will be updated to STARTED and finally, if it completes, it will be either
    SUCCESS (no exceptions), or FAILURE.

    See update_state() within create_and_upload_archive and
    http://celery.readthedocs.org/en/latest/reference/celery.states.html for
    more details.

    If state is SUCCESS, it is safe to check response['s3_urls'] for the
    archives submitted to s3
    """
    task = create_and_upload_archive.AsyncResult(task_id)
    task_tracker = tables.ArchiverTask.query.filter(
        tables.ArchiverTask.task_id == task_id).first()
    log = logger.bind(archiver_task=task_id, archiver_task_state=task.state)
    log.info("checking status of task id {}: current state {}".format(
        task_id, task.state))

    task_info = task.info or {}
    response = {'state': task.state}
    if task.state == 'FAILURE':
        # something went wrong; task.info is the exception raised
        response['status'] = str(task.info)
        response['src_url'] = ''
        response['s3_urls'] = {}
    else:
        response['status'] = task_info.get(
            'status', 'no status available at this point.')
        response['src_url'] = task_info.get('src_url', '')
        response['s3_urls'] = task_info.get('s3_urls', {})

    if not task_tracker:
        return MozharnessArchiveTask(**response)

    # archiver does not create any custom states, so we can assume to have
    # only the defaults:
    # http://docs.celeryproject.org/en/latest/userguide/tasks.html#task-states
    # therefore, delete our state_id tracker from the db if the celery state
    # is in a final state: e.g. not RETRY, STARTED, or PENDING
    if task.state in FINISHED_STATES:
        delete_tracker(task_tracker)
    elif task.state == "PENDING" and task_tracker.pending_expires_at < now():
        log.info("Task {} has expired from pending too long. Re-creating task".format(task.id))
        # let exceptions bubble up before moving on
        renew_tracker_pending_expiry(task_tracker)
        create_and_upload_archive.apply_async(
            args=[task_tracker.src_url, task_tracker.s3_key],
            task_id=task.id)
        response['state'] = 'RETRY'
        response['status'] = 'Task has expired from pending for too long. Re-creating task.'
    elif task_tracker.state != task.state:
        update_tracker_state(task_tracker, task.state)

    return MozharnessArchiveTask(**response)
def add_file_to_db(app, content, regions=("us-east-1",), pending_regions=(),
                   visibility="public"):
    """Insert a File row for ``content`` plus its instances and pending
    uploads, and return the File row.

    :param app: application providing the app context and DB session
    :param content: file content; its length and sha512 digest are stored
    :param regions: regions that get a FileInstance row
    :param pending_regions: regions that get a PendingUpload row expiring 60
        seconds from now
    :param visibility: visibility stored on the File row

    The defaults are tuples rather than lists so no mutable object is shared
    between calls; both arguments are only iterated.
    """
    with app.app_context():
        session = app.db.session("relengapi")
        file_row = tables.File(size=len(content),
                               visibility=visibility,
                               sha512=hashlib.sha512(content).hexdigest())
        session.add(file_row)
        # commit first so that file_row.id is available below
        session.commit()
        for region in regions:
            session.add(tables.FileInstance(
                file_id=file_row.id, region=region))
        for region in pending_regions:
            session.add(tables.PendingUpload(
                file=file_row, region=region,
                expires=relengapi_time.now() + datetime.timedelta(seconds=60)))
        session.commit()
        return file_row
def test_upload_batch_success_existing_pending_upload(client, app):
    """A successful POST to /upload updates the 'expires' column of any
    relevant pending uploads."""
    # create the file and its pending upload 30 seconds "ago"
    with set_time(NOW - 30):
        add_file_to_db(app, ONE, regions=[], pending_regions=["us-east-1"])
    batch = mkbatch()

    with set_time():
        with not_so_random_choice():
            resp = upload_batch(client, batch)

        expected_files = {
            "one": {
                "algorithm": "sha512",
                "size": len(ONE),
                "digest": ONE_DIGEST,
            },
        }
        result = assert_batch_response(resp, files=expected_files)
        put_url = result["files"]["one"]["put_url"]
        assert_signed_url(put_url, ONE_DIGEST, method="PUT", expires_in=60)

        # the pending upload must now expire 60s after the (frozen) present
        new_expiry = relengapi_time.now() + datetime.timedelta(seconds=60)
        assert_pending_upload(app, ONE_DIGEST, "us-east-1", expires=new_expiry)
        assert_batch_row(app, result["id"],
                         files=[("one", len(ONE), ONE_DIGEST, [])])
def test_check_pending_upload_race(app):
    """If check_pending_upload fails to add a file instance because it already
    exists, as might happen when the function races with itself, the function
    still succeeds."""
    with app.app_context():
        with set_time():
            expired_at = time.now() - timedelta(seconds=90)
            pu_row, file_row = add_pending_upload_and_file_row(
                len(DATA), DATA_DIGEST, expired_at, 'us-west-2')
            make_key(app, 'us-west-2', 'tt-usw2', DATA_KEY, DATA)
            session = app.db.session('relengapi')

            def test_shim():
                # play the competing run: insert the instance first
                rival = tables.FileInstance(file=file_row, region='us-west-2')
                session.add(rival)
                session.commit()

            grooming.check_pending_upload(
                session, pu_row, _test_shim=test_shim)
            session.commit()
            leftover = tables.PendingUpload.query.all()
            eq_(leftover, [])  # PU is deleted
            instances = tables.File.query.first().instances
            eq_(len(instances), 1)  # FileInstance exists
            assert key_exists(app, 'us-west-2', 'tt-usw2', DATA_KEY)
def add_file_to_db(app, content, regions=('us-east-1',), pending_regions=(),
                   visibility='public'):
    """Insert a File row for ``content`` plus its instances and pending
    uploads, and return the File row.

    :param app: application providing the app context and DB session
    :param content: file content; its length and sha512 digest are stored
    :param regions: regions that get a FileInstance row
    :param pending_regions: regions that get a PendingUpload row expiring 60
        seconds from now
    :param visibility: visibility stored on the File row

    The defaults are tuples rather than lists so no mutable object is shared
    between calls; both arguments are only iterated.
    """
    with app.app_context():
        session = app.db.session('relengapi')
        file_row = tables.File(size=len(content),
                               visibility=visibility,
                               sha512=hashlib.sha512(content).hexdigest())
        session.add(file_row)
        # commit first so that file_row.id is available below
        session.commit()
        for region in regions:
            session.add(tables.FileInstance(
                file_id=file_row.id, region=region))
        for region in pending_regions:
            session.add(tables.PendingUpload(
                file=file_row, region=region,
                expires=relengapi_time.now() + datetime.timedelta(seconds=60)))
        session.commit()
        return file_row
def test_upload_batch_success_existing_pending_upload(client, app):
    """A successful POST to /upload updates the 'expires' column of any
    relevant pending uploads."""
    # create the file and its pending upload 30 seconds "ago"
    with set_time(NOW - 30):
        add_file_to_db(app, ONE, regions=[], pending_regions=['us-east-1'])
    batch = mkbatch()

    with set_time():
        with not_so_random_choice():
            resp = upload_batch(client, batch)

        expected_files = {
            'one': {
                'algorithm': 'sha512',
                'size': len(ONE),
                'digest': ONE_DIGEST,
            },
        }
        result = assert_batch_response(resp, files=expected_files)
        put_url = result['files']['one']['put_url']
        assert_signed_url(put_url, ONE_DIGEST, method='PUT', expires_in=60)

        # the pending upload must now expire 60s after the (frozen) present
        new_expiry = relengapi_time.now() + datetime.timedelta(seconds=60)
        assert_pending_upload(app, ONE_DIGEST, 'us-east-1', expires=new_expiry)
        assert_batch_row(app, result['id'],
                         files=[('one', len(ONE), ONE_DIGEST, [])])
def cleanup_old_jobs(job_status):
    """Delete jobs (and their logs) older than BADPENNY_OLD_JOB_DAYS days
    (default 7), never touching the first entry of each task's ``jobs``."""
    session = current_app.db.session('relengapi')
    retention_days = current_app.config.get('BADPENNY_OLD_JOB_DAYS', 7)
    cutoff = time.now() - datetime.timedelta(days=retention_days)
    removed = 0

    for task in tables.BadpennyTask.query.all():
        # consider all but the most recent job (task.jobs[0] is skipped)
        candidates = reversed(task.jobs[1:])
        for job in candidates:
            if not (job.created_at < cutoff):
                continue
            for log in job.logs:
                session.delete(log)
            session.delete(job)
            removed += 1

    if removed:
        logger.info("removed %d old jobs", removed)
    session.commit()
def test_file_batches_relationship(app):
    """File.batches and Batch.files are both keyed by the BatchFile
    filename."""
    with app.app_context():
        session = app.db.session('tooltool')
        file_row = tables.File(size=100, sha512='abcd', visibility='internal')
        session.add(file_row)
        batch_row = tables.Batch(
            uploaded=time.now(), author="dustin", message="hi")
        session.add(batch_row)
        link = tables.BatchFile(
            batch=batch_row, file=file_row, filename="foo.txt")
        session.add(link)
        session.commit()

    # each direction is read back in its own app context
    with app.app_context():
        loaded_file = tables.File.query.first()
        eq_(loaded_file.batches['foo.txt'].message, 'hi')

    with app.app_context():
        loaded_batch = tables.Batch.query.first()
        eq_(loaded_batch.files['foo.txt'].sha512, 'abcd')
def test_now():
    """time.now() returns a datetime whose tzinfo is UTC."""
    current = time.now()
    tz = current.tzinfo
    eq_(tz, pytz.UTC)
def upload_batch(region=None, body=None):
    """Create a new upload batch.  The response object will contain a
    ``put_url`` for each file which needs to be uploaded -- which may not be
    all!  The caller is then responsible for uploading to those URLs.
    Consider using Amazon's MD5-verification capabilities to ensure that the
    uploaded files are transferred correctly, although the tooltool server
    will verify the integrity anyway.  The upload must have the header
    ``Content-Type: application/octet-stream``.

    The query argument ``region=us-west-1`` indicates a preference for URLs
    in that region, although if the region is not available then URLs in
    other regions may be returned.

    The returned URLs are only valid for a short time (currently 60 seconds),
    so all upload requests must begin within that timeframe.  Clients should
    therefore perform all uploads in parallel, rather than sequentially.  This
    limitation is in place to prevent malicious modification of files after
    they have been verified."""
    region, bucket = get_region_and_bucket(region)

    if not body.message:
        raise BadRequest("message must be non-empty")

    if not body.files:
        raise BadRequest("a batch must include at least one file")

    if body.author:
        raise BadRequest("Author must not be specified for upload")
    try:
        body.author = current_user.authenticated_email
    except AttributeError:
        raise BadRequest("Could not determine authenticated username")

    # verify permissions based on visibilities
    visibilities = set(f.visibility for f in body.files.itervalues())
    for visibility in visibilities:
        prm = p.get('tooltool.upload.{}'.format(visibility))
        if not prm or not prm.can():
            raise Forbidden("no permission to upload {} files".format(visibility))

    session = g.db.session('relengapi')
    batch = tables.Batch(
        uploaded=time.now(),
        author=body.author,
        message=body.message)

    s3 = current_app.aws.connect_to('s3', region)
    for filename, info in body.files.iteritems():
        # NOTE(review): the batch is only added to the session after this
        # loop, so batch.id may still be None when it is bound here -- confirm
        log = logger.bind(tooltool_sha512=info.digest,
                          tooltool_operation='upload',
                          tooltool_batch_id=batch.id,
                          mozdef=True)
        if info.algorithm != 'sha512':
            raise BadRequest("'sha512' is the only allowed digest algorithm")
        if not is_valid_sha512(info.digest):
            raise BadRequest("Invalid sha512 digest")
        digest = info.digest
        file_row = tables.File.query.filter(
            tables.File.sha512 == digest).first()
        if file_row and file_row.visibility != info.visibility:
            raise BadRequest("Cannot change a file's visibility level")
        if file_row and file_row.instances != []:
            # the file already has an instance: no upload needed, but the
            # declared size must match the stored one
            if file_row.size != info.size:
                raise BadRequest("Size mismatch for {}".format(filename))
        else:
            if not file_row:
                file_row = tables.File(
                    sha512=digest,
                    visibility=info.visibility,
                    size=info.size)
                session.add(file_row)
            log.info(
                "generating signed S3 PUT URL to {} for {}; expiring in {}s".
                format(info.digest[:10], current_user, UPLOAD_EXPIRES_IN))
            info.put_url = s3.generate_url(
                method='PUT', expires_in=UPLOAD_EXPIRES_IN, bucket=bucket,
                key=util.keyname(info.digest),
                headers={'Content-Type': 'application/octet-stream'})
            # The PendingUpload row needs to reflect the updated expiration
            # time, even if there's an existing pending upload that expires
            # earlier.  The `merge` method does a SELECT and then either
            # UPDATEs or INSERTs the row.  However, merge needs the file_id,
            # rather than just a reference to the file object; and for that,
            # we need to flush the inserted file.
            session.flush()
            pu = tables.PendingUpload(
                file_id=file_row.id,
                region=region,
                expires=time.now() + datetime.timedelta(seconds=UPLOAD_EXPIRES_IN))
            session.merge(pu)
        session.add(tables.BatchFile(
            filename=filename, file=file_row, batch=batch))
    session.add(batch)
    session.commit()

    body.id = batch.id
    return body
def _start(self):
    """Stamp the job's start time and commit it."""
    started = time.now()
    self.job.started_at = started
    session = current_app.db.session('relengapi')
    session.commit()
def _start(self):
    """Record the job's start time via _update_job and commit."""
    changes = {tables.BadpennyJob.started_at: time.now()}
    self._update_job(changes)
    session = current_app.db.session('relengapi')
    session.commit()