def test_replicate_file(app):
    """Replicating a file initiates copy operations from regions where the
    file exists to regions where it does not."""
    with app.app_context():
        # file exists only in us-east-1 (DB row + S3 object); us-west-2 has
        # an empty bucket ready to receive the replica
        file = add_file_row(len(DATA), DATA_DIGEST, instances=['us-east-1'])
        make_key(app, 'us-east-1', 'tt-use1', util.keyname(DATA_DIGEST), DATA)
        make_bucket(app, 'us-west-2', 'tt-usw2')
        grooming.replicate_file(app.db.session('relengapi'), file)
        # afterward, the DB records both instances and the object exists in
        # both regions' buckets
        assert_file_instances(app, DATA_DIGEST, ['us-east-1', 'us-west-2'])
        assert key_exists(app, 'us-east-1', 'tt-use1', util.keyname(DATA_DIGEST))
        assert key_exists(app, 'us-west-2', 'tt-usw2', util.keyname(DATA_DIGEST))
def test_replicate_file(app):
    """Replicating a file initiates copy operations from regions where the
    file exists to regions where it does not."""
    with app.app_context():
        key = util.keyname(DATA_DIGEST)
        # start with the file only in us-east-1, and an empty target bucket
        file = add_file_row(len(DATA), DATA_DIGEST, instances=['us-east-1'])
        make_key(app, 'us-east-1', 'tt-use1', key, DATA)
        make_bucket(app, 'us-west-2', 'tt-usw2')

        grooming.replicate_file(app.db.session('relengapi'), file)

        # both the DB and both buckets should now show the file
        assert_file_instances(app, DATA_DIGEST, ['us-east-1', 'us-west-2'])
        for region, bucket in (('us-east-1', 'tt-use1'),
                               ('us-west-2', 'tt-usw2')):
            assert key_exists(app, region, bucket, key)
def test_replicate_file_already_exists(app):
    """If a target object already exists in S3 during replication, it is
    deleted rather than being trusted to be correct."""
    with app.app_context():
        file = add_file_row(len(DATA), DATA_DIGEST, instances=['us-east-1'])
        make_key(app, 'us-east-1', 'tt-use1', util.keyname(DATA_DIGEST), DATA)
        # the target region already has a corrupt object under the same key
        make_key(app, 'us-west-2', 'tt-usw2', util.keyname(DATA_DIGEST), "BAD")
        grooming.replicate_file(app.db.session('relengapi'), file)
        assert_file_instances(app, DATA_DIGEST, ['us-east-1', 'us-west-2'])
        assert key_exists(app, 'us-east-1', 'tt-use1', util.keyname(DATA_DIGEST))
        # the replica must carry the real content, replacing the corrupt one
        k = key_exists(app, 'us-west-2', 'tt-usw2', util.keyname(DATA_DIGEST))
        eq_(k.get_contents_as_string(), DATA)  # not "BAD"
def test_replicate_file_already_exists(app):
    """If a target object already exists in S3 during replication, it is
    deleted rather than being trusted to be correct."""
    with app.app_context():
        key_name = util.keyname(DATA_DIGEST)
        file = add_file_row(len(DATA), DATA_DIGEST, instances=['us-east-1'])
        make_key(app, 'us-east-1', 'tt-use1', key_name, DATA)
        # pre-populate the target with a corrupt object under the same key
        make_key(app, 'us-west-2', 'tt-usw2', key_name, "BAD")

        grooming.replicate_file(app.db.session('relengapi'), file)

        assert_file_instances(app, DATA_DIGEST, ['us-east-1', 'us-west-2'])
        assert key_exists(app, 'us-east-1', 'tt-use1', key_name)
        replica = key_exists(app, 'us-west-2', 'tt-usw2', key_name)
        # the corrupt content was replaced with the real content
        eq_(replica.get_contents_as_string(), DATA)
def replicate_file(session, file):
    """Replicate ``file`` to every configured region that lacks an instance.

    The S3 object is copied server-side from one configured source region
    to each target region, and a FileInstance row is recorded for each new
    copy.  If no configured region holds the file (its only region was
    removed from the configuration), a warning is logged and nothing is
    done.
    """
    config = current_app.config['TOOLTOOL_REGIONS']
    regions = set(config)
    file_regions = set([i.region for i in file.instances])
    # only use configured source regions; if a region is removed
    # from the configuration, we can't copy from it.
    source_regions = file_regions & regions
    if not source_regions:
        # this should only happen when the only region containing a
        # file is removed from the configuration
        log.warning("no source regions for {}".format(file.sha512))
        return
    source_region = source_regions.pop()
    source_bucket = config[source_region]
    target_regions = regions - file_regions
    log.info("replicating {} from {} to {}".format(
        file.sha512, source_region, ', '.join(target_regions)))
    key_name = util.keyname(file.sha512)
    for target_region in target_regions:
        target_bucket = config[target_region]
        conn = current_app.aws.connect_to('s3', target_region)
        bucket = conn.get_bucket(target_bucket)
        # commit the session before replicating, since the DB connection may
        # otherwise go away while we're distracted.
        session.commit()
        bucket.copy_key(new_key_name=key_name,
                        src_key_name=key_name,
                        src_bucket_name=source_bucket,
                        storage_class='STANDARD',
                        preserve_acl=False)
        # a concurrent replication may have inserted the same instance while
        # the copy was in flight; the copy itself is idempotent, so tolerate
        # the duplicate-row error instead of aborting the remaining targets
        try:
            session.add(tables.FileInstance(file=file, region=target_region))
            session.commit()
        except sa.exc.IntegrityError:
            session.rollback()
def check_pending_upload(session, pu, _test_shim=lambda: None):
    """Check whether pending upload ``pu`` has completed, and if so verify
    it and record a file instance; abandoned or misconfigured uploads are
    deleted.  ``_test_shim`` is a test hook called after the
    pre-verification commit."""
    # we can check the upload any time between the expiration of the URL
    # (after which the user can't make any more changes, but the upload
    # may yet be incomplete) and 1 day afterward (ample time for the upload
    # to complete)
    sha512 = pu.file.sha512
    size = pu.file.size
    log = logger.bind(tooltool_sha512=sha512)
    if time.now() < pu.expires:
        # URL is not expired yet
        return
    elif time.now() > pu.expires + timedelta(days=1):
        # Upload will probably never complete
        log.info(
            "Deleting abandoned pending upload for {}".format(sha512))
        session.delete(pu)
        return

    # connect and see if the file exists..
    s3 = current_app.aws.connect_to('s3', pu.region)
    cfg = current_app.config.get('TOOLTOOL_REGIONS')
    if not cfg or pu.region not in cfg:
        log.warning("Pending upload for {} was to an un-configured "
                    "region".format(sha512))
        session.delete(pu)
        return

    bucket = s3.get_bucket(cfg[pu.region], validate=False)
    key = bucket.get_key(util.keyname(sha512))
    if not key:
        # not uploaded yet
        return

    # commit the session before verifying the file instance, since the
    # DB connection may otherwise go away while we're distracted.
    session.commit()
    _test_shim()
    if not verify_file_instance(sha512, size, key):
        log.warning(
            "Upload of {} was invalid; deleting key".format(sha512))
        key.delete()
        session.delete(pu)
        session.commit()
        return
    log.info("Upload of {} considered valid".format(sha512))
    # add a file instance, but it's OK if it already exists
    try:
        # NOTE(review): the FileInstance is never session.add()ed;
        # presumably attaching it to pu.file cascades it into the
        # session via the relationship -- confirm.
        tables.FileInstance(file=pu.file, region=pu.region)
        session.commit()
    except sa.exc.IntegrityError:
        session.rollback()
    # and delete the pending upload
    session.delete(pu)
    session.commit()
def check_pending_upload(session, pu, _test_shim=lambda: None):
    """Check whether pending upload ``pu`` has completed; verify a
    completed upload and record a file instance, and delete abandoned or
    misconfigured uploads.  ``_test_shim`` is a test hook called after the
    pre-verification commit."""
    # we can check the upload any time between the expiration of the URL
    # (after which the user can't make any more changes, but the upload
    # may yet be incomplete) and 1 day afterward (ample time for the upload
    # to complete)
    sha512 = pu.file.sha512
    size = pu.file.size
    log = logger.bind(tooltool_sha512=sha512, mozdef=True)
    if time.now() < pu.expires:
        # URL is not expired yet
        return
    elif time.now() > pu.expires + timedelta(days=1):
        # Upload will probably never complete
        log.info(
            "Deleting abandoned pending upload for {}".format(sha512))
        session.delete(pu)
        return

    # connect and see if the file exists..
    s3 = current_app.aws.connect_to('s3', pu.region)
    cfg = current_app.config.get('TOOLTOOL_REGIONS')
    if not cfg or pu.region not in cfg:
        log.warning("Pending upload for {} was to an un-configured "
                    "region".format(sha512))
        session.delete(pu)
        return

    bucket = s3.get_bucket(cfg[pu.region], validate=False)
    key = bucket.get_key(util.keyname(sha512))
    if not key:
        # not uploaded yet
        return

    # commit the session before verifying the file instance, since the
    # DB connection may otherwise go away while we're distracted.
    session.commit()
    _test_shim()
    if not verify_file_instance(sha512, size, key):
        log.warning(
            "Upload of {} was invalid; deleting key".format(sha512))
        key.delete()
        session.delete(pu)
        session.commit()
        return
    log.info("Upload of {} considered valid".format(sha512))
    # add a file instance, but it's OK if it already exists
    try:
        # NOTE(review): the FileInstance is never session.add()ed;
        # presumably attaching it to pu.file cascades it into the
        # session via the relationship -- confirm.
        tables.FileInstance(file=pu.file, region=pu.region)
        session.commit()
    except sa.exc.IntegrityError:
        session.rollback()
    # and delete the pending upload
    session.delete(pu)
    session.commit()
def add_file_to_s3(app, content, region='us-east-1'):
    """Store ``content`` in the configured bucket for ``region`` under its
    sha512-based key name, creating the bucket if it does not exist."""
    with app.app_context():
        conn = app.aws.connect_to('s3', region)
        bucket_name = cfg['TOOLTOOL_REGIONS'][region]
        # make sure the bucket exists before writing to it
        try:
            conn.head_bucket(bucket_name)
        except boto.exception.S3ResponseError:
            conn.create_bucket(bucket_name)
        digest = hashlib.sha512(content).hexdigest()
        target = conn.get_bucket(bucket_name).new_key(util.keyname(digest))
        target.set_contents_from_string(content)
def add_file_to_s3(app, content, region='us-east-1'):
    """Store ``content`` in the configured bucket for ``region`` under its
    sha512-based key name, creating the bucket if it does not exist."""
    with app.app_context():
        conn = app.aws.connect_to('s3', region)
        bucket_name = cfg['TOOLTOOL_REGIONS'][region]
        try:
            conn.head_bucket(bucket_name)
        except boto.exception.S3ResponseError:
            # bucket doesn't exist yet; create it
            conn.create_bucket(bucket_name)
        bucket = conn.get_bucket(bucket_name)
        key_name = util.keyname(hashlib.sha512(content).hexdigest())
        key = bucket.new_key(key_name)
        key.set_contents_from_string(content)
def test_delete_instances_success(app, client):
    """A PATCH with op=delete_instances deletes its instances."""
    add_file_to_db(app, ONE, regions=["us-east-1"])
    add_file_to_s3(app, ONE, region="us-east-1")

    patch_body = [{"op": "delete_instances"}]
    resp = do_patch(client, "sha512", ONE_DIGEST, patch_body)
    assert_file_response(resp, ONE, instances=[])

    with app.app_context():
        # the DB no longer records any instance...
        eq_(tables.File.query.first().instances, [])
        # ...and the object is gone from S3 as well
        bucket = app.aws.connect_to("s3", "us-east-1").get_bucket("tt-use1")
        assert not bucket.get_key(util.keyname(ONE_DIGEST)), "key still exists"
def test_delete_instances_success(app, client):
    """A PATCH with op=delete_instances deletes its instances."""
    add_file_to_db(app, ONE, regions=['us-east-1'])
    add_file_to_s3(app, ONE, region='us-east-1')
    resp = do_patch(client, 'sha512', ONE_DIGEST, [{'op': 'delete_instances'}])
    assert_file_response(resp, ONE, instances=[])
    with app.app_context():
        # ensure instances are gone from the DB
        f = tables.File.query.first()
        eq_(f.instances, [])
        # and from S3
        conn = app.aws.connect_to('s3', 'us-east-1')
        key = conn.get_bucket('tt-use1').get_key(util.keyname(ONE_DIGEST))
        assert not key, "key still exists"
def download_file(digest, region=None):
    """Fetch a link to the file with the given sha512 digest.  The response
    is a 302 redirect to a signed download URL.  The query argument
    ``region=us-west-1`` indicates a preference for a URL in that region,
    although if the file is not available in that region then a URL from
    another region may be returned."""
    log = logger.bind(tooltool_sha512=digest, tooltool_operation='download')
    if not is_valid_sha512(digest):
        raise BadRequest("Invalid sha512 digest")

    # see where the file is..
    tbl = tables.File
    file_row = tbl.query.filter(tbl.sha512 == digest).first()
    if not file_row or not file_row.instances:
        raise NotFound

    # check visibility: anonymous download is only for public files, and
    # only when the config explicitly allows it
    allow_pub_dl = current_app.config.get(
        'TOOLTOOL_ALLOW_ANONYMOUS_PUBLIC_DOWNLOAD')
    if file_row.visibility != 'public' or not allow_pub_dl:
        if not p.get('tooltool.download.{}'.format(file_row.visibility)).can():
            raise Forbidden

    # figure out which region to use, and from there which bucket
    cfg = current_app.config['TOOLTOOL_REGIONS']
    selected_region = None
    for inst in file_row.instances:
        if inst.region == region:
            selected_region = inst.region
            break
    else:
        # preferred region not found, so pick one from the available set
        selected_region = random.choice(
            [inst.region for inst in file_row.instances])
    bucket = cfg[selected_region]

    key = util.keyname(digest)
    s3 = current_app.aws.connect_to('s3', selected_region)
    log.info("generating signed S3 GET URL for {}.. expiring in {}s".format(
        digest[:10], GET_EXPIRES_IN))
    signed_url = s3.generate_url(
        method='GET', expires_in=GET_EXPIRES_IN, bucket=bucket, key=key)
    return redirect(signed_url)
def assert_signed_url(url, digest, method="GET", region=None, expires_in=60,
                      bucket=None):
    """Assert that ``url`` is a signed S3 URL for ``digest`` with the
    expected host, path, access key, and expiration."""
    region = region or "us-east-1"
    bucket = bucket or cfg["TOOLTOOL_REGIONS"][region]
    # us-east-1 uses the regionless S3 endpoint
    if region == "us-east-1":
        expected_host = "{}.s3.amazonaws.com".format(bucket)
    else:
        expected_host = "{}.s3-{}.amazonaws.com".format(bucket, region)

    parsed = urlparse.urlparse(url)
    eq_(parsed.scheme, "https")
    eq_(parsed.netloc, expected_host)
    eq_(parsed.path, "/" + util.keyname(digest))

    query = urlparse.parse_qs(parsed.query)
    assert "Signature" in query
    # sadly, headers are not represented in the URL
    eq_(query["AWSAccessKeyId"][0], "aa")
    eq_(int(query["Expires"][0]), time.time() + expires_in)
def test_replicate_file_race(app):
    """If, while replicating a file, another replication completes and the
    subsequent database insert fails, the replication function nonetheless
    succeeds."""
    with app.app_context():
        file = add_file_row(len(DATA), DATA_DIGEST, instances=['us-east-1'])
        make_key(app, 'us-east-1', 'tt-use1', util.keyname(DATA_DIGEST), DATA)
        make_bucket(app, 'us-west-2', 'tt-usw2')

        def test_shim():
            # invoked inside replicate_file after its pre-copy commit;
            # simulates a concurrent replication recording the same instance
            session = app.db.session('relengapi')
            session.add(tables.FileInstance(file=file, region='us-west-2'))
            session.commit()

        grooming.replicate_file(app.db.session('relengapi'), file,
                                _test_shim=test_shim)
        assert_file_instances(app, DATA_DIGEST, ['us-east-1', 'us-west-2'])
def assert_signed_url(url, digest, method='GET', region=None, expires_in=60,
                      bucket=None):
    """Check each component of a signed S3 URL against expectations."""
    region = region or 'us-east-1'
    bucket = bucket or cfg['TOOLTOOL_REGIONS'][region]
    # us-east-1 is served from the regionless endpoint
    host = ('{}.s3.amazonaws.com'.format(bucket) if region == 'us-east-1'
            else '{}.s3-{}.amazonaws.com'.format(bucket, region))

    url = urlparse.urlparse(url)
    eq_(url.scheme, 'https')
    eq_(url.netloc, host)
    eq_(url.path, '/' + util.keyname(digest))

    query = urlparse.parse_qs(url.query)
    assert 'Signature' in query
    # sadly, headers are not represented in the URL
    eq_(query['AWSAccessKeyId'][0], 'aa')
    eq_(int(query['Expires'][0]), time.time() + expires_in)
def assert_signed_url(url, digest, method='GET', region=None, expires_in=60,
                      bucket=None):
    """Assert that ``url`` is a correctly-formed signed S3 URL for
    ``digest``: https scheme, region-appropriate virtual-hosted host,
    keyname path, a Signature parameter, the test access key id, and the
    expected expiration.  ``method`` is currently unused."""
    region = region or 'us-east-1'
    bucket = bucket or cfg['TOOLTOOL_REGIONS'][region]
    # us-east-1 uses the regionless S3 endpoint
    if region == 'us-east-1':
        host = '{}.s3.amazonaws.com'.format(bucket)
    else:
        host = '{}.s3-{}.amazonaws.com'.format(bucket, region)
    url = urlparse.urlparse(url)
    eq_(url.scheme, 'https')
    eq_(url.netloc, host)
    eq_(url.path, '/' + util.keyname(digest))
    query = urlparse.parse_qs(url.query)
    assert 'Signature' in query
    # sadly, headers are not represented in the URL
    eq_(query['AWSAccessKeyId'][0], 'aa')
    eq_(int(query['Expires'][0]), time.time() + expires_in)
def test_replicate_file_race(app):
    """If, while replicating a file, another replication completes and the
    subsequent database insert fails, the replication function nonetheless
    succeeds."""
    with app.app_context():
        file = add_file_row(len(DATA), DATA_DIGEST, instances=['us-east-1'])
        make_key(app, 'us-east-1', 'tt-use1', util.keyname(DATA_DIGEST), DATA)
        make_bucket(app, 'us-west-2', 'tt-usw2')

        def race_shim():
            # simulate a concurrent replication inserting the same instance
            other = app.db.session('relengapi')
            other.add(tables.FileInstance(file=file, region='us-west-2'))
            other.commit()

        grooming.replicate_file(app.db.session('relengapi'), file,
                                _test_shim=race_shim)
        assert_file_instances(app, DATA_DIGEST, ['us-east-1', 'us-west-2'])
def download_file(digest, region=None):
    """Fetch a link to the file with the given sha512 digest.  The response
    is a 302 redirect to a signed download URL.  The query argument
    ``region=us-west-1`` indicates a preference for a URL in that region,
    although if the file is not available in that region then a URL from
    another region may be returned."""
    log = logger.bind(tooltool_sha512=digest, tooltool_operation='download')
    if not is_valid_sha512(digest):
        raise BadRequest("Invalid sha512 digest")

    # locate the file and make sure at least one instance exists
    file_row = tables.File.query.filter(tables.File.sha512 == digest).first()
    if file_row is None or not file_row.instances:
        raise NotFound

    # anonymous download is allowed only for public files when the config
    # permits; everything else needs the matching download permission
    anon_ok = current_app.config.get(
        'TOOLTOOL_ALLOW_ANONYMOUS_PUBLIC_DOWNLOAD')
    if not (file_row.visibility == 'public' and anon_ok):
        if not p.get('tooltool.download.{}'.format(file_row.visibility)).can():
            raise Forbidden

    # use the preferred region when the file is there, else any region
    cfg = current_app.config['TOOLTOOL_REGIONS']
    available = [inst.region for inst in file_row.instances]
    selected_region = (region if region in available
                       else random.choice(available))

    s3 = current_app.aws.connect_to('s3', selected_region)
    log.info("generating signed S3 GET URL for {}.. expiring in {}s".format(
        digest[:10], GET_EXPIRES_IN))
    signed_url = s3.generate_url(method='GET', expires_in=GET_EXPIRES_IN,
                                 bucket=cfg[selected_region],
                                 key=util.keyname(digest))
    return redirect(signed_url)
def patch_file(digest, body):
    """Make administrative changes to an existing file.

    The body is a list of changes to apply, each represented by a JSON
    object.

    The object ``{"op": "delete_instances"}`` will cause all instances of
    the file to be deleted.  The file record itself will not be deleted,
    as it is still a part of one or more upload batches, but until and
    unless someone uploads a new copy, the content will not be available
    for download.

    If the change has op ``"set_visibility"``, then the file's visibility
    will be set to the value given by the change's ``visibility``
    attribute.  For example, ``{"op": "set_visibility", "visibility":
    "internal"}`` will mark a file as "internal" after someone has
    accidentally uploaded it with public visibility.

    The returned File instance contains an ``instances`` attribute showing
    any changes."""
    session = current_app.db.session('relengapi')
    file = session.query(
        tables.File).filter(tables.File.sha512 == digest).first()
    if not file:
        raise NotFound

    for change in body:
        if 'op' not in change:
            raise BadRequest("no op")
        if change['op'] == 'delete_instances':
            key_name = util.keyname(digest)
            cfg = current_app.config.get('TOOLTOOL_REGIONS')
            for i in file.instances:
                conn = current_app.aws.connect_to('s3', i.region)
                # NOTE(review): if i.region was removed from the
                # TOOLTOOL_REGIONS config, cfg[i.region] raises KeyError --
                # confirm whether un-configured regions can appear here.
                bucket = conn.get_bucket(cfg[i.region])
                bucket.delete_key(key_name)
                session.delete(i)
        elif change['op'] == 'set_visibility':
            if change['visibility'] not in ('internal', 'public'):
                raise BadRequest("bad visibility level")
            file.visibility = change['visibility']
        else:
            raise BadRequest("unknown op")
    session.commit()
    return file.to_json(include_instances=True)
def patch_file(digest, body):
    """Make administrative changes to an existing file.

    The body is a list of changes to apply, each represented by a JSON
    object.  ``{"op": "delete_instances"}`` deletes every instance of the
    file (the file record remains, since it belongs to upload batches, but
    the content cannot be downloaded until re-uploaded).  ``{"op":
    "set_visibility", "visibility": ...}`` changes the file's visibility
    level.  Returns the File, with an ``instances`` attribute showing any
    changes."""
    session = current_app.db.session('relengapi')
    file = session.query(tables.File).filter(
        tables.File.sha512 == digest).first()
    if file is None:
        raise NotFound

    for change in body:
        if 'op' not in change:
            raise BadRequest("no op")
        op = change['op']
        if op == 'delete_instances':
            key_name = util.keyname(digest)
            cfg = current_app.config.get('TOOLTOOL_REGIONS')
            # remove the S3 object and DB row for every instance
            for instance in file.instances:
                conn = current_app.aws.connect_to('s3', instance.region)
                conn.get_bucket(cfg[instance.region]).delete_key(key_name)
                session.delete(instance)
        elif op == 'set_visibility':
            if change['visibility'] not in ('internal', 'public'):
                raise BadRequest("bad visibility level")
            file.visibility = change['visibility']
        else:
            raise BadRequest("unknown op")

    session.commit()
    return file.to_json(include_instances=True)
def replicate_file(session, file, _test_shim=lambda: None):
    """Copy ``file`` to every configured region that lacks an instance,
    recording a FileInstance row for each new copy.  ``_test_shim`` runs
    after each pre-copy commit, as a hook for race-condition tests."""
    log = logger.bind(tooltool_sha512=file.sha512)
    config = current_app.config["TOOLTOOL_REGIONS"]
    regions = set(config)
    file_regions = set([i.region for i in file.instances])
    # only use configured source regions; if a region is removed
    # from the configuration, we can't copy from it.
    source_regions = file_regions & regions
    if not source_regions:
        # this should only happen when the only region containing a
        # file is removed from the configuration
        log.warning("no source regions for {}".format(file.sha512))
        return
    source_region = source_regions.pop()
    source_bucket = config[source_region]
    target_regions = regions - file_regions
    log.info("replicating {} from {} to {}".format(file.sha512,
                                                   source_region,
                                                   ", ".join(target_regions)))
    key_name = util.keyname(file.sha512)
    for target_region in target_regions:
        target_bucket = config[target_region]
        conn = current_app.aws.connect_to("s3", target_region)
        bucket = conn.get_bucket(target_bucket)
        # commit the session before replicating, since the DB connection may
        # otherwise go away while we're distracted.
        session.commit()
        _test_shim()
        # server-side S3 copy from the source bucket
        bucket.copy_key(
            new_key_name=key_name,
            src_key_name=key_name,
            src_bucket_name=source_bucket,
            storage_class="STANDARD",
            preserve_acl=False,
        )
        try:
            session.add(tables.FileInstance(file=file, region=target_region))
            session.commit()
        except sa.exc.IntegrityError:
            # a concurrent replication recorded this instance first; fine
            session.rollback()
def replicate_file(session, file, _test_shim=lambda: None):
    """Copy ``file`` to every configured region that does not yet hold an
    instance of it, recording a FileInstance row for each new copy.
    ``_test_shim`` runs after each pre-copy commit (race-test hook)."""
    log = logger.bind(tooltool_sha512=file.sha512, mozdef=True)
    config = current_app.config['TOOLTOOL_REGIONS']
    configured = set(config)
    present = set(i.region for i in file.instances)

    # only use configured source regions; if a region is removed
    # from the configuration, we can't copy from it.
    usable_sources = present & configured
    if not usable_sources:
        # this should only happen when the only region containing a
        # file is removed from the configuration
        log.warning("no source regions for {}".format(file.sha512))
        return

    src = usable_sources.pop()
    src_bucket_name = config[src]
    targets = configured - present
    log.info("replicating {} from {} to {}".format(file.sha512, src,
                                                   ', '.join(targets)))
    key_name = util.keyname(file.sha512)

    for dest in targets:
        dest_bucket_name = config[dest]
        conn = current_app.aws.connect_to('s3', dest)
        dest_bucket = conn.get_bucket(dest_bucket_name)
        # commit the session before replicating, since the DB connection may
        # otherwise go away while we're distracted.
        session.commit()
        _test_shim()
        dest_bucket.copy_key(new_key_name=key_name,
                             src_key_name=key_name,
                             src_bucket_name=src_bucket_name,
                             storage_class='STANDARD',
                             preserve_acl=False)
        try:
            session.add(tables.FileInstance(file=file, region=dest))
            session.commit()
        except sa.exc.IntegrityError:
            # a concurrent replication already recorded this instance
            session.rollback()
def upload_batch(region=None, body=None):
    """Create a new upload batch.  The response object will contain a
    ``put_url`` for each file which needs to be uploaded -- which may not
    be all!  The caller is then responsible for uploading to those URLs.
    The resulting signed URLs are valid for one hour, so uploads should
    begin within that timeframe.  Consider using Amazon's MD5-verification
    capabilities to ensure that the uploaded files are transferred
    correctly, although the tooltool server will verify the integrity
    anyway.  The upload must have the header ``Content-Type:
    application/octet-stream``.

    The query argument ``region=us-west-1`` indicates a preference for URLs
    in that region, although if the region is not available then URLs in
    other regions may be returned.

    The returned URLs are only valid for 60 seconds, so all upload requests
    must begin within that timeframe.  Clients should therefore perform all
    uploads in parallel, rather than sequentially.  This limitation is in
    place to prevent malicious modification of files after they have been
    verified."""
    region, bucket = get_region_and_bucket(region)

    if not body.message:
        raise BadRequest("message must be non-empty")
    if not body.files:
        raise BadRequest("a batch must include at least one file")

    # the author is always the authenticated user; callers may not set it
    if body.author:
        raise BadRequest("Author must not be specified for upload")
    try:
        body.author = current_user.authenticated_email
    except AttributeError:
        raise BadRequest("Could not determine authenticated username")

    # verify permissions based on visibilities
    visibilities = set(f.visibility for f in body.files.itervalues())
    for v in visibilities:
        prm = p.get('tooltool.upload.{}'.format(v))
        if not prm or not prm.can():
            raise Forbidden("no permission to upload {} files".format(v))

    session = g.db.session('relengapi')
    batch = tables.Batch(
        uploaded=time.now(),
        author=body.author,
        message=body.message)

    s3 = current_app.aws.connect_to('s3', region)
    for filename, info in body.files.iteritems():
        # NOTE(review): batch.id is not populated until the batch is
        # flushed/committed, so this binding may carry None -- confirm.
        log = logger.bind(tooltool_sha512=info.digest,
                          tooltool_operation='upload',
                          tooltool_batch_id=batch.id)
        if info.algorithm != 'sha512':
            raise BadRequest("'sha512' is the only allowed digest algorithm")
        if not is_valid_sha512(info.digest):
            raise BadRequest("Invalid sha512 digest")
        digest = info.digest
        file = tables.File.query.filter(tables.File.sha512 == digest).first()
        if file and file.visibility != info.visibility:
            raise BadRequest("Cannot change a file's visibility level")
        if file and file.instances != []:
            # content already present somewhere; only check consistency
            if file.size != info.size:
                raise BadRequest("Size mismatch for {}".format(filename))
        else:
            if not file:
                file = tables.File(
                    sha512=digest,
                    visibility=info.visibility,
                    size=info.size)
                session.add(file)
            log.info("generating signed S3 PUT URL to {} for {}; "
                     "expiring in {}s".format(
                         info.digest[:10], current_user, UPLOAD_EXPIRES_IN))
            info.put_url = s3.generate_url(
                method='PUT', expires_in=UPLOAD_EXPIRES_IN, bucket=bucket,
                key=util.keyname(info.digest),
                headers={'Content-Type': 'application/octet-stream'})
            # The PendingUpload row needs to reflect the updated expiration
            # time, even if there's an existing pending upload that expires
            # earlier.  The `merge` method does a SELECT and then either
            # UPDATEs or INSERTs the row.  However, merge needs the file_id,
            # rather than just a reference to the file object; and for that,
            # we need to flush the inserted file.
            session.flush()
            pu = tables.PendingUpload(
                file_id=file.id,
                region=region,
                expires=time.now() +
                    datetime.timedelta(seconds=UPLOAD_EXPIRES_IN))
            session.merge(pu)
        session.add(tables.BatchFile(filename=filename, file=file,
                                     batch=batch))
    session.add(batch)
    session.commit()

    body.id = batch.id
    return body
# stdlib imports (os/hashlib were missing but are used by DATA/DATA_DIGEST)
import hashlib
import os

import boto
import mock
import moto
from flask import current_app
from nose.tools import eq_

from relengapi.blueprints.tooltool import grooming
from relengapi.blueprints.tooltool import tables
from relengapi.blueprints.tooltool import util
from relengapi.lib import time
from relengapi.lib.testing.context import TestContext

# random test payload, its sha512 digest, and its S3 key name,
# shared by the tests in this module
DATA = os.urandom(10240)
DATA_DIGEST = hashlib.sha512(DATA).hexdigest()
DATA_KEY = util.keyname(DATA_DIGEST)

# fixed epoch timestamp for tests that need a stable "now"
NOW = 1425592922

# app configuration for the test context: dummy AWS credentials and two
# tooltool regions, each with its own bucket
cfg = {
    'AWS': {
        'access_key_id': 'aa',
        'secret_access_key': 'ss',
    },
    'TOOLTOOL_REGIONS': {
        'us-east-1': 'tt-use1',
        'us-west-2': 'tt-usw2',
    }
}

test_context = TestContext(config=cfg, databases=['relengapi'])
def test_keyname():
    """keyname() prefixes the hex digest with the 'sha512/' folder."""
    expected = 'sha512/' + ONE_DIGEST
    eq_(util.keyname(ONE_DIGEST), expected)
def upload_batch(region=None, body=None):
    """Create a new upload batch.  The response object will contain a
    ``put_url`` for each file which needs to be uploaded -- which may not
    be all!  The caller is then responsible for uploading to those URLs.
    The resulting signed URLs are valid for one hour, so uploads should
    begin within that timeframe.  Consider using Amazon's MD5-verification
    capabilities to ensure that the uploaded files are transferred
    correctly, although the tooltool server will verify the integrity
    anyway.  The upload must have the header ``Content-Type:
    application/octet-stream``.

    The query argument ``region=us-west-1`` indicates a preference for URLs
    in that region, although if the region is not available then URLs in
    other regions may be returned.

    The returned URLs are only valid for 60 seconds, so all upload requests
    must begin within that timeframe.  Clients should therefore perform all
    uploads in parallel, rather than sequentially.  This limitation is in
    place to prevent malicious modification of files after they have been
    verified."""
    region, bucket = get_region_and_bucket(region)

    if not body.message:
        raise BadRequest("message must be non-empty")
    if not body.files:
        raise BadRequest("a batch must include at least one file")

    # the author is always the authenticated user; callers may not set it
    if body.author:
        raise BadRequest("Author must not be specified for upload")
    try:
        body.author = current_user.authenticated_email
    except AttributeError:
        raise BadRequest("Could not determine authenticated username")

    # verify permissions based on visibilities
    visibilities = set(f.visibility for f in body.files.itervalues())
    for v in visibilities:
        prm = p.get('tooltool.upload.{}'.format(v))
        if not prm or not prm.can():
            raise Forbidden("no permission to upload {} files".format(v))

    session = g.db.session('relengapi')
    batch = tables.Batch(uploaded=time.now(),
                         author=body.author,
                         message=body.message)

    s3 = current_app.aws.connect_to('s3', region)
    for filename, info in body.files.iteritems():
        # NOTE(review): batch.id is not populated until the batch is
        # flushed/committed, so this binding may carry None -- confirm.
        log = logger.bind(tooltool_sha512=info.digest,
                          tooltool_operation='upload',
                          tooltool_batch_id=batch.id,
                          mozdef=True)
        if info.algorithm != 'sha512':
            raise BadRequest("'sha512' is the only allowed digest algorithm")
        if not is_valid_sha512(info.digest):
            raise BadRequest("Invalid sha512 digest")
        digest = info.digest
        file = tables.File.query.filter(tables.File.sha512 == digest).first()
        if file and file.visibility != info.visibility:
            raise BadRequest("Cannot change a file's visibility level")
        if file and file.instances != []:
            # content already present somewhere; only check consistency
            if file.size != info.size:
                raise BadRequest("Size mismatch for {}".format(filename))
        else:
            if not file:
                file = tables.File(sha512=digest,
                                   visibility=info.visibility,
                                   size=info.size)
                session.add(file)
            log.info(
                "generating signed S3 PUT URL to {} for {}; expiring in {}s".
                format(info.digest[:10], current_user, UPLOAD_EXPIRES_IN))
            info.put_url = s3.generate_url(
                method='PUT', expires_in=UPLOAD_EXPIRES_IN, bucket=bucket,
                key=util.keyname(info.digest),
                headers={'Content-Type': 'application/octet-stream'})
            # The PendingUpload row needs to reflect the updated expiration
            # time, even if there's an existing pending upload that expires
            # earlier.  The `merge` method does a SELECT and then either
            # UPDATEs or INSERTs the row.  However, merge needs the file_id,
            # rather than just a reference to the file object; and for that,
            # we need to flush the inserted file.
            session.flush()
            pu = tables.PendingUpload(
                file_id=file.id,
                region=region,
                expires=time.now() +
                    datetime.timedelta(seconds=UPLOAD_EXPIRES_IN))
            session.merge(pu)
        session.add(tables.BatchFile(filename=filename, file=file,
                                     batch=batch))
    session.add(batch)
    session.commit()

    body.id = batch.id
    return body
# stdlib imports (hashlib was missing but is used by DATA_DIGEST)
import hashlib
import os
from contextlib import contextmanager
from datetime import datetime
from datetime import timedelta

from flask import current_app
from nose.tools import eq_

from relengapi.blueprints.tooltool import grooming
from relengapi.blueprints.tooltool import tables
from relengapi.blueprints.tooltool import util
from relengapi.lib import time
from relengapi.lib.testing.context import TestContext

# random test payload, its sha512 digest, and its S3 key name,
# shared by the tests in this module
DATA = os.urandom(10240)
DATA_DIGEST = hashlib.sha512(DATA).hexdigest()
DATA_KEY = util.keyname(DATA_DIGEST)

# fixed epoch timestamp for tests that need a stable "now"
NOW = 1425592922

# app configuration for the test context: dummy AWS credentials and two
# tooltool regions, each with its own bucket
cfg = {
    'AWS': {
        'access_key_id': 'aa',
        'secret_access_key': 'ss',
    },
    'TOOLTOOL_REGIONS': {
        'us-east-1': 'tt-use1',
        'us-west-2': 'tt-usw2',
    }
}

test_context = TestContext(config=cfg, databases=['relengapi'])