def load(context, url, callback):
    enable_http_loader = context.config.get('AWS_ENABLE_HTTP_LOADER', default=False)

    if enable_http_loader and url.startswith('http'):
        return http_loader.load_sync(context, url, callback, normalize_url_func=_normalize_url)

    url = urllib2.unquote(url)

    bucket = context.config.get('S3_LOADER_BUCKET', default=None)
    if not bucket:
        bucket, url = _get_bucket(url)

    if _validate_bucket(context, bucket):
        bucket_loader = Bucket(
            connection=thumbor_aws.connection.get_connection(context),
            name=bucket
        )

        file_key = None
        try:
            file_key = bucket_loader.get_key(url)
        except Exception as e:
            logger.warn("ERROR retrieving image from S3 {0}: {1}".format(url, str(e)))

        if file_key:
            callback(file_key.read())
            return
def test_basic_anon(self):
    auth_con = S3Connection()

    # create a new, empty bucket
    bucket_name = 'test-%d' % int(time.time())
    auth_bucket = auth_con.create_bucket(bucket_name)

    # try to read the bucket anonymously
    anon_con = S3Connection(anon=True)
    anon_bucket = Bucket(anon_con, bucket_name)
    try:
        next(iter(anon_bucket.list()))
        self.fail("anon bucket list should fail")
    except S3ResponseError:
        pass

    # give the bucket anonymous-user access and read anonymously again
    auth_bucket.set_acl('public-read')
    time.sleep(10)  # was 5 seconds; turns out that's not enough
    try:
        next(iter(anon_bucket.list()))
        self.fail("not expecting contents")
    except S3ResponseError as e:
        self.fail("We should have public-read access, but received "
                  "an error: %s" % e)
    except StopIteration:
        pass

    # cleanup
    auth_con.delete_bucket(auth_bucket)
def rmup(args):
    parser = option_parser("rmup URL [UPLOAD]")
    parser.add_option("-a", "--all", dest="all", action="store_true", default=False,
                      help="Cancel all uploads for the specified bucket")
    options, args = parser.parse_args(args)

    if options.all:
        if len(args) < 1:
            parser.error("Specify bucket URL")
    else:
        if len(args) != 2:
            parser.error("Specify bucket URL and UPLOAD")
        upload = args[1]

    uri = parse_uri(args[0])
    if uri.bucket is None:
        raise Exception("URL must contain a bucket: %s" % args[0])
    if uri.key is not None:
        raise Exception("URL cannot contain a key: %s" % args[0])

    config = get_config(options)
    conn = get_connection(config, uri)

    # There is no easy way to do this with boto
    b = Bucket(connection=conn, name=uri.bucket)
    for up in b.list_multipart_uploads():
        if options.all or up.id == upload:
            info("Removing upload %s" % up.id)
            up.cancel_upload()
def test_basic_anon(self):
    auth_con = S3Connection()

    # create a new, empty bucket
    bucket_name = 'test-%d' % int(time.time())
    auth_bucket = auth_con.create_bucket(bucket_name)

    # try to read the bucket anonymously
    anon_con = S3Connection(anon=True)
    anon_bucket = Bucket(anon_con, bucket_name)
    try:
        iter(anon_bucket.list()).next()
        self.fail("anon bucket list should fail")
    except S3ResponseError:
        pass

    # give the bucket anonymous-user access and read anonymously again
    auth_bucket.set_acl('public-read')
    try:
        iter(anon_bucket.list()).next()
        self.fail("not expecting contents")
    except S3ResponseError:
        self.fail("we should have public-read access.")
    except StopIteration:
        pass

    # cleanup
    auth_con.delete_bucket(auth_bucket)
def load(context, url, callback):
    enable_http_loader = context.config.get('AWS_ENABLE_HTTP_LOADER', default=False)

    if enable_http_loader and url.startswith('http'):
        return http_loader.load_sync(context, url, callback, normalize_url_func=_normalize_url)

    url = urllib2.unquote(url)

    bucket = context.config.get('S3_LOADER_BUCKET', default=None)
    if not bucket:
        bucket, url = _get_bucket(url, root_path=context.config.S3_LOADER_ROOT_PATH)

    if _validate_bucket(context, bucket):
        bucket_loader = Bucket(connection=get_connection(context), name=bucket)

        file_key = None
        try:
            file_key = bucket_loader.get_key(url)
        except Exception as e:
            logger.warn("ERROR retrieving image from S3 {0}: {1}".format(
                url, str(e)))

        if file_key:
            callback(file_key.read())
            return
def load(context, url, callback):
    enable_http_loader = context.config.get('AWS_ENABLE_HTTP_LOADER', default=False)

    if enable_http_loader and 'http' in url:
        return http_loader.load(context, url, callback)

    url = urllib2.unquote(url)

    if context.config.S3_LOADER_BUCKET:
        bucket = context.config.S3_LOADER_BUCKET
    else:
        bucket, url = _get_bucket(url)

    if not _validate_bucket(context, bucket):
        return callback(None)

    bucket_loader = Bucket(
        connection=thumbor_aws.connection.get_connection(context),
        name=bucket
    )

    file_key = bucket_loader.get_key(url)
    if not file_key:
        return callback(None)

    return callback(file_key.read())
def setUp(self):
    trytond.tests.test_tryton.install_module('nereid_s3')
    self.static_file = POOL.get('nereid.static.file')
    self.static_folder = POOL.get('nereid.static.folder')

    # Mock S3Connection
    self.s3_api_patcher = patch(
        'boto.s3.connection.S3Connection', autospec=True
    )
    PatchedS3 = self.s3_api_patcher.start()

    # Mock S3Key
    self.s3_key_patcher = patch(
        'boto.s3.key.Key', autospec=True
    )
    PatchedS3Key = self.s3_key_patcher.start()

    PatchedS3.return_value = connection.S3Connection('ABCD', '123XYZ')
    PatchedS3.return_value.get_bucket = lambda bucket_name: Bucket(
        PatchedS3.return_value, 'tryton-test-s3'
    )

    PatchedS3Key.return_value = Key(
        Bucket(PatchedS3.return_value, 'tryton-test-s3'), 'some key'
    )
    PatchedS3Key.return_value.key = "some key"
    PatchedS3Key.return_value.get_contents_as_string = lambda *a: 'testfile'
    PatchedS3Key.return_value.set_contents_from_string = \
        lambda value: 'testfile'
def get_file(self):
    b = Bucket(self.conn, 'Bucket01')
    key = b.get_key('file10m.dat')
    filename = str(uuid.uuid4())
    key.get_contents_to_filename('%s.tmp' % filename)

    # remove the file from the local fs
    os.remove('%s.tmp' % filename)
def getNextResults(pid):
    try:
        conn = boto.connect_s3(host='tims4.mobi-cloud.com', port=80, is_secure=False)
    except Exception as e:
        conn = boto.connect_s3(
            aws_access_key_id="00c36f16c2600f70ae60",
            aws_secret_access_key="XsSbmCIfcYrX5NdCBj7n1QSaU2lhdgDJJBDlT7VE",
            host='tims4.mobi-cloud.com',
            port=80,
            is_secure=False)

    bucket = Bucket(conn, bucket_name)
    results = bucket.get_all_keys(
        max_keys=1,
        headers=None,
        prefix="topics/" + str(my_topic) + "/ProducerID=" + str(pid) + "/",
        marker=idToLastResult[pid])

    if len(results) == 1:
        print(keyToFileName(results[0]))

    if len(results) > 0:
        idToLastResult[pid] = keyToFileName(results[0])
        results[0].get_contents_to_filename("this.json")
        return results[0]
    else:
        return None
def load(context, url, callback):
    enable_http_loader = context.config.get('AWS_ENABLE_HTTP_LOADER', default=False)

    if enable_http_loader and 'http' in url:
        return http_loader.load(context, url, callback)

    url = urllib2.unquote(url)

    if context.config.S3_LOADER_BUCKET:
        bucket = context.config.S3_LOADER_BUCKET
    else:
        bucket, url = _get_bucket(url)

    if not _validate_bucket(context, bucket):
        return callback(None)

    bucket_loader = Bucket(
        connection=thumbor_aws.connection.get_connection(context),
        name=bucket)

    file_key = bucket_loader.get_key(url)
    if not file_key:
        return callback(None)

    return callback(file_key.read())
class Compost(object):
    def __init__(self, directory, bucket):
        self.directory = directory
        self.bucket = Bucket(connection=boto.connect_s3(), name=bucket)

    def turn(self):
        """
        'Turn' the compost, i.e. make a backup of all files in the local
        directory.
        """
        for filename, full_path in self._local_files():
            logger.debug('backing up {}'.format(filename))
            key = self.bucket.new_key(filename)
            key.set_contents_from_filename(full_path)

    def list(self):
        """Return a list of known backed up files."""
        return [k.name for k in self.bucket.get_all_keys()]

    def read(self, filename):
        """
        Return the contents of the named file, or the empty string if the
        file does not exist.
        """
        key = self.bucket.get_key(filename)
        if key is None:
            return ''
        return key.get_contents_as_string()

    def _local_files(self):
        for f in os.listdir(self.directory):
            yield f, os.path.join(self.directory, f)
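# A minimal usage sketch for the Compost class above; the directory and bucket
# name are hypothetical placeholders, and boto credentials are assumed to be
# available from the environment.
composter = Compost('/var/backups', 'example-compost-bucket')
composter.turn()                    # back up every file in /var/backups
print(composter.list())             # names of the files stored in the bucket
print(composter.read('notes.txt'))  # contents of one backed-up file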
def ls(bucket_name, prefix='', pattern=None):
    connection = boto.connect_s3()
    bucket = Bucket(connection, bucket_name)
    keys = [k.key for k in bucket.list(prefix=prefix)]
    if pattern:
        regex = re.compile(pattern)
        keys = [k for k in keys if regex.search(k)]
    return keys
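# A hedged usage sketch for the ls() helper above: list the .csv keys under a
# prefix. The bucket name and prefix are hypothetical placeholders, and default
# boto credentials are assumed.
csv_keys = ls('example-data-bucket', prefix='exports/2015/', pattern=r'\.csv$')
for key_name in csv_keys:
    print(key_name)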
def test_restore_header_with_ongoing_restore(self):
    self.set_http_response(
        status_code=200,
        header=[('x-amz-restore', 'ongoing-request="true"')])
    b = Bucket(self.service_connection, 'mybucket')
    k = b.get_key('myglacierkey')
    self.assertTrue(k.ongoing_restore)
    self.assertIsNone(k.expiry_date)
def test_500_retry(self, sleep_mock):
    self.set_http_response(status_code=500)
    b = Bucket(self.service_connection, 'mybucket')
    k = b.new_key('test_failure')
    fail_file = StringIO('This will attempt to retry.')

    with self.assertRaises(BotoServerError):
        k.send_file(fail_file)
def empty_bucket(bucket_name):
    """Destructive helper."""
    import boto
    from boto.s3.bucket import Bucket

    bucket = Bucket(connection=boto.connect_s3(), name=bucket_name)
    for key in bucket.get_all_keys():
        key.delete()
def get_or_create_key(self, blob):
    bucket_name = self.get_bucket_name(blob.content_object)
    bucket = Bucket(self.connection, bucket_name)
    key_name = self.get_key_name(blob)
    key = bucket.get_key(key_name)
    if key is None:
        key = bucket.new_key(key_name)
    return key
def test_restore_header_with_ongoing_restore(self):
    self.set_http_response(
        status_code=200,
        header=[('x-amz-restore', 'ongoing-request="true"')])
    b = Bucket(self.service_connection, 'mybucket')
    k = b.get_key('myglacierkey')
    self.assertTrue(k.ongoing_restore)
    self.assertIsNone(k.expiry_date)
def test_restore_completed(self):
    self.set_http_response(
        status_code=200,
        header=[('x-amz-restore',
                 'ongoing-request="false", '
                 'expiry-date="Fri, 21 Dec 2012 00:00:00 GMT"')])
    b = Bucket(self.service_connection, 'mybucket')
    k = b.get_key('myglacierkey')
    self.assertFalse(k.ongoing_restore)
    self.assertEqual(k.expiry_date, 'Fri, 21 Dec 2012 00:00:00 GMT')
def __init__(self):
    super(S3StaticFileStorage, self).__init__()
    self._bucket = Bucket(connection=s3_conn, name=self.BUCKET_NAME)
    self._bucket_public = Bucket(connection=s3_public_conn, name=self.BUCKET_NAME)
    if s3_conn.lookup(self.BUCKET_NAME) is None:
        s3_conn.create_bucket(self.BUCKET_NAME, policy='public-read')
    # Allow CORS access (for web fonts)
    self._bucket.set_cors(self._get_cors_config())
def __init__(self, jobflow, cluster_id, cluster_name, bucket, prefix, **kwargs):
    super(S3Mr2LogMonitor, self).__init__(**kwargs)
    self.jobflow = jobflow
    self.cluster_id = cluster_id
    self.cluster_name = cluster_name
    self.bucket = bucket
    self.prefix = prefix
    self.emr_logs = Bucket(boto.connect_s3(), bucket)
def test_500_retry(self):
    self.set_http_response(status_code=500)
    b = Bucket(self.service_connection, 'mybucket')
    k = b.new_key('test_failure')
    fail_file = StringIO('This will attempt to retry.')

    try:
        k.send_file(fail_file)
        self.fail("This shouldn't ever succeed.")
    except BotoServerError:
        pass
def test_should_not_raise_kms_related_integrity_errors(self):
    self.set_http_response(
        status_code=200,
        header=[('x-amz-server-side-encryption-aws-kms-key-id', 'key'),
                ('etag', 'not equal to key.md5')])
    bucket = Bucket(self.service_connection, 'mybucket')
    key = bucket.new_key('test_kms')
    file_content = StringIO('Some content to upload.')

    # Should not raise errors related to integrity checks:
    key.send_file(file_content)
def main(arg):
    # Copy a key when the copy-source bucket does not exist
    try:
        bucket = Bucket()
        bucket = conn.create_bucket(arg[0])
        bucket2 = conn.create_bucket(arg[1])
        key = bucket.new_key('test.txt')
        key.set_contents_from_string("Hello World!")
        bucket2.copy_key('cptest', 'nosuchbucket', 'test.txt')
    except S3ResponseError as e:
        Expectexception(e, 404)
def delete(self, *args, **kwargs):
    if settings.DEBUG:
        os.remove(self.image.path)
    else:
        conn = S3Connection(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)
        b = Bucket(conn, settings.AWS_STORAGE_BUCKET_NAME)
        k = Key(b)
        k.key = self.image.url.replace(
            'https://my_outfits.s3.amazonaws.com/', '').split('?')[0]
        b.delete_key(k)
    super(ImageUpload, self).delete(*args, **kwargs)
def test_parse_tagging_response(self):
    self.set_http_response(status_code=200)
    b = Bucket(self.service_connection, 'mybucket')
    api_response = b.get_tags()

    # The outer list is a list of tag sets.
    self.assertEqual(len(api_response), 1)

    # The inner list is a list of tags.
    self.assertEqual(len(api_response[0]), 2)

    self.assertEqual(api_response[0][0].key, 'Project')
    self.assertEqual(api_response[0][0].value, 'Project One')
    self.assertEqual(api_response[0][1].key, 'User')
    self.assertEqual(api_response[0][1].value, 'jsmith')
def remove_from_s3(self):
    """
    Removes the file for this model instance from S3.
    """
    conn = boto.connect_s3()

    # Loop over buckets (we have more than one) and remove this playground.
    if app_config.S3_BUCKETS:
        for bucket in app_config.S3_BUCKETS:
            b = Bucket(conn, bucket)
            k = Key(b)
            k.key = '%s/playground/%s.html' % (app_config.PROJECT_SLUG, self.slug)
            b.delete_key(k)
def initialize_jobs(bucket_name):
    setup_context()
    jobs_count = 0

    conn = S3Connection(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    bucket = Bucket(connection=conn, name=bucket_name)

    for key in bucket.list():
        queue_job("tasks.Download",
                  {"bucket_name": bucket_name, "key_name": key.key},
                  queue=download_queue)
        jobs_count += 1

    return jobs_count
def listS3FilesForId(self, id):
    from boto.s3.connection import S3Connection
    from boto.s3.bucket import Bucket

    logging.info("AWS keys: %s %s: ", self.access_key_id, self.aws_secret_access_key)
    conn = S3Connection(self.access_key_id, self.aws_secret_access_key)
    bucketName = "files.littleshoot.org"
    bucket = Bucket(conn, bucketName)
    prefix = 'user/' + id + '/files/'
    response = bucket.list(prefix=prefix, delimiter='/')
    baseUrl = "http://" + bucketName + "/"

    files = []
    # The results are all Keys. See:
    # http://boto.s3.amazonaws.com/index.html
    for key in response:
        logging.info("Got full key: %s", key)
        #fullPath = "http://" + bucketName + "/" + key.name
        rawNameArray = key.name.split('/')
        index = len(rawNameArray) - 1
        title = rawNameArray[index]

        #digest = hmac.new(self.aws_secret_access_key, key.name, sha).digest()
        #path = base64.encodestring(digest).strip()
        ##logging.info("Path is: %s", path)
        #url = baseUrl + key.name
        url = baseUrl + key.name + "?torrent"
        short = link.shorten(url, title, key.size)

        mimeType, encoding = mimetypes.guess_type(title)
        if mimeType is None:
            mimeType = 'application/octet-stream'

        mediaType = typeTranslator.getType(title)

        file = {
            'title': title,
            'size': key.size,
            'lastModified': key.last_modified,
            'uri': short,
            'mimeType': mimeType,
            'mediaType': mediaType
        }
        files.append(file)

    data = {'files': files}
    json = simplejson.dumps(data)
    logging.info('JSON output: \n %s', json)
    return json
def test_parse_lifecycle_response(self):
    self.set_http_response(status_code=200)
    bucket = Bucket(self.service_connection, 'mybucket')
    response = bucket.get_lifecycle_config()
    self.assertEqual(len(response), 2)

    rule = response[0]
    self.assertEqual(rule.id, 'rule-1')
    self.assertEqual(rule.prefix, 'prefix/foo')
    self.assertEqual(rule.status, 'Enabled')
    self.assertEqual(rule.expiration, 365)

    transition = rule.transition
    self.assertEqual(transition.days, 30)
    self.assertEqual(transition.storage_class, 'GLACIER')

    self.assertEqual(response[1].transition.date, '2012-12-31T00:00:000Z')
def test_400_timeout(self, sleep_mock):
    weird_timeout_body = "<Error><Code>RequestTimeout</Code></Error>"
    self.set_http_response(status_code=400, body=weird_timeout_body)
    b = Bucket(self.service_connection, 'mybucket')
    k = b.new_key('test_failure')
    fail_file = StringIO('This will pretend to be chunk-able.')

    k.should_retry = counter(k.should_retry)
    self.assertEqual(k.should_retry.count, 0)

    with self.assertRaises(BotoServerError):
        k.send_file(fail_file)

    self.assertTrue(k.should_retry.count, 1)
def test_504_gateway_timeout(self, sleep_mock):
    weird_timeout_body = "<Error><Code>GatewayTimeout</Code></Error>"
    self.set_http_response(status_code=504, body=weird_timeout_body)
    b = Bucket(self.service_connection, 'mybucket')
    k = b.new_key('test_failure')
    fail_file = StringIO('This will pretend to be chunk-able.')

    k.should_retry = counter(k.should_retry)
    self.assertEqual(k.should_retry.count, 0)

    with self.assertRaises(BotoServerError):
        k.send_file(fail_file)

    self.assertTrue(k.should_retry.count, 1)
def upload(user, passwd, bucket, metadata, key, fd):
    conn = S3Connection(user, passwd, host=settings.S3_HOST, is_secure=False)
    bucket = '{}-{}'.format(user.lower(), bucket.strip('-'))
    try:
        bucket = conn.create_bucket(bucket, headers=metadata)
    except S3CreateError as e:
        if e.status == 409:
            bucket = Bucket(conn, bucket)
        else:
            raise
    key = bucket.new_key(key)
    key.set_contents_from_file(fd)
    return key.generate_url(0).split('?')[0]
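# A hedged usage sketch for upload() above; the credentials, bucket suffix,
# and file names are hypothetical placeholders, and settings.S3_HOST is
# assumed to point at the target S3-compatible endpoint. Note the function
# uses its first two arguments both as credentials and to build the bucket
# name.
with open('report.pdf', 'rb') as fd:
    public_url = upload('AKIAEXAMPLE', 'example-secret', 'reports', {}, 'report.pdf', fd)
print(public_url)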
def test_acl_permission_choices_for_create_bucket(self):
    bucket = Bucket()
    form = self.form_class(self.request, bucket_object=bucket)
    permission_choices = dict(form.get_permission_choices())
    self.assertEqual(permission_choices.get('FULL_CONTROL'), 'Full Control')
    self.assertEqual(permission_choices.get('READ'), 'View/Download objects')
    self.assertEqual(permission_choices.get('WRITE'), 'Create/delete objects')
def main():
    authenticate()
    connection = S3Connection(AWS_ACCESS_ID, os.environ['aws-secret-key'])
    bucket = Bucket(connection, S3_BUCKET)
    publish(bucket)
def generate():
    api_key = request.form['api_key']
    poll = request.form["poll"]

    if poll == "false":
        dimensions = (int(request.form['width']), int(request.form['height']))
        generate_grid.delay(api_key, dimensions)
        return jsonify({'status': 'generating'})
    else:
        image_path = "images/{0}.png".format(hashlib.md5(api_key).hexdigest())
        conn = S3Connection(settings.S3_ACCESS_KEY, settings.S3_SECRET_KEY)
        bucket = Bucket(conn, settings.S3_BUCKET)
        if bucket.get_key(image_path):
            return jsonify({'status': 'ok', 'path': image_path})
        else:
            return jsonify({'status': 'generating'})
def make_s3_store(bucket_name, aws_access_key_id=None, aws_secret_access_key=None,
                  path='osm', reduced_redundancy=False, date_prefix=''):
    conn = connect_s3(aws_access_key_id, aws_secret_access_key)
    bucket = Bucket(conn, bucket_name)
    s3_store = S3(bucket, date_prefix, path, reduced_redundancy)
    return s3_store
def execute(self, context, obj):
    connection = S3Connection()
    bucket = Bucket(connection=connection, name=context['bucket'])
    key1 = Key(bucket=bucket, name=context['name'])
    key2 = Key(bucket=bucket, name=context['name'] + '.encrypted')
    key2.set_contents_from_string(key1.get_contents_as_string())
    return 'done'
class S3Archiver():

    def __init__(self, bucket, path='history', hash_depth=3):
        self.s3 = boto.connect_s3()
        self.inviso_store = Bucket(self.s3, bucket)
        self.history_path = path
        self.hash_depth = hash_depth

    def archive(self, event):
        job_id = event['job.id']
        dest_name = job_id
        prefix = dest_name[-self.hash_depth:] + '/'

        for uri, suffix in [(event['history.uri'], '.history.gz'),
                            (event['config.uri'], '.conf.gz')]:
            content = self.fetch_content(uri)

            gz_out = StringIO.StringIO()
            with gzip.GzipFile(fileobj=gz_out, mode="w") as gz:
                gz.write(content)

            new_key = self.inviso_store.new_key(
                self.history_path + prefix + dest_name + suffix)
            new_key.set_contents_from_string(gz_out.getvalue())

        log.debug("Transfer complete: " + dest_name)
def pull_from_hyperstore(key_name):
    conn = boto.connect_s3(host='tims4.mobi-cloud.com', port=80, is_secure=False)
    bucket = Bucket(conn, bucket_name)
    gkey = Key(bucket=bucket, name=key_name)
    gkey.get_contents_to_filename("this.json")
def main():
    opt_parser = OptionParser(usage=USAGE)
    opt_parser.add_option('-v', '--verbose', action='store_true', default=False)
    opt_parser.add_option('-a', '--aws-creds', default=None)
    opts, args = opt_parser.parse_args()
    if not args:
        raise Exception(USAGE)

    if opts.verbose:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.WARNING)
    logger.addHandler(logging.StreamHandler(sys.stderr))

    if opts.aws_creds:
        aws_access_key, aws_secret_key = get_aws_creds_file(opts.aws_creds)
    else:
        aws_access_key, aws_secret_key = get_aws_creds_env()

    s3_bucket_name = args[0]
    base_paths = args[1:]

    s3_cxn = S3Connection(aws_access_key, aws_secret_key)
    s3_bucket = Bucket(s3_cxn, s3_bucket_name)

    # iterate over the log directories only, not over args (which also
    # contains the bucket name)
    for base_path in base_paths:
        logger.debug('getting rotated ossec logs in %s', base_path)
        for log in get_logs(base_path):
            if not log.is_archived(s3_bucket):
                log.archive(s3_bucket)
            elif log.expired:
                log.remove()
def process_all(action, s3_key, s3_secret, bucket_name, prefix, local_folder,
                queue, thread_count, max_retry, zone):
    """
    Orchestrates the en-queuing and consuming threads in conducting:
    1. Local folder structure construction
    2. S3 key en-queuing
    3. S3 key uploading/downloading if the file is updated

    :param action: download or upload
    :param s3_key: Your S3 API Key
    :param s3_secret: Your S3 API Secret
    :param bucket_name: Your S3 bucket name
    :param prefix: The path to the S3 folder to be downloaded.
        Example: bucket_root/folder_1
    :param local_folder: The local folder you wish to upload/download the files from/to
    :param queue: A ProcessKeyQueue instance to enqueue all the keys in
    :param thread_count: The number of threads that you wish s3concurrent to use
    :param max_retry: The max times for s3concurrent to retry uploading/downloading a key
    :param zone: The AWS region used to build the S3 endpoint and connection
    :return: True if all keys are processed, False if interrupted in any way
    """
    # conn = S3Connection(s3_key, s3_secret)
    S3Connection.DefaultHost = 's3' + zone + '.amazonaws.com'
    conn = boto.s3.connect_to_region(
        zone,
        aws_access_key_id=s3_key,
        aws_secret_access_key=s3_secret,
        is_secure=True,
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
    )

    bucket = Bucket(connection=conn, name=bucket_name)

    if action == 'download':
        target_function = enqueue_s3_keys_for_download
    else:
        target_function = enqueue_s3_keys_for_upload

    enqueue_thread = threading.Thread(target=target_function,
                                      args=(bucket, prefix, local_folder, queue))
    enqueue_thread.daemon = True
    enqueue_thread.start()

    queue.queuing_started()

    consume_thread = threading.Thread(target=consume_queue,
                                      args=(queue, action, thread_count, max_retry))
    consume_thread.daemon = True
    consume_thread.start()

    while not queue.all_processed:
        # report progress every 10 secs
        logger.info('{0} keys enqueued, and {1} keys {2}ed'.format(
            queue.enqueued_counter, queue.de_queue_counter, action))
        time.sleep(10)

    logger.info('{0} keys enqueued, and {1} keys {2}ed'.format(
        queue.enqueued_counter, queue.de_queue_counter, action))
def load(context, url, callback):
    if context.config.S3_LOADER_BUCKET:
        bucket = context.config.S3_LOADER_BUCKET
    else:
        bucket, url = _get_bucket(url)

    if not _validate_bucket(context, bucket):
        return callback(None)

    conn = _establish_connection(context.config)
    bucket_loader = Bucket(connection=conn, name=bucket)

    file_key = bucket_loader.get_key(url)
    if not file_key:
        return callback(None)

    return callback(file_key.read())
def __init__(self, bucketName, awsId, awsSecret):
    """Creates a new instance of S3Bucket."""
    self._bucketName = bucketName
    self._awsId = awsId
    self._awsSecret = awsSecret
    self._conn = S3Connection(self._awsId, self._awsSecret)
    self._bucket = Bucket(self._conn, bucketName)
def s3_bucket(self):
    conn = connect_to_region(
        self.aws_region,
        aws_access_key_id=self.aws_access_key_id,
        aws_secret_access_key=self.aws_secret_access_key,
        is_secure=True,
    )
    return Bucket(connection=conn, name=self.bucket_name)
def execute(self, context, obj):
    connection = S3Connection()
    bucket = Bucket(connection=connection, name=context['bucket'])
    key = Key(bucket=bucket, name=context['name'])
    if key.exists():
        return 'done'
    else:
        return 'missing'
class S3Mr2LogMonitor(ElasticSearchMonitor):

    def __init__(self, jobflow, cluster_id, cluster_name, bucket, prefix, **kwargs):
        super(S3Mr2LogMonitor, self).__init__(**kwargs)
        self.jobflow = jobflow
        self.cluster_id = cluster_id
        self.cluster_name = cluster_name
        self.bucket = bucket
        self.prefix = prefix
        self.emr_logs = Bucket(boto.connect_s3(), bucket)

    def run(self):
        listing = self.emr_logs.list(prefix=self.prefix, delimiter="/")

        events = []
        for f in listing:
            path = f.name

            if not path.endswith('.jhist'):
                continue

            ts = arrow.get(f.last_modified)
            if ts <= self.checktime:
                log.debug('Skipping old file: ' + f.name)
                continue

            job_id = job_pattern.match(path.split('/')[-1]).group(0)

            if job_id in self.jobs and self.jobs[job_id] >= ts.timestamp * 1000:
                log.debug('Skipping processed file: ' + f.name)
                continue

            config_path = path[:path.rfind('/')] + '/' + job_id + '_conf.xml'

            event = {
                'inviso.type': 'mr2',
                'job.id': job_id,
                'application.id': job_id.replace('job_', 'application_'),
                'job.type': 'mr2',
                'file.type': ['history', 'config'],
                'jobflow': self.jobflow,
                'cluster.id': self.cluster_id,
                'cluster': self.cluster_name,
                'history.uri': 's3://%s/%s' % (self.bucket, path),
                'config.uri': 's3://%s/%s' % (self.bucket, config_path),
                'bucket': self.bucket,
                'timestamp': str(ts),
                'epoch': ts.timestamp * 1000,
                'mapreduce.version': 'mr2'
            }

            log.info('Publishing event: (%s) %s ' % (event['cluster'], event['job.id']))
            events.append(event)

        for chunk in [events[i:i + self.chunk_size]
                      for i in xrange(0, len(events), self.chunk_size)]:
            self.publisher.publish(chunk)
def test_400_timeout(self):
    weird_timeout_body = "<Error><Code>RequestTimeout</Code></Error>"
    self.set_http_response(status_code=400, body=weird_timeout_body)
    b = Bucket(self.service_connection, 'mybucket')
    k = b.new_key('test_failure')
    fail_file = StringIO('This will pretend to be chunk-able.')

    # Decorate.
    k.should_retry = counter(k.should_retry)
    self.assertEqual(k.should_retry.count, 0)

    try:
        k.send_file(fail_file)
        self.fail("This shouldn't ever succeed.")
    except BotoServerError:
        pass

    self.assertTrue(k.should_retry.count, 1)
class StorageS3():

    def __init__(self, *args, **kwargs):
        self.s3_key = kwargs['key']
        self.s3_secret = kwargs['secret']
        self.s3_bucket_name = kwargs['bucket']
        self.make_connection()

    def make_connection(self):
        conn = S3Connection(self.s3_key, self.s3_secret)
        self.bucket = Bucket(conn, self.s3_bucket_name)

    def upload_file(self, local_path, remote_path):
        key = Key(self.bucket, remote_path)
        key.set_contents_from_file(file(str(local_path)))
        key.set_acl('public-read')

    def delete_file(self, remote_path):
        self.bucket.delete_key(remote_path)
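# A hedged usage sketch for StorageS3 above; the credentials, bucket name, and
# paths are hypothetical placeholders.
storage = StorageS3(key='AKIAEXAMPLE', secret='example-secret', bucket='example-assets')
storage.upload_file('/tmp/logo.png', 'images/logo.png')
storage.delete_file('images/old-logo.png')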
def __init__(self, queue):
    Thread.__init__(self)
    self.conn = S3Connection(settings.ACCESS_KEY, settings.SECRET_KEY)
    self.thumb_bucket = Bucket(self.conn, settings.THUMB_BUCKET)
    self.store_bucket = Bucket(self.conn, settings.STORE_BUCKET)
    self.buckets = {settings.THUMB_BUCKET: self.thumb_bucket,
                    settings.STORE_BUCKET: self.store_bucket}
    self.queue = queue
    self.daemon = True
    self.start()
def send_to_aws(self):
    """Everything should be packaged now. Let's send it to S3."""
    # Establish connection to AWS and upload the tarball
    conn = S3Connection(self.config.get('general', 'aws_key'),
                        self.config.get('general', 'aws_secret_key'))
    bucket = Bucket(conn, self.config.get('general', 'bucket_name'))

    tarball_key = Key(bucket)
    tarball_key.key = self.config.get('general', 'tarball_name')
    tarball_path = (self.config.get('general', 'dump_path') +
                    self.config.get('general', 'tarball_name'))
    tarball_key.set_contents_from_filename(tarball_path)
    # uncomment the following to make the bucket publicly downloadable
    bucket.set_acl('public-read', tarball_key.key)

    # Upload the stats file
    stats_key = Key(bucket)
    stats_key.key = 'stats.json'
    stats_key.set_contents_from_filename(
        self.config.get('general', 'dump_path') + 'stats.json')
    # uncomment the following to make the bucket publicly downloadable
    bucket.set_acl('public-read', stats_key.key)