def push_code():
    """
    Bundles worker code, libraries, and configuration into a zipped file
    and stores it on S3. Finally updates
    """
    from boto.s3.connection import S3Connection
    from boto.s3 import key

    try:
        local("rm -r code")
    except Exception:
        pass
    local("mkdir code")
    local("cp config.py code/config.py")
    local("cp filequeue.py code/filequeue.py")
    local("cp -r libs code/libs")
    local("cp worker.py code/worker.py")
    local("tar -zcvf code.tar.gz code")

    S3 = S3Connection()
    code_bucket = S3.create_bucket(CODE_BUCKET)
    code = key.Key(code_bucket)
    code.key = CODE_KEY
    code.set_contents_from_filename("code.tar.gz")

    local("rm code.tar.gz")
    local("rm -r code")
    logging.info("code pushed to bucket " + CODE_BUCKET + " key " + CODE_KEY)
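# The download half of this workflow is not shown; a minimal sketch of what a
# worker-side pull might look like, assuming the same CODE_BUCKET/CODE_KEY
# config values and fabric's local() helper (the function name is made up):
def pull_code():
    from boto.s3.connection import S3Connection
    from boto.s3 import key

    conn = S3Connection()
    bucket = conn.get_bucket(CODE_BUCKET)
    code = key.Key(bucket)
    code.key = CODE_KEY
    # Fetch the bundle uploaded by push_code() and unpack it in place.
    code.get_contents_to_filename("code.tar.gz")
    local("tar -zxf code.tar.gz")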
def write_file(self, rendered_doc, policy='public-read'):
    path = rendered_doc.path
    content = rendered_doc.read()
    path = path.lstrip('/')
    path = path if path != '' else self.config.index_document
    if isinstance(content, unicode):
        content = content.encode('utf-8')
    bucket_key = key.Key(self.bucket)
    bucket_key.key = path
    fp = cStringIO.StringIO()
    fp.write(content)
    ext = os.path.splitext(path)[-1] or '.html'
    mimetype = mimetypes.guess_type(path)[0]
    headers = {}
    headers['Content-Type'] = mimetype if mimetype else 'text/html'
    if self.config.headers and not path.startswith('.grow'):
        for header in self.config.headers:
            if (ext not in header.extensions
                    and '*' not in header.extensions):
                continue
            for field in header.fields:
                headers[field.name] = field.value
    else:
        headers['Cache-Control'] = 'no-cache'
    fp.seek(0)
    bucket_key.set_contents_from_file(fp, headers=headers, replace=True,
                                      policy=policy)
    fp.close()
def write_file_to_bucket(self, local_file):
    path = self.build_bucket_path(local_file)
    print "Writing file", path
    k = s3key.Key(self.bucket)
    k.key = path
    k.set_contents_from_filename(str(local_file))
    k.set_acl('public-read')
def _upload_photo(image, image_type, id=None, file_path=None):
    conn = connection.S3Connection(settings.AWS_ACCESS_KEY,
                                   settings.AWS_SECRET_KEY)
    s3bucket = conn.create_bucket(settings.AWS_PHOTO_UPLOAD_BUCKET)
    filename = str(uuid.uuid4())
    tmp = tempfile.NamedTemporaryFile('w+b', -1, '.jpg')
    image = image.convert("RGB")
    image.save(tmp.name)
    key = s3key.Key(s3bucket)
    if file_path:
        key.key = '%s.jpg' % file_path
    elif id:
        key.key = '%s/%s_%s.jpg' % (image_type, filename, id)
    else:
        key.key = '%s/%s.jpg' % (image_type, filename)
    # Positional arguments map to headers, replace, cb, num_cb, policy, md5.
    key.set_contents_from_filename(tmp.name, None, True, None, 10,
                                   'public-read', None)
    key.close()
    tmp.close()
    return "http://%s.s3.amazonaws.com/%s" % (
        settings.AWS_PHOTO_UPLOAD_BUCKET, key.key)
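# A hypothetical call site for _upload_photo(), assuming a Pillow image and a
# made-up 'avatar' image_type; the bucket and key layout come from the
# function above.
from PIL import Image

img = Image.open('portrait.png')
url = _upload_photo(img, 'avatar', id=42)
print url  # http://<bucket>.s3.amazonaws.com/avatar/<uuid>_42.jpg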
def test_no_retry_on_keyboardinterrupt(b, collect):
    """Ensure that KeyboardInterrupts are forwarded."""
    key_name = 'test-key-name'
    k = key.Key(bucket=b, name=key_name)

    # If vanilla KeyboardInterrupt is used, then sending SIGINT to the
    # test can cause it to pass improperly, so use a subtype instead.
    class MarkedKeyboardInterrupt(KeyboardInterrupt):
        pass

    collect.inject(MarkedKeyboardInterrupt('SIGINT, probably'))
    d = s3_deleter.Deleter()

    with pytest.raises(MarkedKeyboardInterrupt):
        d.delete(k)

        # Exactly when coroutines are scheduled is non-deterministic,
        # so spin while yielding to provoke the
        # MarkedKeyboardInterrupt being processed within the
        # pytest.raises context manager.
        while True:
            gevent.sleep(0.1)

    # Only one key should have been aborted, since the purpose is to
    # *not* retry when processing KeyboardInterrupt.
    assert collect.aborted_keys == [key_name]

    # Turn off fault injection and flush/synchronize with close().
    collect.inject(None)
    d.close()

    # Since there is no retrying, no keys should be deleted.
    assert not collect.deleted_keys
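# The b and collect arguments in these Deleter tests are assumed to be pytest
# fixtures from the surrounding suite: a collector that records (or fails)
# delete_keys calls, and a stub bucket wired to it. A rough sketch of what
# such fixtures could look like (illustrative only, not the real harness):
import pytest

class FakeCollector(object):
    def __init__(self):
        self.deleted_keys = []
        self.aborted_keys = []
        self._exc = None

    def inject(self, exc):
        # Arm (or, with None, disarm) a fault raised on the next delete.
        self._exc = exc

    def delete_keys(self, keys):
        # Accept either boto Key objects or plain key-name strings.
        names = [getattr(k, 'name', k) for k in keys]
        if self._exc is not None:
            self.aborted_keys.extend(names)
            raise self._exc
        self.deleted_keys.extend(names)

class FakeBucket(object):
    def __init__(self, collector):
        self.delete_keys = collector.delete_keys

@pytest.fixture
def collect():
    return FakeCollector()

@pytest.fixture
def b(collect):
    return FakeBucket(collect)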
def read_file(self, path):
    file_key = key.Key(self.bucket)
    file_key.key = path
    try:
        return file_key.get_contents_as_string()
    except boto.exception.S3ResponseError, e:
        if e.status != 404:
            raise
        raise IOError('File not found: {}'.format(path))
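# read_file() translates a 404 into IOError, so a caller can treat a missing
# key like a missing local file (object and key names here are illustrative):
try:
    raw = destination.read_file('podspec.yaml')
except IOError:
    raw = None  # fall back when the object does not exist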
def make_key(*args, **kwargs):
    from datetime import datetime
    from datetime import timedelta

    k = key.Key(*args, **kwargs)
    last_modified = (datetime.now() - timedelta(days=60)).strftime(ISO8601)
    k.last_modified = last_modified
    return k
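# ISO8601 is assumed to be a strftime format defined elsewhere in the module,
# e.g. boto.utils.ISO8601 ('%Y-%m-%dT%H:%M:%SZ'). A usage sketch, given some
# bucket b, producing a key that appears 60 days old:
stale = make_key(bucket=b, name='stale-object')
print stale.last_modified  # e.g. 2015-01-01T00:00:00Z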
def test_processes_one_deletion(b, collect):
    # Mock up a key and bucket
    key_name = 'test-key-name'
    k = key.Key(bucket=b, name=key_name)

    d = s3_deleter.Deleter()
    d.delete(k)
    d.close()

    assert collect.deleted_keys == [key_name]
def upload_path(prefix, path):
    name = os.path.basename(path)
    c = boto.connect_s3()
    b = c.get_bucket('files.projecthawkthorne.com')
    k = key.Key(b)
    k.key = os.path.join(prefix, name)
    k.set_contents_from_filename(path)
    k.set_acl('public-read')
def upload_path(b, prefix, path):
    if 'TRAVIS' not in os.environ:
        logging.info('[DRYRUN] uploading {} to {}'.format(path, prefix))
        return
    name = os.path.basename(path)
    k = key.Key(b)
    k.key = os.path.join(prefix, name)
    logging.info('Uploading {} to {}'.format(path, prefix))
    k.set_contents_from_filename(path)
    k.set_acl('public-read')
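# On CI (where TRAVIS is set in the environment) the call below uploads the
# artifact; everywhere else it only logs a dry run. The bucket name, prefix,
# and local path are illustrative:
import boto

b = boto.connect_s3().get_bucket('files.projecthawkthorne.com')
upload_path(b, 'builds/v0.0.1', 'dist/hawkthorne-osx.zip')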
def sign_s3_get(key):
    rval = cache.get('signed_url:%s:%s' % (settings.S3_BUCKET, key))
    if rval is None:
        life_time = 3600
        s3_conn = connection.S3Connection(settings.AWS_ACCESS_KEY,
                                          settings.AWS_SECRET_KEY)
        bkey = s3_key.Key(s3_conn.get_bucket(settings.S3_BUCKET))
        bkey.key = key
        rval = bkey.generate_url(life_time, query_auth=True,
                                 force_http=True)
        cache.set('signed_url:%s:%s' % (settings.S3_BUCKET, key),
                  rval, life_time - 60)
    return rval
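# An illustrative Django view built on sign_s3_get(); the S3 key is made up.
# Repeated requests within the cache window reuse the same signed URL.
from django.shortcuts import redirect

def download(request):
    return redirect(sign_s3_get('exports/report.csv'))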
def get_modeldata(s3_data):
    try:
        s3_conn = connection.S3Connection(settings.AWS_ACCESS_KEY,
                                          settings.AWS_SECRET_KEY)
        key = s3_key.Key(s3_conn.get_bucket(settings.S3_BUCKET))
        key.key = s3_data
        data = key.get_contents_as_string()
    except (S3ResponseError, S3DataError):
        raise FileBackendError()
    if s3_data.endswith('.gz'):
        data = gzip_to_str(data)
    return json.loads(data)
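# gzip_to_str() is an external helper; a plausible sketch, assuming it simply
# decompresses an in-memory gzip byte string:
import gzip
import cStringIO

def gzip_to_str(data):
    # Wrap the downloaded bytes in a file object so GzipFile can read them.
    return gzip.GzipFile(fileobj=cStringIO.StringIO(data)).read()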
def get_file_binary(self, name):
    """
    Getter for the binary_file field. Fetches the file from Amazon S3.

    :param name: Field name
    :return: File buffer
    """
    if self.type == "s3":
        bucket = self.folder.get_bucket()
        s3key = key.Key(bucket)
        s3key.key = self.s3_key
        return buffer(s3key.get_contents_as_string())
    return super(NereidStaticFile, self).get_file_binary(name)
def _set_file_binary(self, value):
    """
    Stores the file to Amazon S3.

    :param value: The value to set
    """
    if not value:
        return
    if self.type == "s3":
        bucket = self.folder.get_bucket()
        s3key = key.Key(bucket)
        s3key.key = self.s3_key
        return s3key.set_contents_from_string(value)
    return super(NereidStaticFile, self)._set_file_binary(value)
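# The two methods above round-trip a static file through S3 when the folder
# is configured with type "s3". A usage sketch with an illustrative record:
static_file._set_file_binary(open('logo.png', 'rb').read())
data = static_file.get_file_binary('binary_file')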
def read(self):
    """
    Get an S3 connection and attempt to read the file.

    If the file doesn't exist, return an empty string and let core
    gettext blow up as it would on a regular empty file.

    :return: file contents or empty string if not found.
    """
    conn = self._get_conn()
    bucket = conn.get_bucket(self._bucket_name)
    key = s3_key.Key(bucket=bucket, name=self._filename)
    if key.exists():
        return key.get_contents_as_string()
    return ""
def write_file(self, path, content, policy='public-read'):
    path = path.lstrip('/')
    path = path if path != '' else self.config.index_document
    if isinstance(content, unicode):
        content = content.encode('utf-8')
    bucket_key = key.Key(self.bucket)
    bucket_key.key = path
    fp = cStringIO.StringIO()
    fp.write(content)
    mimetype = mimetypes.guess_type(path)[0]
    # TODO: Allow configurable headers.
    headers = {
        'Cache-Control': 'no-cache',
        'Content-Type': mimetype if mimetype else 'text/html',
    }
    fp.seek(0)
    bucket_key.set_contents_from_file(fp, headers=headers, replace=True,
                                      policy=policy)
    fp.close()
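# A usage sketch for write_file(); the deployment object and path are
# illustrative. An empty path falls back to the configured index document,
# and Cache-Control: no-cache makes redeploys visible immediately.
deployment.write_file('/about/index.html', u'<h1>About</h1>')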
def test_processes_many_deletions(b, collect):
    # Generate a target list of keys in a stable order
    target = sorted(['test-key-' + str(x) for x in range(20001)])

    # Construct boto S3 Keys from the generated names and delete them
    # all.
    keys = [key.Key(bucket=b, name=key_name) for key_name in target]
    d = s3_deleter.Deleter()
    for k in keys:
        d.delete(k)
    d.close()

    # Sort the deleted key names to obtain another stable order and
    # then ensure that everything was passed for deletion
    # successfully.
    assert sorted(collect.deleted_keys) == target
def get_s3_key(self, filename, file_type):
    """
    Determines the S3 key to store the file under

    Args:
        filename (str): filename of the encrypted file
        file_type (str): type of the encrypted file

    Returns:
        boto.s3.key.Key: the key to store the file in
    """
    basename = os.path.basename(filename)
    return key.Key(
        self.get_bucket(),
        'exam_audits/{file_type}/{filename}'.format(
            filename=basename,
            file_type=file_type,
        )
    )
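# get_s3_key() only constructs the key; a hypothetical upload built on it:
k = backend.get_s3_key('/tmp/exam-video.tar.gz.gpg', 'video')
k.set_contents_from_filename('/tmp/exam-video.tar.gz.gpg')
# stored under exam_audits/video/exam-video.tar.gz.gpg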
def write_file(self, path, content, policy='public-read'):
    path = path.lstrip('/')
    if isinstance(content, unicode):
        content = content.encode('utf-8')
    bucket_key = key.Key(self.bucket)
    bucket_key.key = path
    fp = cStringIO.StringIO()
    fp.write(content)
    # TODO(jeremydw): Better headers.
    mimetype = mimetypes.guess_type(path)[0]
    headers = {'Cache-Control': 'no-cache'}
    if mimetype:
        headers['Content-Type'] = mimetype
    fp.seek(0)
    bucket_key.set_contents_from_file(fp, headers=headers, replace=True,
                                      policy=policy)
    fp.close()
def ls_bucket(bucket_name=OUTPUT_S3_BUCKET):
    """
    Selects one random key from the bucket, stores its contents locally
    in temp.json, and writes the full key listing to output_keys.json.
    """
    from boto.s3.connection import S3Connection
    from boto.s3 import key
    logging.getLogger('boto').setLevel(logging.CRITICAL)
    import random

    S3 = S3Connection()
    bucket = S3.get_bucket(bucket_name)
    # Collect key names (not Key objects) so the list is JSON-serializable.
    keys = [example_key.key for example_key in bucket.list()]
    if keys:
        example = key.Key(bucket)
        example.key = random.sample(keys, 1)[0]
        example.get_contents_to_filename("temp.json")
        with open("output_keys.json", 'w') as fh:
            fh.write(json.dumps(keys))
        print "Number of keys in the output bucket ", len(keys)
        print "a randomly selected key is written to temp.json"
        print "list of keys are stored in output_keys.json"
def test_retry_on_normal_error(b, collect):
    """Ensure retries are processed for most errors."""
    key_name = 'test-key-name'
    k = key.Key(bucket=b, name=key_name)

    collect.inject(Exception('Normal error'))
    d = s3_deleter.Deleter()
    d.delete(k)

    # Since delete_keys will fail over and over again, aborted_keys
    # should grow quickly.
    while len(collect.aborted_keys) < 2:
        gevent.sleep(0.1)

    # Since delete_keys has been failing repeatedly, no keys should be
    # successfully deleted.
    assert not collect.deleted_keys

    # Turn off fault injection and flush/synchronize with close().
    collect.inject(None)
    d.close()

    # The one enqueued job should have been processed.
    assert collect.deleted_keys == [key_name]
def walk(bucket, dir, files):
    for file in files:
        full_path = os.path.join(resource_path, dir, file)
        if not os.path.isfile(full_path) or full_path.endswith('~'):
            continue
        key_name = full_path[len(resource_path):]
        for key in bucket.list(prefix=key_name.lstrip('/')):
            key.delete()
        resource_id = key_name.replace('/', '')
        resource = model.Resource.get(resource_id)
        if not resource:
            continue
        last_part = resource.url.split('/')[-1]
        file_name = munge.munge_filename(last_part)
        key_name = key_name + '/' + file_name
        key = s3key.Key(bucket)
        key.key = key_name
        key.set_contents_from_filename(full_path)
        print 'Archived %s' % key_name
        os.remove(full_path)
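# The (arg, dirname, names) parameter shape matches a Python 2 os.path.walk
# callback, so this function is presumably driven like the sketch below,
# with resource_path as the tree being archived:
os.path.walk(resource_path, walk, bucket)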
def handle(self, *args, **options):
    send_mail(u'[Salesforce Mirror System] - Starting {now}'.format(
        now=datetime.datetime.now().strftime(u'%Y-%m-%d %H:%M:%S')),
        u'', u'*****@*****.**', [u'*****@*****.**'],
        fail_silently=True)

    spto = SalesforceProductTier()

    spro = spto.provider
    spro.update_recent(*args)
    spro_log = u"\n".join(spro.get_log())
    spro.reset_log()
    if settings.DEBUG:
        self.stdout.write(spro_log)

    spo = spto.product
    spo.update_recent(*args)
    spo_log = u"\n".join(spo.get_log())
    spo.reset_log()
    if settings.DEBUG:
        self.stdout.write(spo_log)

    spto.update_recent(*args)
    spto_log = u"\n".join(spto.get_log())
    spto.reset_log()
    if settings.DEBUG:
        self.stdout.write(spto_log)

    spoto = SalesforceOldProductTier()
    spoto.update_recent(*args)
    spoto_log = u"\n".join(spoto.get_log())
    spoto.reset_log()
    if settings.DEBUG:
        self.stdout.write(spoto_log)

    from boto.s3 import bucket, connection, key
    connection_object = connection.S3Connection(
        aws_access_key_id=settings.AWS_CREDENTIALS[u's3'][u'access_id'],
        aws_secret_access_key=settings.AWS_CREDENTIALS[u's3'][u'secret_key'])
    bucket_object = bucket.Bucket(connection=connection_object,
                                  name=u'salesforce-mirror-system')
    now = datetime.datetime.now().strftime(u'%Y-%m-%d %H:%M:%S')
    filename = u'log_{date}.log.bz2'.format(date=now)
    key_object = key.Key(bucket=bucket_object, name=filename)

    data = u'\n\n'.join([spto_log, spo_log, spro_log,
                         spoto_log]).encode(u'utf8')
    output = bz2.BZ2File(u'/tmp/{filename}'.format(filename=filename), u'wb')
    try:
        output.write(data)
    finally:
        output.close()

    key_object.set_contents_from_filename(
        u'/tmp/{filename}'.format(filename=filename),
        reduced_redundancy=True)
    os.remove(u'/tmp/{filename}'.format(filename=filename))
    key_object.set_acl(u'public-read')
    url = key_object.generate_url(expires_in=0, query_auth=False)

    send_mail(
        subject=u'[Salesforce Mirror System] - Ending {now}'.format(
            now=datetime.datetime.now().strftime(u'%Y-%m-%d %H:%M:%S')),
        message=u'Log stored at: {url}'.format(url=url),
        from_email=u'*****@*****.**',
        recipient_list=[u'*****@*****.**'])
import os
import argparse

import boto
from boto.s3 import key

parser = argparse.ArgumentParser(description="Upload files to S3")
parser.add_argument("path", help="File to upload")
args = parser.parse_args()

name = os.path.basename(args.path)
c = boto.connect_s3()
b = c.get_bucket('hawkthorne.journey.builds')
k = key.Key(b)
k.key = name
k.set_contents_from_filename(args.path)
k.set_acl('public-read')
def get(self, name):
    k = key.Key(bucket=self.__bucket(), name=name)
    return k.get_contents_as_string()
def put(self, name, data):
    k = key.Key(bucket=self.__bucket(), name=name)
    k.set_contents_from_string(
        data, headers={'Content-Type': 'application/octet-stream'})
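# Round-trip sketch for the get()/put() pair above, assuming store is an
# instance of the class that defines them:
store.put('blobs/abc123', '\x00\x01\x02')
assert store.get('blobs/abc123') == '\x00\x01\x02'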
    calling_format=connection.OrdinaryCallingFormat(),
)

bname = args.bucket
all_buckets = conn.get_all_buckets()
bucket = [b for b in all_buckets if b.name == bname]
if bucket:
    bucket = bucket[0]
    num_objs = len(list(bucket.list()))
    print('Bucket {} already exists and contains {} objects'.format(
        bucket.name, num_objs))
else:
    print 'Creating new bucket {}'.format(bname)
    bucket = conn.create_bucket(bname)

k = key.Key(bucket)

print "\nCreating {} {}KB random data files".format(args.num_objs,
                                                    args.bytes / 1024)
for n in xrange(args.num_objs):
    fname = '/tmp/rgwtestdata-%d' % (n)
    with open(fname, 'w') as fd:
        fd.write(os.urandom(args.bytes))

if args.num_objs:
    print "Uploading {} objects to bucket '{}'".format(args.num_objs, bname)
d1 = datetime.datetime.now()
for n in xrange(args.num_objs):
    objname = 'obj{}'.format("{}+{}".format(n, time.time()))
    ext = ""
    if args.objnamelen > len(objname):
def delete_file(self, path):
    bucket_key = key.Key(self.bucket)
    bucket_key.key = path.lstrip('/')
    self.bucket.delete_key(bucket_key)
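# delete_file() strips the leading slash before deleting, so both of these
# illustrative calls remove the same object:
deployment.delete_file('/stale/page.html')
deployment.delete_file('stale/page.html')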
def backup(self):
    # Test that the replica set is in a good state to perform backups
    test_result, err_str = self.test_replicaset()
    if test_result is False:
        raise RuntimeError(err_str)

    # Choose a member from which to back up
    backup_member = self.choose_member()

    # Connect to the backup target directly
    backup_member_mongo = MongoClient(host=backup_member[0],
                                      port=backup_member[1])
    self.logger.debug("Connected to mongo target %s" % backup_member_mongo)

    freeze_rs = True
    if backup_member_mongo.admin.command('isMaster').get('hidden', False):
        # This member is hidden so we can safely take backups without
        # doing any other maintenance work
        freeze_rs = False

    # Remove the member from the replica set (mark as hidden)
    if freeze_rs:
        # Can probably use replSetMaintenance here but not available
        # in my testing version
        if self.dryrun:
            self.logger.debug("Would have frozen replica set")
        else:
            self.logger.debug('Freezing replica set')
            backup_member_mongo.admin.command({'replSetFreeze': 86400})
    else:
        self.logger.debug(
            "Skipping replica set freeze, %s is a hidden member"
            % backup_member[0])

    if self.dryrun:
        self.logger.debug(
            "Would have dumped databases on {backup_member}".format(
                backup_member=backup_member))
    else:
        self.logger.debug("Dumping databases on {backup_member}".format(
            backup_member=backup_member))
        for database in backup_member_mongo.database_names():
            if database not in args.exclude_dbs:
                mongodump = 'mongodump -h {backup_member} -d {database} '\
                            '-o {backup_member} --quiet'.format(
                                backup_member=backup_member[0],
                                database=database)
                mongodump = mongodump.split(' ')
                subprocess.check_output(mongodump,
                                        stderr=subprocess.STDOUT)

    # Unfreeze the replica set
    if self.dryrun:
        self.logger.debug("Would have unlocked mongo")
    else:
        if freeze_rs:
            self.logger.debug('Unfreezing replica set')
            backup_member_mongo.admin.command({'replSetFreeze': 0})

    # Archive and upload to S3
    if self.dryrun:
        self.logger.debug("Would have archived dumps and uploaded to S3")
    else:
        self.logger.debug("Archiving dumps and uploading to S3")
        dumps = listdir(backup_member[0])
        for dump in dumps:
            archive_path = backup_member[0] + '/' + dump
            archive_name = dump + '_' + self.creation_time + '.tar.gz'
            with tarfile.open(archive_name, 'w:gz') as tar:
                tar.add(archive_path, arcname=dump)
            bucket = self.s3.get_bucket(s3_bucket_name)
            key_obj = key.Key(bucket)
            key_obj.key = s3_bucket_dest_dir + '/' + archive_name
            key_obj.set_contents_from_filename(archive_name)
def find(self, localedir=None, languages=None, all=0):
    """
    Mimic gettext.find almost exactly -- os.path.exists is replaced
    with assembling an S3 key and checking for its existence instead.

    :param localedir: an optional localedir where translations are found.
    :param languages: which languages to search for.
    :param all: whether or not to read all found files, or just the first.
    :return: a list of file paths or a single file path in S3.
    """
    conn = self._get_conn()
    bucket = conn.get_bucket(self._bucket_name)

    if localedir is None:
        localedir = self._default_localedir
    if languages is None:
        languages = []
        for envar in DEFAULT_ENVVARS:
            val = os.environ.get(envar)
            if val:
                languages = val.split(":")
                break
        if "C" not in languages:
            languages.append("C")

    nelangs = []
    for lang in languages:
        for nelang in getattr(gettext, "_expand_lang")(lang):
            if nelang not in nelangs:
                nelangs.append(nelang)

    result = [] if all else None
    domain_mo = "%s.mo" % self._domain
    for lang in nelangs:
        if lang == "C":
            break
        mofile = os.path.join(localedir, lang, LC_MESSAGES, domain_mo)
        mofile_lp = os.path.join("locale-langpack", lang, LC_MESSAGES,
                                 domain_mo)
        key = s3_key.Key(bucket=bucket, name=mofile)
        if key.exists():
            if all:
                result.append(mofile)
            else:
                return mofile
        key = s3_key.Key(bucket=bucket, name=mofile_lp)
        if key.exists():
            if all:
                result.append(mofile_lp)
            else:
                return mofile_lp
    return result
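# Usage mirrors gettext.find(); a sketch that resolves a Spanish .mo key,
# assuming the object was constructed with a domain and bucket name
# (the variable names are illustrative):
mo_key = translations.find(languages=['es'])
if mo_key is not None:
    print 'resolved to S3 key:', mo_key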