def _get_file_from_bucket(s3_conn, bucket_name, remote_filename, local_filename):
    """Download `remote_filename` from `bucket_name` into LOCAL_PATH/`local_filename`.

    Returns True on success, False if the key does not exist or S3 errors out.
    """
    local_filename = os.path.join(LOCAL_PATH, local_filename)
    try:
        b = s3_conn.get_bucket(bucket_name)
        k = Key(b, remote_filename)
        log.debug("Attempting to retrieve file '%s' from bucket '%s'" %
                  (remote_filename, bucket_name))
        if k.exists():
            k.get_contents_to_filename(local_filename)
            log.info(
                "Successfully retrieved file '%s' from bucket '%s' to '%s'."
                % (remote_filename, bucket_name, local_filename))
            return True
        else:
            log.error("File '%s' in bucket '%s' not found." %
                      (remote_filename, bucket_name))
            return False
    except S3ResponseError as e:  # 'except X, e' is a Py3 syntax error; 'as' works on Py2.6+
        log.error("Failed to get file '%s' from bucket '%s': %s" %
                  (remote_filename, bucket_name, e))
        return False
def calculate_checksum_s3_file(s3_path, file_name, s3_bucket):
    """ Download a file from S3 and return its MD5 hex digest, or None on error.

    Notes:
        The S3 etag may or may not be an MD5 checksum — it depends on how the
        file was uploaded — so it is not reliable; hash the content locally.
    """
    try:
        local_s3_file_path = './scripts/loading/data/' + file_name
        s3_conn = boto.connect_s3(S3_ACCESS_KEY, S3_SECRET_KEY)
        bucket = s3_conn.get_bucket(s3_bucket)
        key_item = Key(bucket)
        key_item.key = s3_path
        # NOTE: the previous unused bucket.get_key() round-trip was removed.
        key_item.get_contents_to_filename(local_s3_file_path)
        hash_md5 = hashlib.md5()
        # Stream in 4 KiB chunks so large files are not loaded into memory.
        with open(local_s3_file_path, 'rb') as fh:  # renamed from 'file' (shadowed builtin)
            for chunk in iter(lambda: fh.read(4096), b""):
                hash_md5.update(chunk)
        local_file_md5sum = hash_md5.hexdigest()
        # Clean up the temporary local copy.
        if os.path.exists(local_s3_file_path):
            os.remove(local_s3_file_path)
        else:
            logging.error('file not found: ' + local_s3_file_path)
        return local_file_md5sum
    except Exception:
        logging.error("Exception occurred", exc_info=True)
        return None
def _getDataFiles(self, file_master=0):
    """
    Retrieves metadata and parsed dataframe files
    (generated by utilities/hddata_process.py) from S3
    """
    comm = self._comm
    working_dir = self._working_dir
    # (bucket, filename) pairs to fetch: metadata comes from the data-source
    # bucket, the trimmed dataframe from the working bucket.  Consolidates the
    # two previously duplicated download blocks.
    wanted = [(self._datasource_bucket, 'metadata.txt'),
              (self._working_bucket, 'trimmed_dataframe.pandas')]
    if comm.rank == file_master:
        for bucket_name, fname in wanted:
            local = op.join(working_dir, fname)
            if not op.exists(local):
                conn = boto.connect_s3()
                b = conn.get_bucket(bucket_name)
                k = Key(b)
                k.key = fname
                k.get_contents_to_filename(local)
    # All ranks wait here until the master has the files in place.
    comm.barrier()
def fetch(self, key_name, filename):
    '''Retrieve the S3 object `key_name` into the local file `filename`.'''
    key = Key(self.bucket)
    key.key = key_name
    key.get_contents_to_filename(filename)
def download(self, file_path=None, destination=None):
    """Download S3 key `file_path` from the bucket into local file `destination`."""
    # get_contents_to_filename creates/truncates the target file itself, so
    # the previous open(destination, 'w')/close() pre-touch was redundant.
    k = Key(self.bucket)
    k.key = file_path
    k.get_contents_to_filename(destination)
    return
def get_file(self, prefix, filename):
    """Best-effort download of key self.prefix + prefix into `filename`.

    Failures are swallowed deliberately; on error `filename` may be absent
    or partially written.
    """
    try:
        k = Key(self.bucket)
        k.key = self.prefix + prefix
        k.get_contents_to_filename(filename)
    except Exception:  # narrowed from bare 'except:' so SystemExit/KeyboardInterrupt propagate
        pass
def get_ses_creds(): """ AWS SES uses its own set of credentials. So, have these creds saved in a file in an S3 bucket and use the AWS creds from the instance user data to retrieve those creds so an email can be sent. Note that the creds used to start the instance (ie, creds in user data) must have access to the file in the S3 bucket for this to work. Furthermore, this method assumes the user data is formatted to match user data of CloudMan (usecloudman.org). """ aws_access_key, aws_secret_key = get_aws_creds() # Get the creds file from the S3 bucket bucket_name = 'imogen-dev' remote_filename = 'ses_creds.yaml' local_file = '/mnt/transient_nfs/ghem/ses_creds.yaml' if aws_access_key is None or aws_secret_key is None: print "Could not retrieve credentials from CloudMan's user data. " \ "Cannot retrieve SES credentials from S3 bucket; not continuing." return None try: s3_conn = S3Connection(aws_access_key, aws_secret_key) b = s3_conn.get_bucket(bucket_name) k = Key(b, remote_filename) k.get_contents_to_filename(local_file) print("Retrieved file '%s' from bucket '%s' to '%s'." \ % (remote_filename, bucket_name, local_file)) except S3ResponseError, e: print("Failed to get file '%s' from bucket '%s': %s" \ % (remote_filename, bucket_name, e)) return None
def open(self, mode):
    """Opens a file to read or write operations."""
    # Ensure the backing S3 bucket exists and the local cache path is usable.
    # NOTE(review): makedirs is called on tmppath itself, which is later
    # opened as a file — presumably it creates parent dirs; confirm.
    self._bucket_exists_()
    s3cache.utils.makedirs(self.tmppath)
    self.mode = mode
    if 'r' in self.mode or 'a' in self.mode:
        # opening an existing file, try to copy in from s3 if not in local
        # cache
        self.log("trying to open file")
        use_local_copy = self.mgr.caching
        if use_local_copy:
            if not os.path.exists(self.tmppath):
                self.log(
                    "not found in local cache, attempting to load from S3")
                use_local_copy = False
        if not use_local_copy:
            # Cache miss (or caching disabled): pull the object down from S3.
            k = Key(self.mgr.bucket, self.path)
            try:
                k.get_contents_to_filename(self.tmppath)
                self.log("file located in S3, downloaded from S3 to cache")
            except S3ResponseError:
                # Neither cached nor in S3 -> surface as an I/O error.
                raise S3CacheIOError("//{0}/{1}"
                                     .format(self.mgr.bucket_name, self.path))
        else:
            self.log("file found in local cache")
    else:
        # Write mode: nothing to fetch, the local cache file is created fresh.
        self.log("opening new file in local cache for writing")
    # open the local file
    self.log("opening local cache file(" + self.tmppath + ")")
    self.file = open(self.tmppath, self.mode)
def get_file_from_s3(self, key_path, dest_path):
    """Download `key_path` from the configured code-repo bucket to `dest_path`."""
    logging.info('getting file from s3: ' + key_path + ' into ' + dest_path)
    repo_bucket_name = self.conf.get('code_repo', 'repo_bucket')
    bucket = boto.connect_s3().get_bucket(repo_bucket_name)
    key = Key(bucket)
    key.key = key_path
    key.get_contents_to_filename(dest_path)
def get_from_s3(): conn = S3Connection('AKIAJZ5NU5RXHVW3QXPA', 'dHE5tDMMk/WwAoyvrd44TaKsJfnNqLSjEUGOmXt5') bucketname = conn.get_bucket('scrapy_data_2') print bucketname k = Key(bucketname) k.key = 'my_scrapy' k.get_contents_to_filename('getjson.json')
def run_s3(in_bucket_name, filename, out_bucket_name, path, ramtemp, debug_image):
    """Fetch `filename` from the input bucket, run the local pipeline on it
    in a temp directory, then upload the produced json/img/text-masked files
    to the output bucket under `path`. The temp directory is always removed."""
    conn = S3Connection(config.access_key, config.secret_key, is_secure=False)
    src_bucket = conn.get_bucket(in_bucket_name)
    dst_bucket = conn.get_bucket(out_bucket_name)
    work_dir = tempfile.mkdtemp(dir='/tmp/ram/' if ramtemp else None)
    logging.debug('Temp directory in {}'.format(work_dir))
    try:
        # copy the source object into the temp directory
        local_input = os.path.join(work_dir, os.path.basename(filename))
        Key(src_bucket, filename).get_contents_to_filename(local_input)
        # run the algorithms locally
        results = run_local(local_input, work_dir, debug_image, True)
        # push each result group back to S3 under its own sub-folder
        for subdir, group in zip(('json', 'img', 'text-masked'), results):
            for produced in group:
                out_key = Key(dst_bucket, os.path.join(
                    path, subdir, os.path.basename(produced)))
                out_key.set_contents_from_filename(produced)
    finally:
        shutil.rmtree(work_dir)
def download_original(relative_path, static_path, bucket_id):
    """Fetch `relative_path` from bucket `bucket_id` into static_path/relative_path."""
    connection = S3Connection(settings.AWS_ACCESS_KEY, settings.AWS_SECRET_KEY)
    source_bucket = connection.get_bucket(bucket_id)
    source_key = Key(source_bucket)
    source_key.key = relative_path
    destination = os.path.abspath(os.path.join(static_path, relative_path))
    source_key.get_contents_to_filename(destination)
class S3Wrapper:
    """Thin convenience wrapper around a boto S3 connection and one bucket."""

    def __init__(self):
        self.s3 = boto.connect_s3()

    def setBucket(self, bucket_name):
        """Bind this wrapper to `bucket_name` and cache a reusable Key."""
        self.bucket_name = bucket_name
        self.bucket = self.s3.get_bucket(self.bucket_name)
        self.k = Key(self.bucket)

    def copyFilesToS3(self, keyname, filename):
        """Upload local `filename` under S3 key `keyname`."""
        self.k.key = keyname
        self.k.set_contents_from_filename(filename)

    def downloadFile(self, keyname, filename):
        """Download S3 key `keyname` to local `filename`."""
        self.k.key = keyname
        self.k.get_contents_to_filename(filename)

    def getBucketList(self, folder):
        """Return the second path component of every key whose name contains `folder`."""
        names = []
        for entry in self.bucket.list():
            if folder in entry.name:
                tail = entry.name.split('/')[1]
                if tail:
                    names.append(tail)
        return names

    def clearEntireBucket(self):
        """Delete every key in the bucket, logging each deletion."""
        for key in self.bucket.list():
            self.bucket.delete_key(key)
            print(' * deleted {} *'.format(key.name))
class GetObject(object):
    """Fetch (and optionally delete) a single object from a Storage bucket."""

    def __init__(self, id=None, bucket=None):
        self.connection = Storage(bucket)
        self.storage = self.connection.instance
        self.kobj = Key(self.storage)
        self.kobj.key = id
        # Set by get(); None until a download has completed.  Previously
        # reading .path before get() raised AttributeError.
        self.filepath = None

    def get(self, path=DEFAULT_SAVE_PATH):
        """Download the object into `path`; raises StoringException for a bad key."""
        if self.kobj.exists():
            self.filepath = os.path.join(path, self.kobj.key)
            self.kobj.get_contents_to_filename(self.filepath, cb=self.progress)
        else:
            raise StoringException('Key <%s> invalid.' % self.kobj.key)

    def progress(self, part, complete):
        """boto progress callback; True once the transfer has finished."""
        return part == complete

    def delete(self):
        """Remove the object from the bucket."""
        return self.kobj.delete()

    @property
    def path(self):
        """Local path of the downloaded file (None before get())."""
        return self.filepath
def pull_from_hyperstore(key_name):
    """Download `key_name` from the hyperstore bucket into ./this.json."""
    connection = boto.connect_s3(host='tims4.mobi-cloud.com', port=80,
                                 is_secure=False)
    store = Bucket(connection, bucket_name)
    Key(bucket=store, name=key_name).get_contents_to_filename("this.json")
def _getDataFiles(self,file_master=0):
    """
    Retrieves metadata and parsed dataframe files
    (generated by utilities/hddata_process.py) from S3
    """
    comm = self._comm
    working_dir = self._working_dir
    # (bucket, key) pairs to fetch: metadata lives in the data-source bucket,
    # the trimmed dataframe in the working bucket.  Consolidates two
    # previously duplicated near-identical download blocks.
    needed = ((self._datasource_bucket, 'metadata.txt'),
              (self._working_bucket, 'trimmed_dataframe.pandas'))
    if comm.rank == file_master:
        for bucket_name, key_name in needed:
            target = op.join(working_dir, key_name)
            if op.exists(target):
                continue
            conn = boto.connect_s3()
            b = conn.get_bucket(bucket_name)
            k = Key(b)
            k.key = key_name
            k.get_contents_to_filename(target)
    # everyone waits for the master to finish downloading
    comm.barrier()
def download_week_file(week, year):
    """Fetch the foosball results JSON for (week, year) from S3.

    Returns the parsed JSON document, or a default empty-week structure if
    the key is missing.  The temp file is always removed.  Previously the
    result was computed but never returned, and the file mode 'wr+' was not
    a valid mode string.
    """
    conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    bucket = conn.get_bucket(BUCKET_NAME)
    file_path = 'foosball/week-%s-%s.json' % (week, year)
    key = Key(bucket, file_path)
    f = open("/tmp/blah.%s.json" % time.time(), 'w+')
    try:
        key.get_contents_to_filename(f.name)
        f.seek(0)
        result = json.load(f)
    except boto.exception.S3ResponseError:
        # No data for this week yet: hand back an empty scaffold.
        result = {
            "meta": {"updated": 0},
            "games": [],
            "teams": {
                "0": {"name": "Pink", "players": []},
                "1": {"name": "Blue", "players": []}
            }
        }
    finally:
        os.unlink(f.name)
        f.close()
    return result
def cache_item(payload): # "source": "s3://my-bucket/key" # "target": "/my-path/key.maybe-extension-too # "bucket": "my-bucket" # "key": "key" print "received request to cache " + payload['bucket'] + '/' + payload[ 'key'] + ' to ' + payload['target'] bucket = S3_connection.get_bucket(payload['bucket']) S3_key = Key(bucket) S3_key.key = payload['key'] target = settings.CACHE_ROOT + payload['target'].decode('utf-8') target_path = '/'.join(target.split('/')[0:-1]) if not os.path.isdir(target_path): os.makedirs(target_path) if os.path.exists(target): print "already exists in cache" else: S3_key.get_contents_to_filename(target) print "downloaded " + payload['key'] + " from s3"
def get(args): fp = "/tmp/" bucket = conn.get_bucket(args.bucket) dump = args.dbdump k = Key(bucket) k.key = dump fp = fp + k.name print (Color.GREEN + "Retrieving database dump %s" + Color.END) % fp progress_callback = progress_for('downloading') progress_callback(0, 0, False) k.get_contents_to_filename(fp, cb=progress_callback) print Color.CYAN + "Download complete!" print Color.YELLOW + " decompressing database dump..." cmd = "tar xfvs %s" % fp print "tar xfvz %s" % fp subprocess.call(cmd, shell=True) print "Importing database dump into ella database..." sqlfp = k.name[:-17] + "sql" cmd = "psql -U jonathan -d ella -f %s " % sqlfp print ("psql -U jonathan -d ella -f %s ") % sqlfp subprocess.call(cmd, shell=True) print "Import finished" print "Removing compressed dump and *.sql files..." os.remove(fp) dir = os.getcwd() os.remove(dir+"/"+sqlfp) os.remove(dir+"/ella-pgglobals.sql") print "Operation completed succesfully" print
def s3_get(server, src_filename, dest_filename):
    """Download `src_filename` from the server's configured S3 bucket into
    the log cache as `dest_filename`."""
    auth = local.S3_AUTH[server]
    connection = boto.connect_s3(auth['key'], auth['secret'])
    bucket = connection.get_bucket(local.S3_BUCKET[server])
    key = Key(bucket)
    key.key = src_filename
    key.get_contents_to_filename(os.path.join(log_cache, dest_filename))
def get(args): fp = "/tmp/" bucket = conn.get_bucket(args.bucket) dump = args.dbdump k = Key(bucket) k.key = dump fp = fp + k.name print(Color.GREEN + "Retrieving database dump %s" + Color.END) % fp progress_callback = progress_for('downloading') progress_callback(0, 0, False) k.get_contents_to_filename(fp, cb=progress_callback) print Color.CYAN + "Download complete!" print Color.YELLOW + " decompressing database dump..." cmd = "tar xfvs %s" % fp print "tar xfvz %s" % fp subprocess.call(cmd, shell=True) print "Importing database dump into ella database..." sqlfp = k.name[:-17] + "sql" cmd = "psql -U jonathan -d ella -f %s " % sqlfp print("psql -U jonathan -d ella -f %s ") % sqlfp subprocess.call(cmd, shell=True) print "Import finished" print "Removing compressed dump and *.sql files..." os.remove(fp) dir = os.getcwd() os.remove(dir + "/" + sqlfp) os.remove(dir + "/ella-pgglobals.sql") print "Operation completed succesfully" print
def download(self, resource):
    """
    Download a resource.

    :param resource: An instance of `django_s3.resource.Resource`
    :return: The absolute filename of the downloaded file.
    """
    filename = os.path.join(django_s3_settings.S3_LOCAL_PATH, resource.name)
    # If the file exists do not download again (guard-clause form).
    if os.path.exists(filename):
        Transport.logger.info(
            _('File already exists, skipping download: {}'.format(
                filename)))
        return filename
    Transport.logger.info(
        _('Downloading {} to {}.'.format(resource.name, filename)))
    try:
        key_holder = Key(self.__bucket)
        key_holder.key = "{}/{}/{}".format(
            settings.S3_CATEGORY_MAP[resource.category_code],
            resource.folder_name,
            resource.name)
        key_holder.get_contents_to_filename(filename)
    except Exception as err:
        Transport.logger.error(
            _("Error downloading file: {}. Error: {}".format(
                resource.name, err)))
        # Right now we don't know what exceptions are expected here, we propagate the error
        # up. If we found some exception then we'll add the proper handler.
        raise
    return filename
def download_s3(self, package):
    """ Download from private s3 distributions.

    Returns the local path the package was written to.
    """
    package_path = path_join(self.path, package)
    aws_access_key_id = get_env_variable_or_raise_error(
        "AWS_ACCESS_KEY_ID")
    aws_secret_access_key = get_env_variable_or_raise_error(
        "AWS_SECRET_ACCESS_KEY")
    bucket_name = getenv("BUCKET", DIST_HOST)
    # connect to the bucket
    conn = boto.s3.connect_to_region(
        "eu-west-1",
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        is_secure=True,
        calling_format=OrdinaryCallingFormat())
    bucket = conn.get_bucket(bucket_name)
    # Key object for the requested package
    package_key = Key(bucket, package)
    # make sure the destination directory exists
    try:
        makedirs(self.path)
    except OSError:
        pass
    self.write("Downloading from aws bucket %s... " % bucket_name)
    # pull the package contents down to disk
    package_key.get_contents_to_filename(package_path)
    return package_path
def validate(conn, body, message):
    """Download the GTFS files named in the message, run each through the
    feed validator, publish the aggregated output, and ack the message."""
    #download the GTFS files and run them through the feed validator
    try:
        #create a working directory for this feed
        #directory = tempfile.mkdtemp()
        now = datetime.now()
        directory = "/mnt/req%s_%s" % (body['request_id'], now.strftime("%F-%T"))
        os.makedirs(directory)
        files = body['files']
        out = []
        for s3_id in files:
            bucket = s3_bucket()
            key = Key(bucket)
            key.key = s3_id
            basename = os.path.basename(s3_id)
            path = os.path.join(directory, basename)
            key.get_contents_to_filename(path)
            # Run the external validator and capture its console output.
            result = subprocess.Popen(["/usr/local/bin/feedvalidator.py",
                                       "-n", "--output=CONSOLE",
                                       "-l", "10",
                                       path],
                                      stdout=subprocess.PIPE)
            out.append({"key" : s3_id, "errors" : result.stdout.read()})
            os.remove(path)
        os.rmdir(directory)
        # Publish the aggregated validator output back on the exchange.
        publisher = conn.Producer(routing_key="validation_done",
                                  exchange=exchange)
        publisher.publish({'request_id' : body['request_id'], 'output' : out})
        message.ack()
    except:
        # Any failure is appended to a per-request error file; the message is
        # not acked so it can be redelivered.  NOTE(review): bare except also
        # swallows SystemExit/KeyboardInterrupt.
        now = datetime.now()
        errfile = "/var/otp/val_err_%s_%s" % (body['request_id'], now.strftime("%F-%T"))
        traceback.print_exc(file=open(errfile,"a"))
def restore(self, backup_key):
    """Restore a database backup (a boto Key or a key name) into the Heroku
    postgres instance, then delete the local restore file."""
    # get the database backup
    s3, bucket = self.connect()
    if isinstance(backup_key, boto.s3.key.Key):
        assert backup_key in bucket, "Cannot restore from key outside S3Bucket (%s)!" % S3Bucket
    else:
        backup_key = Key(bucket=bucket, name=backup_key)
    # download to disk
    backup_key.get_contents_to_filename(os.path.join(self.prefix, Elefant.RestorePath))
    # get the credentials for heroku postgres
    cmd = Elefant.CommandCredentials
    output = self.run(cmd)
    # extract credentials
    credentials = {}
    for line in output.split('\n'):
        if Elefant.UrlToken in line:
            url = line.strip()
            credentials = self.extract_postgres_url(url)
            break
    # apply the restore
    credentials.update({'dump' : os.path.join(self.prefix, Elefant.RestorePath)})
    cmd = Elefant.CommandRestore % credentials
    try:
        print self.run(cmd)
    except subprocess.CalledProcessError as e:
        # Some warnings associated with pg_restore
        # can be ignored (harmless), but consult the docs
        print "PROCESS ERROR CAUGHT:", e
    # delete the local restore file
    print self.run("rm %s" % os.path.join(self.prefix, Elefant.RestorePath))
def download_file(self):
    """Download self.file_name from the bucket to a uuid-named local file
    under /opt/content_extraction; return its path, or None on failure."""
    s3_key = Key(self.bucket)
    s3_key.key = self.file_name
    # NOTE(review): these /tmp/file*.txt writes look like leftover debugging
    # aids — confirm before removing.
    f = open('/tmp/file1.txt','w')
    f.write(s3_key.key)
    f.close()
    # Fetch the key again via the bucket to read its S3 metadata.
    file_key = self.bucket.get_key(self.file_name)
    info(self.logger, "File key: %s" % file_key.metadata)
    self.metadata = dict(file_key.metadata)
    # Random output name preserves the original extension.
    output_filename = "%s.%s" % (uuid.uuid4(), self.extension)
    f = open('/tmp/file.txt','w')
    f.write(output_filename)
    f.close()
    resulting_filename = os.path.join("/opt/content_extraction", output_filename)
    info(self.logger, "Saving file to %s" % resulting_filename)
    try:
        s3_key.get_contents_to_filename(resulting_filename)
    except:
        error(self.logger, "Unable to get s3_key to local file.")
        return None
    if os.path.isfile(resulting_filename):
        info(self.logger, "Successfully transferred file from S3 to local disk.")
        self.downloaded_file = resulting_filename
        # remember the file for later cleanup
        self.files_to_cleanup.append(self.downloaded_file)
        return resulting_filename
    else:
        error(self.logger, "Operations are completed, but no file on disk.")
        return None
def download_file(self, source, destination):
    """Download source from s3 server.

    if destination is a file, the download file path will be the same. If
    it's a folder, the download file path will be
    '{}/{}'.format(destination, os.path.basename(source))

    :param source: Relative path of the file inside the bucket
    :param destination: Local path of the directory or file the file
        should be downloaded to
    """
    # Heuristic: a dot within the last 5 chars means `destination` names a file.
    is_file_path = '.' in destination[-5:]
    if is_file_path:
        dest_file_path = destination
    else:
        dest_file_path = os.path.join(destination, os.path.basename(source))
    logger.info(f'Download file from "{source}" to "{dest_file_path}"')
    destination_dir = os.path.dirname(dest_file_path) or os.getcwd()
    with lock:
        if not os.path.exists(destination_dir):
            os.makedirs(destination_dir)
        if not os.path.exists(destination_dir):
            return
    key = Key(self.bucket_obj)
    key.key = source
    key.get_contents_to_filename(dest_file_path)
    return dest_file_path
def download_file(self, bucket, str_key, path_destination):
    """
    :type bucket: boto.s3.bucket.Bucket
    :param bucket: Bucket object.
    :type str_key: str
    :param str_key: AWS S3 Bucket key.
    :type path_destination: str
    :param path_destination: Destination directory for saving.
    """
    self.__logger.info('Downloading file "%s" from [%s] to "%s"'
                       % (str_key, bucket.name, path_destination))
    # Create the destination directory; an already-existing dir is fine.
    try:
        os.makedirs(path_destination)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            self.__logger.error("Can't create a directory \"%s\"!" % path_destination)
            raise exception
    file_path = os.path.join(path_destination, str_key)
    key_obj = Key(bucket)
    key_obj.key = str_key
    progress = ProgressBar(prefix='%s ' % str_key)
    key_obj.get_contents_to_filename(file_path, cb=progress.update)
def loadFromS3(keyString, stringFlag = 0): # connect to s3 s3 = boto.connect_s3() b = s3.get_bucket('mettinger') k = Key(b) k.key = keyString if b.get_key(keyString) == None: print "bad key" return None # if stringFlag load as string... if stringFlag: try: stringObjFromS3 = k.get_contents_as_string() return stringObjFromS3 except: print "s3 string read failure" return "s3 string read failure" #...otherwise load via temporary file else: try: tempFilename = '/Users/mettinger/Data/tempfile' + str(random.randint(0,10000)) k.get_contents_to_filename(tempFilename) fp = open(tempFilename,"r") objectFromS3 = pickle.load(fp) fp.close() os.remove(tempFilename) return objectFromS3 except: print "s3 read pickle failure" return "s3 read pickle failure"
def main(argv):
    """Repeatedly attempt to read testobj1..testobj20 from the EU bucket,
    printing the time each successful read completes (read-after-write test)."""
    ## PARAM OVERRIDES
    KurmaAWSTestLib.GLOBAL_DEBUG = 1
    bucket_name = 'readafterwrite003kurmaeu'
    ret = KurmaAWSTestLib.fetchArgs(argv)
    if (ret == -1):
        sys.exit(2)
    userObj = boto.s3.connect_to_region(
        'eu-west-1',
        aws_access_key_id=KurmaAWSTestLib.user_profiles[0]['access'],
        aws_secret_access_key=KurmaAWSTestLib.user_profiles[0]['secret'],
        calling_format=boto.s3.connection.OrdinaryCallingFormat())
    bucket = userObj.get_bucket(bucket_name)
    i = 1
    j = 0
    # Retry loop: advance to the next object only after a successful read;
    # give up entirely after 10000 attempts.
    while (i < 21 and j < 10000):
        j = j + 1
        k = Key(bucket)
        keystring = 'testobj' + str(i)
        k.key = keystring
        try:
            k.get_contents_to_filename(keystring)
            print("Read " + keystring + " at: " + str(datetime.now()))
            i = i + 1
        except:
            #print("==== Read failed at: " + str(datetime.now()))
            i = i  # no-op: stay on the same object and retry
    return
def _get_data_files(self):
    """
    Retrieves metadata and parsed dataframe files
    (generated by utilities/hddata_process.py) from S3
    """
    s2f = self._s3_to_fname
    # Keep retrying until the metadata file appears locally; any failure
    # backs off for a random sub-second interval before retrying.
    while not op.exists(op.join( self.working_dir, s2f(self.meta_file))):
        try:
            conn = boto.connect_s3()
            b = conn.get_bucket(self.ds_bucket)
            k = Key(b)
            k.key = self.meta_file
            k.get_contents_to_filename( op.join( self.working_dir,
                s2f(self.meta_file)))
        except:
            time.sleep(random.random())
    # Same retry pattern for the data file, but an S3ResponseError is fatal:
    # it means the master never produced the file.
    while not op.exists(op.join( self.working_dir, s2f(self.data_file))):
        conn = boto.connect_s3()
        try:
            b = conn.get_bucket(self.ds_bucket)
            k = Key(b)
            k.key = self.data_file
            k.get_contents_to_filename( op.join( self.working_dir,
                s2f(self.data_file)) )
        except S3ResponseError:
            self.logger.exception( 'Master has not generated files' )
            raise
        except OSError:
            # transient filesystem error: back off and retry
            time.sleep(random.random())
def download(bucketName,s3ObjectKey):
    '''Fetch `s3ObjectKey` from `bucketName` into /tmp/private-bootstrap.zip.'''
    from boto.s3.key import Key
    connection = boto.connect_s3()
    bucket = connection.get_bucket(bucketName)  # substitute your bucket name here
    key = Key(bucket)
    key.key = s3ObjectKey
    key.get_contents_to_filename('/tmp/private-bootstrap.zip')
def login_page(): if request.method == 'GET': return render_template("login.html") else: user_name = request.form['user_name'] pass_word = request.form['pass_word'] conn = boto.connect_s3(ACCESS_KEY, SECRET_KEY) bucket = conn.get_bucket('<BUCKET_NAME>') check = False k = Key(bucket) k.key = 'users.txt' if not os.path.exists(UPLOAD_FOLDER + str(k.key)): k.get_contents_to_filename(UPLOAD_FOLDER + 'users.txt') list = [] with open(UPLOAD_FOLDER + 'users.txt', 'rb') as f: for line in f: print "Line :", line list = line.strip().split("=") print list[0], list[1] if list[0] == user_name and list[1] == pass_word: check = True break else: check = False os.remove(UPLOAD_FOLDER + 'users.txt') if check == True: session['user_name'] = user_name return redirect("/home") else: flash('Invalid username or password') return render_template("login.html")
def download_file(self, file_to_load, local_save_dir):
    """
    :description: load a file from a given s3 bucket with a given name and save to a local dir

    :type file_to_load: string
    :param file_to_load: the file to load

    :type local_save_dir: string
    :param local_save_dir: the local dir to which to save the downloaded file

    :return: the location where the file was saved
    """
    # select the bucket, where input_s3_bucket takes the form 'bsdsdata'
    source_bucket = self.conn.get_bucket(self.s3_bucket)
    # set a key to the processed files list
    source_key = Key(source_bucket, file_to_load)
    encoded_name = source_key.name.encode('utf-8')
    save_location = os.path.join(local_save_dir, encoded_name)
    # download the file to process and save in the input location
    try:
        source_key.get_contents_to_filename(save_location)
    except boto.exception.S3ResponseError:
        raise boto.exception.S3ResponseError("key name: {} failed".format(encoded_name))
    return save_location
def download_system_model(self, model_s3_key, model_destination_path):
    """Download the model at `model_s3_key` to `model_destination_path` and
    report whether the file landed on disk."""
    # TODO: Get the base directory of the destination path and remove all old models
    # that are not the default English model.
    key = Key(self.bucket)
    key.key = model_s3_key
    key.get_contents_to_filename(model_destination_path)
    return os.path.isfile(model_destination_path)
def getDataFileS3Bucket(stockName):
    '''Download <stockName>.csv from the S3 bucket and return the local filename.'''
    try:
        import os
        import boto
        from boto.s3.key import Key
        # SECURITY: an AWS key pair was previously hard-coded here and is now
        # compromised — rotate it.  Credentials are read from the environment.
        keyId = os.environ.get("AWS_ACCESS_KEY_ID")
        sKeyId = os.environ.get("AWS_SECRET_ACCESS_KEY")
        srcFileName = "{0}.csv".format(stockName)
        destFileName = "s3_{0}".format(stockName)
        bucketName = "dah2-h2h-table44"
        conn = boto.connect_s3(keyId, sKeyId)
        bucket = conn.get_bucket(bucketName)
        # Get the Key object of the given key, in the bucket
        k = Key(bucket, srcFileName)
        # Get the contents of the key into a file
        k.get_contents_to_filename(destFileName)
        return destFileName
    except Exception:
        # bare re-raise keeps the original traceback ('raise e' truncates it on Py2)
        raise
def get_file_from_s3(filename):
    """Download `filename` from the configured bucket into a same-named local file."""
    connection = S3Connection(ACCESS_KEY, SECRET,
                              calling_format=OrdinaryCallingFormat())
    bucket = connection.get_bucket(BUCKET_NAME)
    key = Key(bucket)
    key.key = filename
    key.get_contents_to_filename(filename)
def get_image():
    """Fetch the image stored in the S3 bucket and save it to a local file."""
    connection = S3Connection()
    bucket = connection.get_bucket('S3 bucket name')
    image_key = Key(bucket)
    image_key.key = 'test'
    image_key.get_contents_to_filename('filename')
def download(self, filename):
    """Download this object's key into `filename` and return that path."""
    key = Key(self._get_bucket())
    key.key = self.key
    key.get_contents_to_filename(filename)
    return filename
def process_raw_message(raw_message, queue, bucket, dest):
    """Handle one queued notification: download the named artefact from S3
    (if not already local), extract it, copy results to `dest`, clean up,
    and delete the message from the queue on success."""
    info_message("Message received...")
    info_message(raw_message.get_body())
    try:
        message = json.loads(raw_message.get_body())
        # The payload carries the artefact filename under u'Message'.
        filename = message[u'Message']
        info_message("Processing %s." % filename)
        if not os.path.exists(filename):
            info_message("Downloading %s." % filename)
            s3 = boto.connect_s3()
            bucket = s3.get_bucket(bucket, validate=False)
            key = Key(bucket)
            key.key = filename
            key.get_contents_to_filename(filename)
        path = extract_artefacts(filename)
        copy_to_dest(path, dest)
        cleanup(path, filename, message[u'MessageId'])
        info_message("Message processed.")
        # Delete message from the queue
        queue.delete_message(raw_message)
    except Exception as ex:
        # Not deleting the message returns it to the queue for a retry.
        error_message("Unable to process message, exception occurred, message will be placed back on the queue")
        error_message(ex)
def get_s3_asset(remote_filename, temp_filepath, bucket_name=None, asset_depot=None): try: print "get_s3_asset remote_filename", remote_filename conn = None asset_depot = asset_depot if not asset_depot: asset_depot = S3Connection( os.environ.get("AWS_ACCESS_KEY_ID"), os.environ.get("AWS_SECRET_ACCESS_KEY") ).get_bucket(bucket_name) if not os.path.exists(os.path.dirname(temp_filepath)): try: os.makedirs(os.path.dirname(temp_filepath)) except: pass # race condition? asset_depot_key = Key(asset_depot) asset_depot_key.key = remote_filename asset_depot_key.get_contents_to_filename(temp_filepath) if os.path.exists(temp_filepath): return True, os.path.getsize(temp_filepath) except Exception, e: return False, -1
def _get_source_data( working_dir, run_model ):
    """
    Downloads the data from s3 to the local machine for processing
    """
    if not os.path.exists(working_dir):
        logging.info( "Creating directory [%s]" % ( working_dir ) )
        os.makedirs(working_dir)
    sd = run_model['source_data']
    #grab filenames we are interested in
    file_list = [f for k, f in sd.iteritems() if k[-4:] == 'file']
    conn = boto.connect_s3()
    bucket = conn.get_bucket( sd['bucket'] )
    for key_name in file_list:
        s3_path, fname = os.path.split(key_name)
        local_path = os.path.join(working_dir, fname)
        try:
            logging.info( "Transferring s3://%s/%s to %s" %
                          (sd['bucket'], key_name, local_path ))
            k = Key(bucket)
            k.key = key_name
            k.get_contents_to_filename(local_path)
            logging.info("Transfer complete")
        except S3ResponseError as sre:
            logging.error("bucket:[%s] file:[%s] download." % (sd['bucket'], key_name))
            logging.error(str(sre))
            # bare raise preserves the original traceback; the previous
            # 'raise(sre)' re-raised from here and truncated it on Py2
            raise
def get_file_from_bucket(conn, bucket_name, remote_filename, local_file, validate=False):
    """
    Retrieve a file `remote_filename` form bucket `bucket_name` to `local_file`.

    If `validate` is set, make sure the bucket exists by issuing a HEAD request
    before attempting to retrieve a file.

    Return `True` if the file was successfully retrieved. If an exception
    occurs or a zero size file is retrieved, return `False`.
    """
    # Guard clause: no bucket, nothing to fetch.
    if not bucket_exists(conn, bucket_name):
        log.debug("Bucket '%s' does not exist, did not get remote file '%s'" % (
            bucket_name, remote_filename))
        return False
    b = get_bucket(conn, bucket_name, validate)
    k = Key(b, remote_filename)
    try:
        k.get_contents_to_filename(local_file)
        if os.path.getsize(local_file) == 0:
            log.warn("Got an empty file ({0})?!".format(local_file))
            return False
        log.debug("Retrieved file '%s' from bucket '%s' on host '%s' to '%s'."
                  % (remote_filename, bucket_name, conn.host, local_file))
    except S3ResponseError as e:
        log.debug("Failed to get file '%s' from bucket '%s': %s" % (
            remote_filename, bucket_name, e))
        if os.path.exists(local_file):
            os.remove(local_file)  # Don't leave a partially downloaded or touched file
        return False
    return True
def fetch(creds, event_name):
    """Download the encrypted bundle for `event_name` from the distribution
    bucket described by the JSON creds file at `creds`."""
    logger = logging.getLogger(__name__)
    # set filename based on event
    filename = event_name + ".ar.gz.enc"
    # sanity checks
    if not os.path.exists(creds):
        logger.error("Error: cannot find creds file '%s'" % creds)
        sys.exit("Error: cannot find creds file '%s'" % creds)
    # load credentials
    creds = json.loads(open(creds, "r").read())
    # connect to S3
    s3 = boto.connect_s3(creds['access_id'], creds['access_key'])
    bucket = s3.get_bucket(creds['distribution_bucket'])
    # download the bundle
    bundle_key = Key(bucket)
    bundle_key.key = event_name + "/" + filename
    bundle_key.get_contents_to_filename(filename)
    logger.info("bundle downloaded to %s" % filename)
def download_file(self, s3_bucket, file_to_load, local_save_dir):
    """
    :description: load a file from a given s3 bucket with a given name and save to a local dir

    :type s3_bucket: string
    :param s3_bucket: s3 bucket from which to load the file

    :type file_to_load: string
    :param file_to_load: the file to load

    :type local_save_dir: string
    :param local_save_dir: the local dir to which to save the downloaded file

    :return: the location where the file was saved
    """
    # select the bucket, where input_s3_bucket takes the form 'bsdsdata'
    source_bucket = self.conn.get_bucket(s3_bucket)
    # set a key to the processed files list
    source_key = Key(source_bucket, file_to_load)
    encoded_name = source_key.name.encode('utf-8')
    save_location = os.path.join(local_save_dir, encoded_name)
    # download the file to process and save in the input location
    try:
        source_key.get_contents_to_filename(save_location)
    except boto.exception.S3ResponseError:
        raise boto.exception.S3ResponseError(
            "key name: {} failed".format(encoded_name))
    return save_location
def get_file_from_s3(filename):
    """Fetch `filename` from the configured bucket, saving under the same name."""
    bucket = S3Connection(ACCESS_KEY, SECRET).get_bucket(BUCKET_NAME)
    key = Key(bucket)
    key.key = filename
    key.get_contents_to_filename(filename)
def download(bucketName, s3ObjectKey):
    """Save the given S3 object to /tmp/private-bootstrap.zip."""
    from boto.s3.key import Key
    s3 = boto.connect_s3()
    bucket = s3.get_bucket(bucketName)  # substitute your bucket name here
    key = Key(bucket)
    key.key = s3ObjectKey
    key.get_contents_to_filename('/tmp/private-bootstrap.zip')
def download_file(bucket, keystr, filename): try: key = Key(bucket=bucket, name=keystr) key.get_contents_to_filename(filename) except Exception, e: print "Attempted to fetch {} from {} as {}".format(keystr, bucket, filename) print "Download failed: {}".format(e)
def download_build(build_dir, bucket_name, project_name):
    '''
    Downloads build.zip from the specified S3 bucket and unpacks it
    into the specified build directory.

    @type base_dir: String
    @param base_dir: Build directory

    @type bucket_name: String
    @param bucket_name: Name of the S3 bucket to use

    @type project_name: String
    @param project_name: Name of the project folder inside the S3 bucket
    '''
    # Clear any previous builds
    if os.path.exists(build_dir):
        shutil.rmtree(build_dir)
    os.mkdir(build_dir)
    zip_dest = os.path.join(build_dir, "build.zip")
    bucket = S3Connection().get_bucket(bucket_name)
    remote_key = Key(bucket)
    remote_key.name = "%s/build.zip" % project_name
    remote_key.get_contents_to_filename(zip_dest)
    # Unpack the archive into the build directory.
    subprocess.check_call(["unzip", zip_dest, "-d", build_dir])
def cache_item(payload): # "source": "s3://my-bucket/key" # "target": "/my-path/key.maybe-extension-too # "bucket": "my-bucket" # "key": "key" print "received request to cache " + payload['bucket'] + '/' + payload['key'] + ' to ' + payload['target'] bucket = S3_connection.get_bucket(payload['bucket']) S3_key = Key(bucket) S3_key.key = payload['key'] target = settings.CACHE_ROOT + payload['target'].decode('utf-8') target_path = '/'.join(target.split('/')[0:-1]) if not os.path.isdir(target_path): os.makedirs(target_path) if os.path.exists(target): print "already exists in cache" else: S3_key.get_contents_to_filename(target) print "downloaded " + payload['key'] + " from s3"
def download(self, key_name, filename):
    """Fetch `key_name` from the bucket into `filename` and log the transfer."""
    key = Key(self.bucket)
    key.key = key_name
    key.get_contents_to_filename(filename)
    logger.info('Download %s -> %s', key_name, filename)
def download_build(self, build_dir):
    '''
    Downloads build.zip from the specified S3 bucket and unpacks it
    into the specified build directory.

    @type base_dir: String
    @param base_dir: Build directory
    '''
    # Start from a clean build directory.
    if os.path.exists(build_dir):
        shutil.rmtree(build_dir)
    os.mkdir(build_dir)
    zip_dest = os.path.join(build_dir, self.zip_name)
    build_key = Key(self.bucket)
    build_key.name = self.remote_path_build
    build_key.get_contents_to_filename(zip_dest)
    # Unpack the archive into the build directory.
    subprocess.check_call(["unzip", zip_dest, "-d", build_dir])
def handle(self, *args, **kwargs):
    """Download the SQL dump from S3 and move it into the mysql_load folder."""
    import shutil
    conn = boto.connect_s3(settings.AWS_ACCESS_KEY_ID,
                           settings.AWS_SECRET_ACCESS_KEY)
    bucket = conn.get_bucket('api_db_test')
    k = Key(bucket, 'api_db_2017-05-13-01:43.sql.gz')
    k.get_contents_to_filename('test.gz')
    # shutil.move replaces os.system('mv ...'): no shell involved, and
    # failures raise an exception instead of being silently ignored.
    shutil.move('test.gz', '/opt/bin/mysql_load/')
def download_file(full_filename):
    """Download `full_filename` from the bucket into the current directory."""
    path, filename = split_filename(full_filename)
    conn, bucket = get_connection_and_bucket()
    full_key_name = os.path.join(path, filename)
    print("Attempting to download from {}".format(full_key_name))
    key = Key(bucket)
    key.key = full_key_name
    key.get_contents_to_filename(filename)