def download():
    info("Downloading part %d of %d" % (part, parts))
    f = open(fname, "r+b")
    attempt = 0
    done = False
    saved_ex = None
    while attempt < 3 and not done:
        attempt += 1
        try:
            f.seek(start, os.SEEK_SET)
            # Use a fresh Key object in each thread because the boto
            # Key object is not thread-safe
            key = Key(bucket=bucketname, name=keyname)
            key.get_file(f, headers={"Range": "bytes=%d-%d" % (start, end)})
            done = True
        except Exception as e:
            saved_ex = e
            debug("Attempt %d failed for part %d" % (attempt, part))
    if not done:
        raise saved_ex
    info("Part %d finished" % part)
    f.close()
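download() above closes over part, parts, start, end, fname, bucketname, and keyname from an enclosing scope. A minimal sketch of what that enclosing driver might look like follows; the factory function, the part-size arithmetic, and the pre-allocation step are assumptions for illustration, not the original code (retries and logging are omitted here).

import os
import threading
from boto.s3.key import Key

def make_download(part, parts, start, end, fname, bucketname, keyname):
    """Build a per-part download closure like the one above (sketch)."""
    def download():
        with open(fname, "r+b") as f:
            f.seek(start, os.SEEK_SET)
            # Fresh Key per thread, since boto Key objects are not thread-safe
            key = Key(bucket=bucketname, name=keyname)
            key.get_file(f, headers={"Range": "bytes=%d-%d" % (start, end)})
    return download

def download_parallel(bucketname, keyname, fname, size, part_size=8 * 1024 * 1024):
    parts = (size + part_size - 1) // part_size
    # Pre-allocate the local file so each thread can seek into its own slice
    with open(fname, "wb") as f:
        f.truncate(size)
    threads = []
    for part in range(1, parts + 1):
        start = (part - 1) * part_size
        end = min(part * part_size, size) - 1  # Range header bounds are inclusive
        threads.append(threading.Thread(
            target=make_download(part, parts, start, end,
                                 fname, bucketname, keyname)))
    for t in threads:
        t.start()
    for t in threads:
        t.join()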
def _get_original_content(name, version, bucket, url):
    """Retrieve the original content.

    Tries to download from the S3 bucket first; on failure it falls back
    to downloading from the original url. Returns None if neither
    retrieval works.
    """
    original = tempfile.NamedTemporaryFile(prefix='splice', suffix='.zip')
    s3_key = "{0}/v{1}/{2}".format(name, version, _ORIGINAL_NAME)
    k = Key(bucket)
    k.key = s3_key
    try:
        k.get_file(original)
    except Exception:
        env.log("Failed to download %s from S3" % s3_key)
    else:
        return original
    try:
        r = requests.get(url)
        original.write(r.content)
    except Exception:
        env.log("Failed to download content from %s" % url)
        original.close()
        return None
    else:
        return original
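A brief usage sketch for the helper above; the bucket, name, version, and url values are placeholders, not from the original. One gotcha worth noting: a NamedTemporaryFile is deleted from disk the moment it is closed, so the caller has to read it before closing.

# Hypothetical usage; `bucket` is assumed to be an open boto Bucket
original = _get_original_content("addon", 3, bucket, "https://example.com/addon.zip")
if original is not None:
    original.seek(0)   # get_file/write leave the file offset at EOF
    data = original.read()
    original.close()   # the temporary file is removed on close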
def find_family(fingerprint):
    """Find a family in the json files based on a fingerprint."""
    # If static_store_strategy is LOCAL, just look up the local files
    if static_store_strategy == "LOCAL":
        print("[find_family] Going local strategy")
        theOne = ""
        for key in ["top10_bandwidth.json", "top10_consensus.json", "all.json"]:
            with open(abs_paths[key[:-5]], "r") as fp:
                array_store = json.load(fp)
            for family in array_store:
                for relay in family["families"]:
                    if relay["fingerprint"] == fingerprint:
                        theOne = family
                        break
            if theOne != "":
                break
        return theOne

    # Otherwise fetch the latest json files from S3
    c = boto.connect_s3(acc_key, acc_sec)
    b = c.get_bucket(bucket)
    bucket_key = Key(b)
    theOne = ""
    for key in ["top10_bandwidth.json", "top10_consensus.json", "all.json"]:
        bucket_key.key = key
        # Download the json file, then rewind and load it
        with open(abs_paths[key[:-5]], "w+") as fp:
            bucket_key.get_file(fp)
            fp.seek(0)
            array_store = json.load(fp)
        # Search the families for the one containing the given fingerprint
        for family in array_store:
            for relay in family["families"]:
                if relay["fingerprint"] == fingerprint:
                    theOne = family
                    break
        if theOne != "":
            break
    return theOne
def get_resource_to_file_pointer(self, resource, file_path):
    bucket, s3_key = self.s3_storage_objects(resource)
    key = Key(bucket)
    key.key = s3_key
    with open(file_path, mode='wb') as fp:
        key.get_file(fp)
    assert key.size == os.path.getsize(file_path), \
        ("The file size of the local cached copy does not correspond to the original size on S3 of %s. "
         "This points to a corrupted download, check what's in %s" % (key.name, file_path))
    # Hand back a fresh read-only handle to the verified local copy
    return open(file_path, mode='rb')
def get_files(self, bucket, names, prefix=None, delimiter=None,
              directory='down', replace_directory=True):
    if replace_directory and os.path.exists(directory):
        shutil.rmtree(directory)
    if not os.path.exists(directory):
        os.makedirs(directory)
    for name in names:
        if prefix and delimiter:
            key = Key(bucket=bucket, name=prefix + delimiter + name)
        else:
            key = Key(bucket=bucket, name=name)
        # Binary mode avoids newline translation on non-text objects
        with open(os.path.join(directory, name), 'wb') as f:
            key.get_file(f)
def get_s3(req):
    f_name, extension = os.path.splitext(req['fpath'])
    local_path = 'files/' + req['name'] + extension
    if os.path.isfile(local_path):
        return local_path
    conn = S3Connection(os.environ['AWS_ACCESS_KEY'], os.environ['AWS_SECRET_KEY'])
    bucket = conn.get_bucket(req['bucket'], validate=False)
    s3path = req['fpath']
    key = Key(bucket, s3path)

    # boto invokes the callback with (bytes transferred so far, total size)
    def callback(togo, total):
        print("Got {0:10d} Bytes out of {1:10d} Bytes".format(togo, total))
        if togo == total:
            print("Done! The path of the " + req['name'] +
                  " file was returned.")

    with open(local_path, 'wb') as f:
        key.get_file(f, cb=callback)
    return local_path
def handle(name):
    """Try to do the copy."""
    try:
        # get the key from S3
        key = Key(s3_bucket, name)
        # copy it to a tmp file; binary mode keeps the bytes intact
        with open(tmp_file, "wb") as fp:
            key.get_file(fp)
        # copy the tmp file to cloudfiles
        with open(tmp_file, "rb") as fp:
            # create the object to copy to
            o = cf_container.create_object(name)
            o.write(fp)
        return True
    except Exception:
        print(' retrying')
        return False
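Since handle() signals success through its return value, the caller presumably loops until it succeeds. A hypothetical driver, with the name list and the retry budget of 3 as assumptions:

# Hypothetical caller; names_to_copy and the retry count are not from the original
for name in names_to_copy:
    for _ in range(3):
        if handle(name):
            break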
def Download_Object(self, obj, bucket, path):
    """Download the given object from a bucket to a destination file.

    _obj_: target object
    _bucket_: bucket containing the object
    _path_: local path to write the object to. If _path_ is a directory,
        then _obj_ will be used as the filename.
    """
    if os.path.isdir(path):
        # This is supposed to be Windows-proof but hasn't been tested
        path = path.rstrip(os.path.sep) + os.path.sep + obj
    s3bucket = self._conn.get_bucket(bucket)
    s3obj = Key(s3bucket, obj)
    logger.debug("Calling get_file for object %s, downloading to %s" % (obj, path))
    # Binary mode and a with-block so the handle is flushed and closed
    with open(path, 'wb') as fp:
        s3obj.get_file(fp)
def index():
    # Check static_store_strategy. If LOCAL, there is no need to connect to S3
    if static_store_strategy == "LOCAL":
        print("GET / Going local strategy")
        data_store = [[], [], []]
        file_pairs = [(0, "top10_bandwidth"), (1, "top10_consensus"), (2, "all")]
        for store_index, file_name in file_pairs:
            with open(abs_paths[file_name], "r") as fp:
                data_store[store_index] = json.load(fp)
        # Unpack the families into individual relays
        all_relays = []
        for family in data_store[2]:
            all_relays += [relay for relay in family["families"]]
        data_store[2] = all_relays
        return render_template("index.html",
                               top10_bandwidth=data_store[0],
                               top10_consensus=data_store[1],
                               all_relays=data_store[2])

    # Otherwise update the bandwidth and cw rankings using the files from S3.
    # Connect and retrieve each key from the S3 bucket
    c = boto.connect_s3(acc_key, acc_sec)
    b = c.get_bucket(bucket)
    bucket_key = Key(b)

    bucket_key.key = "top10_bandwidth.json"
    with open(abs_paths["top10_bandwidth"], "w+") as fp:
        bucket_key.get_file(fp)
        fp.seek(0)
        top10_bandwidth = json.load(fp)

    bucket_key.key = "top10_consensus.json"
    with open(abs_paths["top10_consensus"], "w+") as fp:
        bucket_key.get_file(fp)
        fp.seek(0)
        top10_consensus = json.load(fp)

    bucket_key.key = "all.json"
    with open(abs_paths["all"], "w+") as fp:
        bucket_key.get_file(fp)
        fp.seek(0)
        all_families = json.load(fp)

    # Unpack the families into individual relays
    all_relays = []
    for family in all_families:
        all_relays += [relay for relay in family["families"]]

    # Lastly, reset the files for proper file tracking
    # reset_files()
    return render_template("index.html",
                           top10_bandwidth=top10_bandwidth,
                           top10_consensus=top10_consensus,
                           all_relays=all_relays)
def _get_file(self, key, fp):
    k = Key(self.bucket)
    k.key = key
    k.get_file(fp)
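A self-contained usage sketch for the wrapper above; the owning class, bucket name, key name, and local path are all hypothetical stand-ins, since the original shows only the method.

import boto
from boto.s3.key import Key

# Hypothetical owner class, for illustration only; the real class and
# bucket wiring are not shown in the original
class S3Store(object):
    def __init__(self, bucket_name):
        self.bucket = boto.connect_s3().get_bucket(bucket_name)

    def _get_file(self, key, fp):
        k = Key(self.bucket)
        k.key = key
        k.get_file(fp)

# Stream a key straight into a local file through the wrapper
store = S3Store("my-bucket")
with open("/tmp/report.csv", "wb") as fp:
    store._get_file("reports/2024/report.csv", fp)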
def get_record(patient, record_num=1, record_type="full", loc_path='utils/tmp'):
    r"""Pull a patient record from the S3 bucket.

    Parameters
    ----------
    patient : string
        the patient number of the patient to pull data for.
    record_num : int, optional
        number of the record to pull. This is usually from 1 to around 10.
    record_type : {'full', 'reduced'}, optional
        'full' by default. 'reduced' has less metadata.

    Returns
    -------
    string
        the filepath of the .mat file of the record (will go to the
        `\tmp` directory).

    See Also
    --------
    make_h5py_object : uses the filepath to create a h5py object.

    Notes
    -----
    You need to set the Amazon keys as environment variables. Put the
    secret key in ``AWS_SECRET_KEY`` and the access key in
    ``AWS_ACCESS_KEY``. Uses the ``boto`` package to access AWS.

    Examples
    --------
    These are written in doctest format and illustrate how to use the
    function.

    >>> from utils.get_data import get_patients, get_record
    >>> get_patients()[0]
    'A00055540'
    >>> get_record(get_patients()[0])
    '~/.../src/utils/tmp/{recordname}.mat'
    """
    patient_path = "data/uploads/" + str(patient) + '/'
    event_name = str(record_type) + "_" + str(patient)
    # Zero-pad the record number (001, 002, ...) in the remote filename
    if record_num < 10:
        event_name += "00" + str(record_num) + ".mat"
    else:
        event_name += "0" + str(record_num) + ".mat"
    local_path = os.path.join(loc_path, patient + "_" + str(record_num) + ".mat")
    print(local_path)
    if os.path.isfile(local_path):
        print("There is already a file named " + local_path +
              "; returned that path instead of pulling data.")
        return local_path
    conn = S3Connection(os.environ['AWS_ACCESS_KEY'], os.environ['AWS_SECRET_KEY'])
    bucket = conn.get_bucket('neurodatadesign-test', validate=False)
    s3path = patient_path + event_name
    print(s3path)
    key = Key(bucket, s3path)

    # boto invokes the callback with (bytes transferred so far, total size)
    def callback(togo, total):
        print("Got {0:10d} Bytes out of {1:10d} Bytes".format(togo, total))
        if togo == total:
            print("Done! The path of the .mat file was returned.")

    with open(local_path, 'wb') as f:
        key.get_file(f, cb=callback)
    return local_path