Example #1
File: s3.py Project: rodriguezjf/pegasus
    def download():
        info("Downloading part %d of %d" % (part, parts))
        f = open(fname, "r+b")

        attempt = 0
        done = False
        saved_ex = None
        while attempt < 3 and not done:
            attempt += 1
            try:
                f.seek(start, os.SEEK_SET)
                # Need to use a different Key object for each thread because
                # the Key object in boto is not thread-safe
                key = Key(bucket=bucketname, name=keyname)
                key.get_file(f,
                             headers={"Range": "bytes=%d-%d" % (start, end)})
                done = True
            except Exception as e:
                saved_ex = e
                debug("Attempt %d failed for part %d" % (attempt, part))
        if not done:
            raise saved_ex

        info("Part %d finished" % (part))
        f.close()
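The worker above downloads one byte range into a shared file. A minimal sketch of a driver that partitions the object and runs one worker per part, assuming a boto Bucket object and a 5 MB part size; download_parallel and download_part are illustrative names, not part of the original project:

import threading

from boto.s3.key import Key

CHUNK = 5 * 1024 * 1024  # assumed part size: 5 MB


def download_part(bucket, keyname, fname, start, end):
    # Each thread builds its own Key: boto Key objects are not thread-safe
    f = open(fname, "r+b")
    f.seek(start)
    key = Key(bucket=bucket, name=keyname)
    key.get_file(f, headers={"Range": "bytes=%d-%d" % (start, end)})
    f.close()


def download_parallel(bucket, keyname, fname):
    size = bucket.get_key(keyname).size

    # Pre-allocate the local file so every worker can seek into it
    f = open(fname, "wb")
    f.truncate(size)
    f.close()

    threads = []
    for start in range(0, size, CHUNK):
        end = min(start + CHUNK, size) - 1  # Range header end is inclusive
        t = threading.Thread(target=download_part,
                             args=(bucket, keyname, fname, start, end))
        t.start()
        threads.append(t)
    for t in threads:
        t.join()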
Example #2
def _get_original_content(name, version, bucket, url):
    """Retrieve the original content

    It tries to download from the S3 bucket first; on failure it falls back
    to the original url. Returns None if neither retrieval works.
    """
    original = tempfile.NamedTemporaryFile(prefix='splice', suffix='.zip')
    s3_key = "{0}/v{1}/{2}".format(name, version, _ORIGINAL_NAME)
    k = Key(bucket)
    k.key = s3_key
    try:
        k.get_file(original)
    except Exception:
        env.log("Failed to download %s from S3" % s3_key)
    else:
        return original

    try:
        r = requests.get(url)
        r.raise_for_status()  # treat HTTP errors as download failures
        original.write(r.content)
    except Exception:
        env.log("Failed to download content from %s" % url)
        original.close()
        return None
    else:
        return original
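A hypothetical call site for the helper above (the arguments here are made up for illustration):

# Hypothetical usage: the NamedTemporaryFile is deleted when closed
original = _get_original_content("campaign", 3, bucket,
                                 "https://example.com/campaign.zip")
if original is not None:
    original.seek(0)   # get_file()/write() leave the cursor at EOF
    data = original.read()
    original.close()   # also removes the temporary file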
Example #3
File: views.py Project: seansaito/Roster
def find_family(fingerprint):
    """
    Finds a family from the json files based on fingerprint
    """
    # If static_store_strategy is LOCAL, then just lookup local files
    if static_store_strategy == "LOCAL":
        print "[find_family] Going local strategy"
        theOne = ""
        for key in ["top10_bandwidth.json", "top10_consensus.json", "all.json"]:
            array_store = []

            with open(abs_paths[key[:-5]], "r") as fp:
                array_store = json.load(fp)

            for family in array_store:
                for relay in family["families"]:
                    if relay["fingerprint"] == fingerprint:
                        theOne = family
                        break

            if theOne != "":
                break

        return theOne

    # Fetch latest json files from S3
    c = boto.connect_s3(acc_key, acc_sec)
    b = c.get_bucket(bucket)
    bucket_key = Key(b)

    theOne = ""
    for key in ["top10_bandwidth.json", "top10_consensus.json", "all.json"]:
        array_store = []
        bucket_key.key = key

        # Load the json to the temporary array store
        with open(abs_paths[key[:-5]], "w+") as fp:
            bucket_key.get_file(fp)
            fp.seek(0)
            array_store = json.load(fp)

        # Loop through families to search for the family with given fingerprint
        for family in array_store:
            for relay in family["families"]:
                if relay["fingerprint"] == fingerprint:
                    theOne = family
                    break

        if theOne != "":
            break

    return theOne
Example #4
    def get_resource_to_file_pointer(self, resource, file_path):
        bucket, s3_key = self.s3_storage_objects(resource)
        key = Key(bucket)
        key.key = s3_key

        fp = open(file_path, mode='wb')

        key.get_file(fp)
        fp.close()

        assert key.size == os.path.getsize(file_path), \
                ("The file size of the local cached copy does not correspond to the original size on S3 of %s. "
                 "This points to a corrupted download, check what's in %s" % (key.name, file_path))
        fp = open(file_path, mode='rb')
        return fp
Example #5
File: main.py Project: jpennell/s3-scripts
    def get_files(self, bucket, names, prefix=None, delimiter=None, directory='down', replace_directory=True):

        if replace_directory:
            if os.path.exists(directory):
                shutil.rmtree(directory)

        if not os.path.exists(directory):
            os.makedirs(directory)

        for name in names:
            # Binary mode: S3 objects are byte streams; avoid shadowing file()
            f = open(os.path.join(directory, name), 'wb')
            if prefix and delimiter:
                key = Key(bucket=bucket, name=prefix + delimiter + name)
            else:
                key = Key(bucket=bucket, name=name)
            key.get_file(f)
            f.close()
Example #6
def get_s3(req):
  f_name, extension = os.path.splitext(req['fpath'])
  local_path = 'files/' + req['name'] + extension
  if os.path.isfile(local_path):
    return local_path
  conn = S3Connection(os.environ['AWS_ACCESS_KEY'],
      os.environ['AWS_SECRET_KEY'])
  bucket = conn.get_bucket(req['bucket'], validate=False)
  s3path = req['fpath']
  key = Key(bucket, s3path)
  with open(local_path, 'wb') as f:
    # boto invokes cb with (bytes_transmitted, total_size)
    def callback(so_far, total):
      print "Got {0:10d} Bytes out of {1:10d} Bytes".format(so_far, total)
      if so_far == total:
        print "Done! The path of the " + req['name'] + \
          " file was returned."
    key.get_file(f, cb=callback)
  return local_path
Example #7
File: snippet.py Project: szabo92/gistable
def handle(name):
    "try to do the copy"
    try:
        #get tmp file
        key = Key(s3_bucket, name)
        #copy to tmp (binary mode: S3 objects are byte streams)
        fp = open(tmp_file, "wb")
        key.get_file(fp)
        fp.close()
        #copy to cf
        fp = open(tmp_file, "rb")
        #create the object to copy to
        o = cf_container.create_object(name)
        o.write(fp)
        #cleanup
        fp.close()
        return True
    except Exception:
        print ' retrying'
        return False
Example #8
    def Download_Object(self, obj, bucket, path):
        """
        Downloads given object from bucket to destination file

        _obj_: target object
        _bucket_: bucket containing object
        _path_: local path to write object to. If _path_ is a directory, then
        _obj_ will be used as the filename
        """
        if os.path.isdir(path):
            # os.path.join picks the right separator on every platform
            path = os.path.join(path, obj)

        # Binary mode: S3 objects are byte streams
        fp = open(path, 'wb')

        s3bucket = self._conn.get_bucket(bucket)
        s3obj = Key(s3bucket, obj)
        logger.debug("Calling get_file for object %s, downloading to %s" %
                     (obj, path))
        s3obj.get_file(fp)
        fp.close()
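A hypothetical usage of the method above, assuming client is an instance of the containing class:

# Explicit destination file:
client.Download_Object("db.dump", "backups", "/var/tmp/restore.dump")
# Directory destination: the object name becomes the filename,
# so this writes /var/tmp/db.dump
client.Download_Object("db.dump", "backups", "/var/tmp")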
Example #9
File: s3.py Project: pegasus-isi/pegasus
    def download():
        info("Downloading part %d of %d" % (part, parts))
        f = open(fname, "r+b")

        attempt = 0
        done = False
        saved_ex = None
        while attempt < 3 and not done:
            attempt += 1
            try:
                f.seek(start, os.SEEK_SET)
                # Need to use a different Key object for each thread because
                # the Key object in boto is not thread-safe
                key = Key(bucket=bucketname, name=keyname)
                key.get_file(f, headers={"Range": "bytes=%d-%d" % (start, end)})
                done = True
            except Exception as e:
                saved_ex = e
                debug("Attempt %d failed for part %d" %(attempt, part))
        if not done:
            raise saved_ex

        info("Part %d finished" % (part))
        f.close()
Example #10
File: views.py Project: seansaito/Roster
def index():
    # Check static_store_strategy. If local, then no need to connect to S3
    if static_store_strategy == "LOCAL":
        print "GET /   Going local strategy"
        data_store = [[],[],[]]
        file_pairs = [(0, "top10_bandwidth"), (1, "top10_consensus"), (2, "all")]
        for store_index, file_name in file_pairs:
            fp = open(abs_paths[file_name], "r")
            data_store[store_index] = json.load(fp)
            fp.close()

        # Unpack the families into individual relays
        all_relays = []
        for family in data_store[2]:
            all_relays.extend(family["families"])

        data_store[2] = all_relays

        return render_template("index.html", top10_bandwidth=data_store[0],
            top10_consensus=data_store[1], all_relays=data_store[2])

    # First update the bandwidth and cw rankings using the files from S3
    # Connect and retrieve key from S3 bucket
    c = boto.connect_s3(acc_key, acc_sec)
    b = c.get_bucket(bucket)
    bucket_key = Key(b)

    bucket_key.key = "top10_bandwidth.json"
    top10_bandwidth = []

    with open(abs_paths["top10_bandwidth"], "w+") as fp:
        bucket_key.get_file(fp)
        fp.seek(0)
        top10_bandwidth = json.load(fp)

    bucket_key.key = "top10_consensus.json"
    top10_consensus = []

    with open(abs_paths["top10_consensus"], "w+") as fp:
        bucket_key.get_file(fp)
        fp.seek(0)
        top10_consensus = json.load(fp)

    bucket_key.key = "all.json"
    all_families = []

    with open(abs_paths["all"], "w+") as fp:
        bucket_key.get_file(fp)
        fp.seek(0)
        all_families = json.load(fp)

    all_relays = []
    for family in all_families:
        all_relays.extend(family["families"])

    # Lastly, reset the files for proper file tracking
    # reset_files()

    return render_template("index.html", top10_bandwidth=top10_bandwidth,
        top10_consensus=top10_consensus, all_relays=all_relays)
Example #11
    def _get_file(self, key, fp):
        k = Key(self.bucket)
        k.key = key
        k.get_file(fp)
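Because get_file only needs a writable file-like object, the helper works just as well with an in-memory buffer; a minimal sketch (the store instance is hypothetical):

import io

buf = io.BytesIO()
store._get_file("exports/latest.json", buf)
buf.seek(0)  # get_file leaves the cursor at the end of the data
payload = buf.getvalue()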
Example #12
def get_record(patient, record_num=1, record_type="full",
               loc_path='utils/tmp'):
    r"""Pulls a patient record from the S3 bucket.

    Parameters
    ----------
    patient : string
        the patient number of the patient to pull data for.

    record_num : int, optional
        number of the record to pull. This is usually from 1 to around 10.

    record_type : {'full', 'reduced'}, optional
        'full' by default. 'reduced' has less metadata.

    Returns
    -------
    string
        the filepath of the .mat file of the record (it will go to the `tmp` directory).

    See Also
    --------
    make_h5py_object : uses filepath to create a h5py object.

    Notes
    -----
    You need to set the amazon keys as environment variables. Put the secret key in ``AWS_SECRET_KEY`` and the regular key in ``AWS_ACCESS_KEY``.

    Uses the ``boto`` package to access AWS.

    Examples
    --------
    These are written in doctest format, and should illustrate how to
    use the function.

    >>> from utils.get_data import get_patients, get_record
    >>> print get_patients()[0]
    'A00055540'
    >>> print get_record(get_patients()[0])
    '~/.../src/utils/tmp/{recordname}.mat'

    """
    patient_path = "data/uploads/" + str(patient) + '/'
    event_name = str(record_type) + "_" + str(patient)
    # Zero-pad the record number to three digits (001, 002, ...)
    event_name += "%03d.mat" % record_num
    local_path = os.path.join(loc_path,
            patient + "_" + str(record_num) + ".mat")
    print local_path
    if os.path.isfile(local_path):
        print "  there is already a file named: " + \
                local_path + ", returned that path instead of pulling data."
        return local_path
    conn = S3Connection(os.environ['AWS_ACCESS_KEY'],
            os.environ['AWS_SECRET_KEY'])
    bucket = conn.get_bucket('neurodatadesign-test', validate=False)
    s3path = patient_path + event_name
    print s3path
    key = Key(bucket, s3path)
    # boto invokes cb with (bytes_transmitted, total_size)
    def callback(so_far, total):
        print "Got {0:10d} Bytes out of {1:10d} Bytes".format(so_far, total)
        if so_far == total:
            print "Done! The path of the .mat file was returned."
    with open(local_path, 'wb') as f:
        key.get_file(f, cb=callback)
    return local_path