Exemple #1
0
def add(bkt, key, img, form = 'JPEG'):
  """Save an in-memory image into an S3 bucket and make it public.

  Args:
    bkt: name of the target S3 bucket (looked up via the module-level conn).
    key: object key to store the image under.
    img: image object exposing .save(fileobj, format) (PIL-style).
    form: format string passed to img.save (default 'JPEG').
  """
  bucket = conn.get_bucket(bkt)
  newKeyObj = Key(bucket)
  newKeyObj.key = key
  # 'image/jpeg' is the registered MIME type; 'image/jpg' is not valid.
  newKeyObj.set_metadata('Content-Type', 'image/jpeg')
  buf = s.StringIO()
  img.save(buf, form)
  newKeyObj.set_contents_from_string(buf.getvalue())
  newKeyObj.set_acl('public-read')
Exemple #2
0
    def upload(self):
        """Upload every file produced by get_files() to the bucket.

        Each (destination, data, content_type, compressed) tuple becomes a
        new S3 key: the content type is set, shared headers are attached as
        metadata, and the payload is written under `destination`.
        """
        for destination, data, content_type, compressed in self.get_files():
            s3_key = Key(self.bucket)
            s3_key.content_type = content_type

            # gzip-compressed payloads must advertise their encoding
            if compressed:
                s3_key.set_metadata('content-encoding', 'gzip')

            for name, value in self.headers:
                s3_key.set_metadata(name, value)

            s3_key.key = destination
            s3_key.set_contents_from_string(data)
Exemple #3
0
    def add_bucket(self,bucket_name,access,zonename,create_date):
        """Create a bucket (if under self.bucket_limit) and stamp it with a
        'create_info' key whose metadata records the creation parameters.

        Returns True when the bucket was created (even if writing the
        metadata key failed), False when the limit was reached or any
        error occurred while counting/creating buckets.
        """
        try:
            bucket_count=len(self.conn.get_all_buckets())
            # print 'bucket_count is %s ' %bucket_count
            if bucket_count < self.bucket_limit:
                self.conn.create_bucket(bucket_name)
                b=self.conn.get_bucket(bucket_name)
                try:
                    # k=b.new_key('create_info')
                    # k.set_contents_from_string("{'bucket_name':'%s','zonename':'%s','access':'%s','create_date':'%s'}" %(bucket_name,zonename,access,create_date))
                    k1=Key(b)
                    k1.key='create_info'

                    # k1.set_metadata('Bucket_Name',bucket_name) -- NOTE: metadata key
                    # names must not contain underscores; with an underscore the
                    # request fails with a 403 error at creation time.
                    k1.set_metadata('BucketName',bucket_name)
                    k1.set_metadata('ZoneName',zonename)
                    k1.set_metadata('Access',access)
                    k1.set_metadata('CreateDate',create_date)

                    # empty body: the object exists only to carry the metadata
                    k1.set_contents_from_string('')
                except Exception as e:
                    # best-effort: a metadata failure is printed but not fatal
                    print r'\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\',e
                return True
            else:
                return False
        except Exception as e:
            return False
Exemple #4
0
    def add_bucket(self, bucket_name, access, zonename, create_date):
        """Create a bucket (if under self.bucket_limit) and stamp it with a
        'create_info' key whose metadata records the creation parameters.

        Returns True when the bucket was created (even if writing the
        metadata key failed), False when the limit was reached or any
        error occurred while counting/creating buckets.
        """
        try:
            bucket_count = len(self.conn.get_all_buckets())
            # print 'bucket_count is %s ' %bucket_count
            if bucket_count < self.bucket_limit:
                self.conn.create_bucket(bucket_name)
                b = self.conn.get_bucket(bucket_name)
                try:
                    # k=b.new_key('create_info')
                    # k.set_contents_from_string("{'bucket_name':'%s','zonename':'%s','access':'%s','create_date':'%s'}" %(bucket_name,zonename,access,create_date))
                    k1 = Key(b)
                    k1.key = 'create_info'

                    # k1.set_metadata('Bucket_Name',bucket_name) -- NOTE: metadata key
                    # names must not contain underscores; with an underscore the
                    # request fails with a 403 error at creation time.
                    k1.set_metadata('BucketName', bucket_name)
                    k1.set_metadata('ZoneName', zonename)
                    k1.set_metadata('Access', access)
                    k1.set_metadata('CreateDate', create_date)

                    # empty body: the object exists only to carry the metadata
                    k1.set_contents_from_string('')
                except Exception as e:
                    # best-effort: a metadata failure is printed but not fatal
                    print r'\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\', e
                return True
            else:
                return False
        except Exception as e:
            return False
Exemple #5
0
    def _upload_file_to_bucket(self, file_path, destination):
        """Upload a local file to this object's S3 bucket.

        The file is stored under destination/<basename> with a
        Content-Type of application/x-compressed.

        Args:
            file_path: path of the local file to upload.
            destination: key prefix inside the bucket.
        """
        file_name = os.path.basename(file_path)
        destination_path = os.path.join(destination, file_name)
        log_info("Uploading '%s' to s3 bucket '%s' to '%s'" %
                 (file_path, self.bucket_name, destination))

        k = Key(self.bucket)
        k.key = destination_path
        # set meta data (has to be before setting content in
        # order for it to work)
        k.set_metadata("Content-Type", "application/x-compressed")

        # open in binary mode (the payload is compressed data) and use a
        # context manager so the handle is closed even if the upload fails
        # (the original leaked the file object)
        with open(file_path, 'rb') as file_obj:
            k.set_contents_from_file(file_obj)

        log_info("Completed upload '%s' to s3 bucket '%s'!" %
                 (file_path, self.bucket_name))
Exemple #6
0
    def _upload_file_to_bucket(self, file_path, destination):
        """Upload a local file to this object's S3 bucket.

        The file is stored under destination/<basename> with a
        Content-Type of application/x-compressed.

        Args:
            file_path: path of the local file to upload.
            destination: key prefix inside the bucket.
        """
        file_name = os.path.basename(file_path)
        destination_path = os.path.join(destination, file_name)
        log_info("Uploading '%s' to s3 bucket '%s' to '%s'" %
                (file_path, self.bucket_name, destination))

        k = Key(self.bucket)
        k.key = destination_path
        # set meta data (has to be before setting content in
        # order for it to work)
        k.set_metadata("Content-Type", "application/x-compressed")

        # open in binary mode (the payload is compressed data) and use a
        # context manager so the handle is closed even if the upload fails
        # (the original leaked the file object)
        with open(file_path, 'rb') as file_obj:
            k.set_contents_from_file(file_obj)

        log_info("Completed upload '%s' to s3 bucket '%s'!" %
                 (file_path, self.bucket_name))
Exemple #7
0
    def __key(cls,k=None):
        """Return an S3 key object.

        When k is None, a fresh key with a random 12-letter lowercase name
        is created and stamped with a 'time' metadata entry (current UTC
        epoch seconds); otherwise the existing key named k is fetched.
        """
        from boto.s3.connection import Key
        from time import gmtime
        from calendar import timegm
        from random import choice
        from string import ascii_lowercase as letters

        if k is not None:
            # existing object: just look it up
            return cls.__bucket().get_key(k)

        key = Key(cls.__bucket())
        key.key = ''.join(choice(letters) for _ in range(12))
        key.set_metadata('time', timegm(gmtime()))
        return key
Exemple #8
0
    def save_tree(self, target, new_tree):
        """Serialize new_tree and store it in S3 under a timestamped key.

        The key is <s3-pf-prefix>/trees/<target>.<YYYYMMDDTHHMM> and is
        tagged with pf:target and pf:saved_dt metadata.
        """
        self.connect()

        saved_dt = datetime.utcnow().strftime('%Y%m%dT%H%M')

        buf = StringIO()
        tree.save_tree(new_tree, buf)

        # Save to S3
        print("Saving tree to S3")
        prefix = self.cp.get('options', 's3-pf-prefix')

        key = Key(self.s3_bucket)
        key.key = '{}/trees/{}.{}'.format(prefix, target, saved_dt)
        key.set_metadata('pf:target', target)
        key.set_metadata('pf:saved_dt', saved_dt)
        key.set_contents_from_string(buf.getvalue())
Exemple #9
0
def upload_file(conn, full_path):
    b = Bucket(conn, BUCKET)
    k = Key(b)
    k.key = full_path
    expires = datetime.utcnow() + timedelta(days=(25 * 365))
    expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT")
    k.set_metadata("Content-Type", mimetypes.guess_type(full_path)[0])
    k.set_metadata("Expires", expires)
    k.set_metadata("Cache-Control", "max-age={0}, public".format(25 * 365 * 36400))
    k.set_contents_from_filename(full_path)
    k.set_acl('public-read')
    print "{} -> http://s3.amazonaws.com/yaluandmike/{}".format(full_path, full_path)
# Outer-join all part tables on the entity column into one combined frame
entityInfo = reduce(lambda df, tbl: df.join(tbl, entity, how="outer"),
                    partTbls)
entityInfo = add_computed_cols(entityInfo)

# COMMAND ----------
print "writing entityInfo to s3: %s" % destEntityInfoPath
# Coalesce to one partition so a single CSV part file is produced; write
# headers and use empty string for nulls
entityInfo.repartition(1).write.format('com.databricks.spark.csv') \
  .options(header='true', nullValue='') \
  .save('/mnt/' + AWS_BUCKET_NAME + '/' + destEntityInfoPath, mode='overwrite')
print "written entityInfo to S3."
# COMMAND ----------
# compute and write out simMatrix

sim = compute_sim_mat(orderInfoDF.groupby(entity, uID).count())
print "sim length: %d" % len(sim)

# presumably pickle.dumps (the metadata below says pickle); -1 selects the
# highest protocol -- TODO confirm where `dumps` is imported from
binary_dump = dumps(sim, -1)
print "writing simMat to s3 Key: %s/%s" % (AWS_BUCKET_NAME, destSimMatKey)

# Open a connection to S3 and write the contents
conn = S3Connection(ACCESS_KEY, SECRET_KEY)
bucket = conn.get_bucket(AWS_BUCKET_NAME)
k = Key(bucket)
k.key = destSimMatKey
# Provenance metadata so consumers know how to decode the blob
k.set_metadata("encoder", "pickle-version-2")
k.set_metadata("generated-by", "mappr-etl")
k.set_metadata("generated-by-source", srcFilePath)
k.set_contents_from_string(binary_dump)
print "written simMat to S3."
sqlContext.clearCache()
Exemple #11
0
def process_file(aws_conn, filepath):
    mtime = get_mtime(filepath)

    name_200 = add_size_name(filepath, '200')
    name_800 = add_size_name(filepath, '800')

    mtime_200 = get_mtime(name_200)
    mtime_800 = get_mtime(name_800)

    im = None
    if mtime_200 is None or mtime_200 < mtime:
        try:
            im = Image.open(filepath)
        except:
            return None
        generate_200(im, name_200)

    if mtime_800 is None or mtime_800 < mtime:
        if im is None:
            try:
                im = Image.open(filepath)
            except:
                return None
        generate_800(im, name_800)

    names = {
        'original': filepath,
        'thumbnail': name_200,
        'display': name_800,
        }


    b = Bucket(aws_conn, BUCKET)

    image_result = {}

    for image_type, name in names.items():
        aws_tag_path = add_size_name(name, 's3t') + '.meta'
        aws_key_path = name[len(GALLERY_DIR):].strip('/')

        image_result[image_type] = {
            'url': 'http://s3.amazonaws.com/{}/{}'.format(
            BUCKET,
            aws_key_path)
            }

        if not is_newer(name, aws_tag_path):
            try:
                resolution = load_data(aws_tag_path)
                resolution['width']
            except:
                resolution = get_resolution(name)
                save_data(aws_tag_path, resolution)
            image_result[image_type].update(resolution)
            continue


        resolution = get_resolution(name)
        image_result.update(resolution)
        save_data(aws_tag_path, resolution)

        s3key = b.get_key(aws_key_path)
        mtime = get_mtime(name)

        if s3key and s3key.last_modified:
            print datetime.datetime(*parsedate(s3key.last_modified)[:6])
            print mtime
            if datetime.datetime(*parsedate(s3key.last_modified)[:6]) > mtime:
                with open(aws_tag_path, 'a'):
                    os.utime(aws_tag_path, None)
                continue
        print 'Sending {} to S3'.format(name)
        k = Key(b)
        k.key = aws_key_path
        expires = datetime.datetime.utcnow() + datetime.timedelta(days=25 * 365)
        expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT")
        k.set_metadata("Content-Type", mimetypes.guess_type(name)[0])
        k.set_metadata("Expires", expires)
        k.set_metadata("Cache-Control", "max-age={0}, public".format(86400 * 365 * 25))
        k.set_contents_from_filename(name)
        k.set_acl('public-read')

        with open(aws_tag_path, 'a'):
            os.utime(aws_tag_path, None)

    photo_age = get_photo_age(filepath)

    image_result['caption'] = get_caption(filepath)

    return photo_age, image_result