import hashlib
import mimetypes


def put_string_to_key(bucket, key_name, content, is_public, callback=None):
    """Write string to key in S3 bucket. If contents of existing key are
    unchanged, there will be no modification.

    Params:
        bucket (boto.s3 object): The bucket to write to.
        key_name (str): The key to write to (must include any applicable prefix).
        content (str): The content to write to the key.
        is_public (bool): Whether the new object should be publicly readable.
        callback (function): An optional progress callback.
    """
    key = bucket.get_key(key_name)
    if key:
        etag = key.etag.strip('"').lower()
        local_etag = hashlib.md5(content).hexdigest().lower()

        if etag == local_etag:
            # key contents haven't changed
            return

    key = bucket.new_key(key_name)
    mimetype = mimetypes.guess_type(key_name)[0]
    if mimetype:
        key.set_metadata('Content-Type', mimetype)

    policy = 'public-read' if is_public else None
    key.set_contents_from_string(content, policy=policy, cb=callback)
    key.close()

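# Usage sketch for put_string_to_key (not part of the original snippet): the
# bucket name is hypothetical, and credentials are assumed to come from boto's
# usual environment/config lookup.
import boto


def _example_put_string():
    conn = boto.connect_s3()
    bucket = conn.get_bucket('example-bucket')  # hypothetical bucket name
    put_string_to_key(bucket, 'notes/hello.txt', 'hello world', is_public=True)
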
def _stream_write_internal(
    self,
    path,
    fp,
    content_type=None,
    content_encoding=None,
    cancel_on_error=True,
    size=filelike.READ_UNTIL_END,
):
    """
    Writes the data found in the file-like stream to the given path, with optional
    limit on size.

    Note that this method returns a *tuple* of (bytes_written, write_error) and should
    *not* raise an exception (such as IOError) if a problem uploading occurred. ALWAYS
    check the returned tuple on calls to this method.
    """
    # Minimum size of upload part size on S3 is 5MB
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)

    if content_type is not None:
        key.set_metadata("Content-Type", content_type)

    if content_encoding is not None:
        key.set_metadata("Content-Encoding", content_encoding)

    if size != filelike.READ_UNTIL_END:
        fp = filelike.StreamSlice(fp, 0, size)

    # TODO figure out how to handle cancel_on_error=False
    try:
        key.set_contents_from_stream(fp)
    except IOError as ex:
        return 0, ex

    return key.size, None

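# Caller sketch (assumed, not from the original code): _stream_write_internal
# returns (bytes_written, write_error) instead of raising, so the docstring's
# advice to always check the returned tuple looks like this in practice.
# `storage` here is a hypothetical instance of the class that owns the method.
def _example_stream_write(storage, path, fp):
    bytes_written, err = storage._stream_write_internal(
        path, fp, content_type='application/octet-stream')
    if err is not None:
        raise IOError('upload of %s failed: %s' % (path, err))
    return bytes_written
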
def add_thumb(config, local_filename, remote_filename, extension):
    bucket = _get_s3_bucket(config)

    key = boto.s3.key.Key(bucket)
    key.key = remote_filename
    # Metadata must be attached before the upload so boto sends it with the
    # PUT request; setting it afterwards only changes the local Key object.
    key.set_metadata('Content-Type', "image/" + extension)
    key.set_contents_from_filename(local_filename)
    key.set_acl('public-read')

def add_cv(config, person_id, contents, filename, content_type):
    # Validate the numeric id before converting it to a string; comparing the
    # string form against 0 would never fail.
    person_id = int(person_id)
    assert person_id != 0
    person_id = str(person_id)

    bucket = _get_s3_bucket(config)
    when = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S-")

    key = boto.s3.key.Key(bucket)
    key.key = "cvs/" + person_id + "/" + when + filename
    # Metadata must be set before the upload so it is included in the PUT.
    key.set_metadata('Content-Type', content_type)
    key.set_contents_from_string(contents)
    key.set_acl('public-read')

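# add_thumb and add_cv both rely on a _get_s3_bucket(config) helper that is
# not shown here. A minimal sketch of such a helper with boto might look like
# the following; the config key names are assumptions, not the original code.
import boto


def _get_s3_bucket_sketch(config):
    conn = boto.connect_s3(
        aws_access_key_id=config.get('AWS_ACCESS_KEY_ID'),
        aws_secret_access_key=config.get('AWS_SECRET_ACCESS_KEY'))
    return conn.get_bucket(config.get('S3_BUCKET_NAME'))
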
def upload(self, mpi, source, target, pos=0, chunk=0, part=0):
    '''Thread worker for upload operation.'''
    s3url = S3URL(target)
    bucket = self.s3.lookup(s3url.bucket, validate=self.opt.validate)

    # Initialization: Set up multithreaded uploads.
    if not mpi:
        fsize = os.path.getsize(source)
        key = bucket.get_key(s3url.path)

        # optional checks
        if self.opt.dry_run:
            message('%s => %s', source, target)
            return
        elif self.opt.sync_check and self.sync_check(source, key):
            message('%s => %s (synced)', source, target)
            return
        elif not self.opt.force and key:
            raise Failure('File already exists: %s' % target)

        # extra headers
        extra_headers = {}
        if self.opt.add_header:
            for hdr in self.opt.add_header:
                try:
                    key, val = hdr.split(":", 1)
                except ValueError:
                    raise Failure("Invalid header format: %s" % hdr)
                key_inval = re.sub("[a-zA-Z0-9-.]", "", key)
                if key_inval:
                    key_inval = key_inval.replace(" ", "<space>")
                    key_inval = key_inval.replace("\t", "<tab>")
                    raise ParameterError("Invalid character(s) in header name '%s': \"%s\"" % (key, key_inval))
                extra_headers[key.strip().lower()] = val.strip()

        # Small file optimization.
        if fsize < self.opt.max_singlepart_upload_size:
            key = boto.s3.key.Key(bucket)
            key.key = s3url.path
            key.set_metadata('privilege', self.get_file_privilege(source))
            key.set_contents_from_filename(source, reduced_redundancy=self.opt.reduced_redundancy, headers=extra_headers)
            if self.opt.acl_public:
                key.set_acl('public-read')
            message('%s => %s', source, target)
            return

        # Here we need to have our own md5 value because multipart upload calculates
        # different md5 values.
        mpu = bucket.initiate_multipart_upload(
            s3url.path,
            metadata={'md5': self.file_hash(source), 'privilege': self.get_file_privilege(source)})

        for args in self.get_file_splits(mpu.id, source, target, fsize, self.opt.multipart_split_size):
            self.pool.upload(*args)
        return

    # Handle each part in parallel, post initialization.
    mpu = None  # initialized so the not-found check below works
    for mp in bucket.list_multipart_uploads():
        if mp.id == mpi.id:
            mpu = mp
            break

    if mpu is None:
        raise Failure('Could not find MultiPartUpload %s' % mpi.id)

    data = None
    with open(source, 'rb') as f:
        f.seek(pos)
        data = f.read(chunk)

    if not data:
        raise Failure('Unable to read data from source: %s' % source)

    mpu.upload_part_from_file(StringIO(data), part)

    # Finalize
    if mpi.complete():
        try:
            mpu.complete_upload()
            message('%s => %s', source, target)
        except Exception as e:
            mpu.cancel_upload()
            raise RetryFailure('Upload failed: Unable to complete upload %s.' % source)

def upload(self, mpi, source, target, pos=0, chunk=0, part=0):
    '''Thread worker for upload operation.'''
    s3url = S3URL(target)
    bucket = self.s3.lookup(s3url.bucket, validate=self.opt.validate)

    # Initialization: Set up multithreaded uploads.
    if not mpi:
        fsize = os.path.getsize(source)
        key = bucket.get_key(s3url.path)

        # optional checks
        if self.opt.dry_run:
            message('%s => %s', source, target)
            return
        elif self.opt.sync_check and self.sync_check(source, key):
            message('%s => %s (synced)', source, target)
            return
        elif not self.opt.force and key:
            raise Failure('File already exists: %s' % target)

        # Small file optimization.
        if fsize < self.opt.max_singlepart_upload_size:
            key = boto.s3.key.Key(bucket)
            key.key = s3url.path
            key.set_metadata('privilege', self.get_file_privilege(source))
            key.set_contents_from_filename(source)
            message('%s => %s', source, target)
            return

        # Here we need to have our own md5 value because multipart upload calculates
        # different md5 values.
        mpu = bucket.initiate_multipart_upload(
            s3url.path,
            metadata={'md5': self.file_hash(source), 'privilege': self.get_file_privilege(source)})

        for args in self.get_file_splits(mpu.id, source, target, fsize, self.opt.multipart_split_size):
            self.pool.upload(*args)
        return

    # Handle each part in parallel, post initialization.
    mpu = None  # initialized so the not-found check below works
    for mp in bucket.list_multipart_uploads():
        if mp.id == mpi.id:
            mpu = mp
            break

    if mpu is None:
        raise Failure('Could not find MultiPartUpload %s' % mpi.id)

    data = None
    with open(source, 'rb') as f:
        f.seek(pos)
        data = f.read(chunk)

    if not data:
        raise Failure('Unable to read data from source: %s' % source)

    mpu.upload_part_from_file(StringIO(data), part)

    # Finalize
    if mpi.complete():
        try:
            mpu.complete_upload()
            message('%s => %s', source, target)
        except Exception as e:
            mpu.cancel_upload()
            raise RetryFailure('Upload failed: Unable to complete upload %s.' % source)

def worker(base_path):
    mtime = path = 0
    while 1:
        try:
            mtime, queued_path = queue.get()
            path = queued_path
            if path is None:
                return
            key = boto.s3.key.Key(bucket)
            if mtime is None:
                # delete
                try:
                    try:
                        key.key = bucket_prefix + path
                        key.delete()
                    except Exception:
                        logger.exception('deleting %r, retrying' % key.key)
                        time.sleep(9)
                        key.key = bucket_prefix + path
                        key.delete()
                except Exception:
                    if index is not None:
                        # Failed to delete. Put the key back so we
                        # try again later
                        index[queued_path] = 1
                    raise
            elif mtime is GENERATE:
                (path, s3mtime) = path
                fspath = join(base_path, path.encode(encoding))
                if exists(fspath):
                    # Someone created a file since we decided to
                    # generate one.
                    continue
                fspath = dirname(fspath)
                data = "Index of " + path[:-len(INDEX_HTML) - 1]
                data = [
                    "<!-- generated -->",
                    "<html><head><title>%s</title></head><body>" % data,
                    "<h1>%s</h1><table>" % data,
                    "<tr><th>Name</th><th>Last modified</th><th>Size</th>"
                    "</tr>",
                ]
                for name in sorted(os.listdir(fspath)):
                    if name.startswith('.'):
                        continue  # don't index dot files
                    name_path = join(fspath, name)
                    if isdir(name_path):
                        name = name + '/'
                        size = '-'
                    else:
                        size = os.stat(name_path).st_size
                    mtime = time.ctime(os.stat(name_path).st_mtime)
                    name = name.decode(encoding)
                    data.append('<tr><td><a href="%s">%s</a></td>\n'
                                '    <td>%s</td><td>%s</td></tr>'
                                % (name, name, mtime, size))
                data.append("</table></body></html>\n")
                data = '\n'.join(data)
                digest = hashlib.md5(data.encode(encoding)).hexdigest()
                if digest != s3mtime:
                    # Note that s3mtime is either a previous
                    # digest or it's 0 (cus path wasn't in s3) or
                    # it's an s3 upload time. The test above
                    # works in all of these cases.
                    key.key = bucket_prefix + path
                    key.set_metadata('generated', 'true')
                    try:
                        key.set_contents_from_string(
                            data,
                            headers={'Content-Type': 'text/html'},
                        )
                    except Exception:
                        logger.exception(
                            'uploading generated %r, retrying' % path)
                        time.sleep(9)
                        key.set_contents_from_string(
                            data,
                            headers={'Content-Type': 'text/html'},
                        )
                    if s3mtime:
                        # update (if it was add, mtime would be 0)
                        if cloudfront:
                            invalidations.append(path)
                    if index is not None:
                        index[path] = digest
            else:
                # upload
                try:
                    if had_index:
                        # We only store mtimes to the nearest second.
                        # We don't have a fudge factor, so there's a
                        # chance that someone might update the file in
                        # the same second, so we check if a second has
                        # passed and sleep if it hasn't.
                        now = time_time_from_sixtuple(
                            time.gmtime(time.time()))
                        if not now > mtime:
                            time.sleep(1)
                    key.key = bucket_prefix + path
                    path = join(base_path, path)
                    try:
                        key.set_contents_from_filename(
                            path.encode(encoding))
                    except Exception:
                        logger.exception('uploading %r %r, retrying'
                                         % (mtime, path))
                        time.sleep(9)
                        key.set_contents_from_filename(
                            path.encode(encoding))
                except Exception:
                    if index is not None:
                        # Upload failed. Remove from index so we
                        # try again later (if the path is still
                        # around).
                        index.pop(queued_path)
                    raise
        except Exception:
            logger.exception('processing %r %r' % (mtime, path))
        finally:
            queue.task_done()

def process_item(self, item):
    filepath = item['file']
    filename = item['filename']
    room_id = item['room_id']
    user_id = item['user_id']
    username = item['username']
    room_token = item['room_token']

    print "got this job: %s" % item

    im = thumbnail = None
    try:
        im = Image.open(filepath)
    except Exception:
        pass

    message_type = im and 'image' or 'file'

    # Generate thumbnail
    if im:
        thumbnail = Image.open(filepath)
        thumbnail.thumbnail((300, 300), Image.ANTIALIAS)

    print im
    print thumbnail

    # Upload thumbnail if necessary
    if thumbnail:
        name, ext = os.path.splitext(filename)
        thumbname = '/uploads/%s/%s_thumb%s' % (room_id, name, ext)
        thumbfile = tempfile.NamedTemporaryFile()
        thumbnail.save(thumbfile, im.format)
        # Rewind so the upload below starts from the beginning of the file.
        thumbfile.seek(0)

    # Determine file mimetype
    if im:
        mime_type = 'image/%s' % im.format.lower()
    else:
        mime_type, _ = mimetypes.guess_type(filename)

    # Create keys for file
    key = boto.s3.key.Key(self.bucket)
    key.key = '/uploads/%s/%s' % (room_id, filename)
    if mime_type:
        key.set_metadata('Content-Type', mime_type)

    f = open(filepath, 'rb')  # binary mode so the bytes are uploaded as-is
    filesize = os.path.getsize(filepath)
    key.set_contents_from_file(f)
    f.close()
    os.remove(filepath)

    print "Uploaded file"

    # Upload thumbnail
    if thumbnail:
        thumb_key = boto.s3.key.Key(self.bucket)
        thumb_key.key = thumbname
        if mime_type:
            thumb_key.set_metadata('Content-Type', mime_type)
        thumb_key.set_contents_from_file(thumbfile.file)
        print "Uploaded thumbnail"

    # Create a message
    content = '%s posted a file' % username
    message = {
        'room': room_id,
        'user_id': user_id,
        'user_name': username,
        'type': message_type,
        'filename': filename,
        's3_key': key.key,
        'content': content,
        'created_at': datetime.datetime.utcnow(),
    }

    if message_type == 'image':
        message['size'] = im.size
        message['s3_thumbnail_key'] = thumb_key.key
        message['thumb_size'] = thumbnail.size

    if mime_type:
        message['mime_type'] = mime_type

    message['filesize'] = filesize

    message_id = self.db.messages.insert(message)

    m = {
        'channel': room_token,
        'message': {
            'id': str(message_id),
            'content': message['content'],
            'user_id': str(message['user_id']),
            'user_name': message['user_name'],
            'type': message_type,
            'url': key.generate_url(3600),
        }
    }

    if message_type == 'image':
        m['message']['size'] = message['size']
        m['message']['thumb_url'] = thumb_key.generate_url(3600)

    self.pubnub.publish(m)

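# Side note (assumed helper, not from the original code): the URLs produced by
# key.generate_url(3600) above are presigned and expire after an hour. A caller
# that needs a fresh link later can regenerate one from the stored s3_key.
def _example_refresh_url(bucket, s3_key, expires_in=3600):
    key = bucket.get_key(s3_key)         # look the stored object up again
    return key.generate_url(expires_in)  # new presigned URL
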