Example #1
import hashlib
import mimetypes

def put_string_to_key(bucket, key_name, content, is_public, callback=None):
    """Write string to key in S3 bucket. If contents of existing key are
    unchanged, there will be no modification.
    Params:
        bucket (boto.s3 object): The bucket to write to.
        key_name (str): The key to write to (must include any applicable prefix).
        content (str): The content to write to the key.
        is_public (bool): Whether the new object should be publicly readable.
        callback (function): An optional progress callback.
    """
    key = bucket.get_key(key_name)
    if key:
        etag = key.etag.strip('"').lower()
        local_etag = hashlib.md5(content).hexdigest().lower()

        if etag == local_etag:
            # key contents haven't changed
            return

    key = bucket.new_key(key_name)
    mimetype = mimetypes.guess_type(key_name)[0]
    if mimetype:
        key.set_metadata('Content-Type', mimetype)

    policy = 'public-read' if is_public else None

    key.set_contents_from_string(content, policy=policy, cb=callback)
    key.close()
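
A minimal usage sketch for the helper above, assuming boto can find AWS credentials in the environment; the bucket and key names are hypothetical:

import boto

conn = boto.connect_s3()                    # credentials come from env/boto config
bucket = conn.get_bucket('example-bucket')  # hypothetical bucket name
put_string_to_key(bucket, 'reports/latest.txt', 'hello world', is_public=True)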
Example #2
    def _stream_write_internal(
        self,
        path,
        fp,
        content_type=None,
        content_encoding=None,
        cancel_on_error=True,
        size=filelike.READ_UNTIL_END,
    ):
        """ Writes the data found in the file-like stream to the given path, with optional limit
        on size. Note that this method returns a *tuple* of (bytes_written, write_error) and should
        *not* raise an exception (such as IOError) if a problem uploading occurred. ALWAYS check
        the returned tuple on calls to this method.
    """
        # Minimum size of upload part size on S3 is 5MB
        self._initialize_cloud_conn()
        path = self._init_path(path)
        key = self._key_class(self._cloud_bucket, path)

        if content_type is not None:
            key.set_metadata("Content-Type", content_type)

        if content_encoding is not None:
            key.set_metadata("Content-Encoding", content_encoding)

        if size != filelike.READ_UNTIL_END:
            fp = filelike.StreamSlice(fp, 0, size)

        # TODO figure out how to handle cancel_on_error=False
        try:
            key.set_contents_from_stream(fp)
        except IOError as ex:
            return 0, ex

        return key.size, None
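
Because the method reports failures through its return value instead of raising, callers must check the tuple themselves. A hedged sketch of such a caller, where storage, path and fp are assumed to already exist:

def upload_and_check(storage, path, fp):
    # Hypothetical wrapper: `storage` is assumed to be an instance of the
    # class above; the method returns (bytes_written, write_error).
    bytes_written, err = storage._stream_write_internal(
        path, fp, content_type='application/octet-stream')
    if err is not None:
        # Errors are returned, not raised, so they must be checked here.
        raise IOError('upload failed after %d bytes: %s' % (bytes_written, err))
    return bytes_written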
Example #3
def add_thumb(config, local_filename, remote_filename, extension):
    bucket = _get_s3_bucket(config)

    key = boto.s3.key.Key(bucket)
    key.key = remote_filename
    key.set_metadata('Content-Type', "image/" + extension)  # metadata must be set before the upload
    key.set_contents_from_filename(local_filename)
    key.set_acl('public-read')
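
The Content-Type above is built by string concatenation; as other examples on this page do, it could instead be guessed from the file name with mimetypes. A small illustrative sketch with a hypothetical file name:

import mimetypes

# 'image/png' for a .png thumbnail; fall back to a generic type if unknown.
mime_type, _ = mimetypes.guess_type('photo_thumb.png')
print(mime_type or 'application/octet-stream')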
Example #4
def add_cv(config, person_id, contents, filename, content_type):
    person_id = str(int(person_id))
    assert person_id != '0'

    bucket = _get_s3_bucket(config)

    when = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S-")

    key = boto.s3.key.Key(bucket)
    key.key = "cvs/" + str(person_id) + "/" + when + filename
    key.set_metadata('Content-Type', content_type)  # metadata must be set before the upload
    key.set_contents_from_string(contents)
    key.set_acl('public-read')
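
A sketch of how these two helpers might be called; the contents of config depend on what _get_s3_bucket expects, so everything below is a hypothetical placeholder:

config = {}  # placeholder; real keys depend on _get_s3_bucket
add_thumb(config, '/tmp/photo_thumb.png', 'thumbs/photo_thumb.png', 'png')
add_cv(config, person_id=42, contents=open('/tmp/cv.pdf', 'rb').read(),
       filename='cv.pdf', content_type='application/pdf')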
Example #5
  def upload(self, mpi, source, target, pos = 0, chunk = 0, part = 0):
    '''Thread worker for upload operation.'''
    s3url = S3URL(target)
    bucket = self.s3.lookup(s3url.bucket, validate=self.opt.validate)

    # Initialization: Set up multithreaded uploads.
    if not mpi:
      fsize = os.path.getsize(source)
      key = bucket.get_key(s3url.path)

      # optional checks
      if self.opt.dry_run:
        message('%s => %s', source, target)
        return
      elif self.opt.sync_check and self.sync_check(source, key):
        message('%s => %s (synced)', source, target)
        return
      elif not self.opt.force and key:
        raise Failure('File already exists: %s' % target)

      # extra headers
      extra_headers = {}
      if self.opt.add_header:
        for hdr in self.opt.add_header:
          try:
            key, val = hdr.split(":", 1)
          except ValueError:
            raise Failure("Invalid header format: %s" % hdr)
          key_inval = re.sub("[a-zA-Z0-9-.]", "", key)
          if key_inval:
            key_inval = key_inval.replace(" ", "<space>")
            key_inval = key_inval.replace("\t", "<tab>")
            raise ParameterError("Invalid character(s) in header name '%s': \"%s\"" % (key, key_inval))
          extra_headers[key.strip().lower()] = val.strip()

      # Small file optimization.
      if fsize < self.opt.max_singlepart_upload_size:
        key = boto.s3.key.Key(bucket)
        key.key = s3url.path
        key.set_metadata('privilege',  self.get_file_privilege(source))
        key.set_contents_from_filename(source, reduced_redundancy=self.opt.reduced_redundancy, headers=extra_headers)
        if self.opt.acl_public:
          key.set_acl('public-read')
        message('%s => %s', source, target)
        return

      # Store our own md5 value as metadata, because the ETag of a
      # multipart upload is not the MD5 of the object.
      mpu = bucket.initiate_multipart_upload(s3url.path, metadata = {'md5': self.file_hash(source), 'privilege': self.get_file_privilege(source)})

      for args in self.get_file_splits(mpu.id, source, target, fsize, self.opt.multipart_split_size):
        self.pool.upload(*args)
      return

    # Handle each part in parallel, post initialization.
    mpu = None
    for mp in bucket.list_multipart_uploads():
      if mp.id == mpi.id:
        mpu = mp
        break
    if mpu is None:
      raise Failure('Could not find MultiPartUpload %s' % mpi.id)

    data = None
    with open(source, 'rb') as f:
      f.seek(pos)
      data = f.read(chunk)
    if not data:
      raise Failure('Unable to read data from source: %s' % source)

    mpu.upload_part_from_file(StringIO(data), part)

    # Finalize
    if mpi.complete():
      try:
        mpu.complete_upload()
        message('%s => %s', source, target)
      except Exception as e:
        mpu.cancel_upload()
        raise RetryFailure('Upload failed: Unable to complete upload %s.' % source)
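
The extra-header handling above validates and normalizes 'Name: value' strings before attaching them to the upload. A standalone sketch of the same parsing idea (the function name and regex spelling are mine, not the tool's):

import re

def parse_extra_headers(raw_headers):
  # Turn strings like 'Cache-Control: max-age=60' into a lowercase-keyed dict,
  # rejecting header names with characters outside letters, digits, '.', '-'.
  headers = {}
  for hdr in raw_headers:
    try:
      name, value = hdr.split(':', 1)
    except ValueError:
      raise ValueError('Invalid header format: %s' % hdr)
    invalid = re.sub('[A-Za-z0-9.-]', '', name)
    if invalid:
      raise ValueError("Invalid character(s) in header name '%s': '%s'" % (name, invalid))
    headers[name.strip().lower()] = value.strip()
  return headers

print(parse_extra_headers(['Cache-Control: max-age=60']))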
Example #6
  def upload(self, mpi, source, target, pos = 0, chunk = 0, part = 0):
    '''Thread worker for upload operation.'''
    s3url = S3URL(target)
    bucket = self.s3.lookup(s3url.bucket, validate=self.opt.validate)

    # Initialization: Set up multithreaded uploads.
    if not mpi:
      fsize = os.path.getsize(source)
      key = bucket.get_key(s3url.path)

      # optional checks
      if self.opt.dry_run:
        message('%s => %s', source, target)
        return
      elif self.opt.sync_check and self.sync_check(source, key):
        message('%s => %s (synced)', source, target)
        return
      elif not self.opt.force and key:
        raise Failure('File already exists: %s' % target)

      # Small file optimization.
      if fsize < self.opt.max_singlepart_upload_size:
        key = boto.s3.key.Key(bucket)
        key.key = s3url.path
        key.set_metadata('privilege',  self.get_file_privilege(source))
        key.set_contents_from_filename(source)
        message('%s => %s', source, target)
        return

      # Store our own md5 value as metadata, because the ETag of a
      # multipart upload is not the MD5 of the object.
      mpu = bucket.initiate_multipart_upload(s3url.path, metadata = {'md5': self.file_hash(source), 'privilege': self.get_file_privilege(source)})

      for args in self.get_file_splits(mpu.id, source, target, fsize, self.opt.multipart_split_size):
        self.pool.upload(*args)
      return

    # Handle each part in parallel, post initialization.
    mpu = None
    for mp in bucket.list_multipart_uploads():
      if mp.id == mpi.id:
        mpu = mp
        break
    if mpu is None:
      raise Failure('Could not find MultiPartUpload %s' % mpi.id)

    data = None
    with open(source, 'rb') as f:
      f.seek(pos)
      data = f.read(chunk)
    if not data:
      raise Failure('Unable to read data from source: %s' % source)

    mpu.upload_part_from_file(StringIO(data), part)

    # Finalize
    if mpi.complete():
      try:
        mpu.complete_upload()
        message('%s => %s', source, target)
      except Exception as e:
        mpu.cancel_upload()
        raise RetryFailure('Upload failed: Unable to complete upload %s.' % source)
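
Both upload workers above wrap the same boto multipart primitives: initiate_multipart_upload, upload_part_from_file for each chunk, and complete_upload (or cancel_upload on failure). A minimal single-threaded sketch of those calls, assuming a boto bucket object and a non-empty local file; the 5 MB part size mirrors S3's minimum for non-final parts:

from io import BytesIO

def simple_multipart_upload(bucket, key_name, source, part_size=5 * 1024 * 1024):
  # Hypothetical helper: drives one multipart upload sequentially.
  mpu = bucket.initiate_multipart_upload(key_name)
  try:
    with open(source, 'rb') as f:
      part = 1
      while True:
        data = f.read(part_size)
        if not data:
          break
        mpu.upload_part_from_file(BytesIO(data), part)
        part += 1
    mpu.complete_upload()
  except Exception:
    mpu.cancel_upload()
    raise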
Example #7
    def worker(base_path):
        mtime = path = 0
        while 1:
            try:
                mtime, queued_path = queue.get()

                path = queued_path
                if path is None:
                    return

                key = boto.s3.key.Key(bucket)

                if mtime is None:  # delete
                    try:
                        try:
                            key.key = bucket_prefix + path
                            key.delete()
                        except Exception:
                            logger.exception('deleting %r, retrying' % key.key)
                            time.sleep(9)
                            key.key = bucket_prefix + path
                            key.delete()
                    except Exception:
                        if index is not None:
                            # Failed to delete. Put the key back so we
                            # try again later
                            index[queued_path] = 1
                        raise

                elif mtime is GENERATE:
                    (path, s3mtime) = path
                    fspath = join(base_path, path.encode(encoding))
                    if exists(fspath):
                        # Someone created a file since we decided to
                        # generate one.
                        continue

                    fspath = dirname(fspath)
                    data = "Index of " + path[:-len(INDEX_HTML) - 1]
                    data = [
                        "<!-- generated -->",
                        "<html><head><title>%s</title></head><body>" % data,
                        "<h1>%s</h1><table>" % data,
                        "<tr><th>Name</th><th>Last modified</th><th>Size</th>"
                        "</tr>",
                    ]
                    for name in sorted(os.listdir(fspath)):
                        if name.startswith('.'):
                            continue  # don't index dot files
                        name_path = join(fspath, name)
                        if isdir(name_path):
                            name = name + '/'
                            size = '-'
                        else:
                            size = os.stat(name_path).st_size
                        mtime = time.ctime(os.stat(name_path).st_mtime)
                        name = name.decode(encoding)
                        data.append('<tr><td><a href="%s">%s</a></td>\n'
                                    '    <td>%s</td><td>%s</td></tr>' %
                                    (name, name, mtime, size))
                    data.append("</table></body></html>\n")
                    data = '\n'.join(data)

                    digest = hashlib.md5(data.encode(encoding)).hexdigest()
                    if digest != s3mtime:
                        # Note that s3mtime is either a previous
                        # digest, 0 (because the path wasn't in S3), or
                        # an S3 upload time.  The test above works in
                        # all of these cases.
                        key.key = bucket_prefix + path
                        key.set_metadata('generated', 'true')
                        try:
                            key.set_contents_from_string(
                                data,
                                headers={'Content-Type': 'text/html'},
                            )
                        except Exception:
                            logger.exception(
                                'uploading generated %r, retrying' % path)
                            time.sleep(9)
                            key.set_contents_from_string(
                                data,
                                headers={'Content-Type': 'text/html'},
                            )

                        if s3mtime:
                            # update (if it was add, mtime would be 0)
                            if cloudfront:
                                invalidations.append(path)

                    if index is not None:
                        index[path] = digest

                else:  # upload
                    try:
                        if had_index:
                            # We only store mtimes to the nearest second.
                            # We don't have a fudge factor, so there's a
                            # chance that someone might update the file in
                            # the same second, so we check if a second has
                            # passed and sleep if it hasn't.
                            now = time_time_from_sixtuple(
                                time.gmtime(time.time()))
                            if not now > mtime:
                                time.sleep(1)

                        key.key = bucket_prefix + path
                        path = join(base_path, path)
                        try:
                            key.set_contents_from_filename(
                                path.encode(encoding))
                        except Exception:
                            logger.exception('uploading %r %r, retrying' %
                                             (mtime, path))
                            time.sleep(9)
                            key.set_contents_from_filename(
                                path.encode(encoding))

                    except Exception:
                        if index is not None:
                            # Upload failed. Remove from index so we
                            # try again later (if the path is still
                            # around).
                            index.pop(queued_path)
                        raise

            except Exception:
                logger.exception('processing %r %r' % (mtime, path))
            finally:
                queue.task_done()
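
The generated-index branch above re-uploads a page only when its MD5 differs from the value remembered for it (s3mtime, which may also be 0 or an upload time). A tiny standalone illustration of that change check, with hypothetical names:

import hashlib

def needs_upload(page_html, remembered):
    # `remembered` is a previous MD5 hex digest, an S3 upload time, or 0;
    # any mismatch with the current digest means the page should be re-uploaded.
    digest = hashlib.md5(page_html.encode('utf-8')).hexdigest()
    return digest != remembered, digest

changed, digest = needs_upload('<html>example index</html>', 0)
print(changed, digest)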
Example #8
  def process_item(self, item):
    filepath = item['file']
    filename = item['filename']
    room_id = item['room_id']
    user_id = item['user_id']
    username = item['username']
    room_token = item['room_token']

    print "got this job: %s" % item

    im = thumbnail = None
    try:
      im = Image.open(filepath)
    except Exception:
      # Not a readable image; treat the upload as a plain file below.
      pass

    message_type = 'image' if im else 'file'

    # Generate thumbnail
    if im:
      thumbnail = Image.open(filepath)
      thumbnail.thumbnail((300, 300), Image.ANTIALIAS)

    print im
    print thumbnail

    # Upload thumbnail if necessary
    if thumbnail:
      name, ext = os.path.splitext(filename)
      thumbname = '/uploads/%s/%s_thumb%s' % (room_id, name, ext)
      thumbfile = tempfile.NamedTemporaryFile()
      thumbnail.save(thumbfile, im.format)
      thumbfile.seek(0)  # rewind so the thumbnail upload below reads from the start

    # Determine file mimetype
    if im:
      mime_type = 'image/%s' % im.format.lower()
    else:
      mime_type, _ = mimetypes.guess_type(filename)

    # Create keys for file
    key = boto.s3.key.Key(self.bucket)
    key.key = '/uploads/%s/%s' % (room_id, filename)

    if mime_type:
      key.set_metadata('Content-Type', mime_type)

    filesize = os.path.getsize(filepath)
    with open(filepath, 'rb') as fileobj:  # binary mode so the upload is byte-exact
      key.set_contents_from_file(fileobj)
    os.remove(filepath)

    print "Uploaded file"

    # Upload thumbnail
    if thumbnail:
      thumb_key = boto.s3.key.Key(self.bucket)
      thumb_key.key = thumbname
      if mime_type:
        thumb_key.set_metadata('Content-Type', mime_type)
      thumb_key.set_contents_from_file(thumbfile.file)

    print "Uploaded thumbnail"

    # Create a message
    content = '%s posted a file' % username
    message = {
      'room': room_id,
      'user_id': user_id,
      'user_name': username,
      'type': message_type,
      'filename': filename,
      's3_key': key.key,
      'content': content,
      'created_at': datetime.datetime.utcnow(),
    }
    if message_type == 'image':
      message['size'] = im.size
      message['s3_thumbnail_key'] = thumb_key.key
      message['thumb_size'] = thumbnail.size

    if mime_type:
      message['mime_type'] = mime_type

    message['filesize'] = filesize

    message_id = self.db.messages.insert(message)

    m = {
      'channel': room_token,
      'message': {
        'id': str(message_id),
        'content': message['content'],
        'user_id': str(message['user_id']),
        'user_name': message['user_name'],
        'type': message_type,
        'url': key.generate_url(3600),
      }
    }

    if message_type == 'image':
      m['message']['size'] = message['size']
      m['message']['thumb_url'] = thumb_key.generate_url(3600)

    self.pubnub.publish(m)
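
A sketch of the job payload process_item expects, with hypothetical values; the worker instance is assumed to already have self.bucket, self.db and self.pubnub configured:

# Hypothetical job dict matching the fields read at the top of process_item.
item = {
  'file': '/tmp/upload-abc123',   # local path of the received file
  'filename': 'photo.png',        # original client-side file name
  'room_id': '42',
  'user_id': '7',
  'username': 'alice',
  'room_token': 'room-42-token',
}
# worker.process_item(item)       # `worker` is an assumed, already-configured instance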