Example #1
  def _upload_page(self, local_pdf_path, page_number, jpeg_prefix, bucket,
      webhook_url, webhook_data):
    """ Converts a page of the given PDF to JPEGs. Uploads the JPEGs to S3. """
    local_jpeg_prefix = jpeg_prefix.replace('/', '-')
    local_large_jpeg_path = '%s/%s-large.jpeg' % (self.working_dir,
      local_jpeg_prefix)
    local_small_jpeg_path = '%s/%s-small.jpeg' % (self.working_dir,
      local_jpeg_prefix)
    local_jpeg_path = '%s/%s.jpeg' % (self.working_dir, local_jpeg_prefix)

    # Rasterize the page with Ghostscript (replacing an earlier ImageMagick
    # convert call); note that gs page numbers are one-indexed
    gs_page_number = page_number + 1
    subprocess.check_call(['gs', '-dNOPAUSE', '-q', '-sDEVICE=jpeg',
      '-dFirstPage=%d' % gs_page_number, '-dLastPage=%d' % gs_page_number,
      '-sOutputFile=%s' % local_large_jpeg_path, '-dJPEGQ=90', '-r300',
      local_pdf_path, '-c', 'quit'])
    subprocess.check_call(['convert', '-resize', '800x800',
      local_large_jpeg_path, local_jpeg_path])
    subprocess.check_call(['convert', '-resize', '300x300',
      local_large_jpeg_path, local_small_jpeg_path])
    self._log('Finished converting page %d' % page_number)

    # store converted pages in S3
    large_jpeg_key = s3.Key(bucket)
    jpeg_key = s3.Key(bucket)
    small_jpeg_key = s3.Key(bucket)

    large_jpeg_key.key = '%s-large.jpeg' % jpeg_prefix
    jpeg_key.key = '%s.jpeg' % jpeg_prefix
    small_jpeg_key.key = '%s-small.jpeg' % jpeg_prefix

    large_jpeg_key.set_contents_from_filename(local_large_jpeg_path)
    jpeg_key.set_contents_from_filename(local_jpeg_path)
    small_jpeg_key.set_contents_from_filename(local_small_jpeg_path)

    large_jpeg_key.set_acl('public-read')
    jpeg_key.set_acl('public-read')
    small_jpeg_key.set_acl('public-read')

    self._log('Uploaded page %d' % page_number)
    self._call_webhook(webhook_url, webhook_data, local_jpeg_path, page_number)
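The method above relies on module-level imports (subprocess, an s3 alias for boto's key classes) that the snippet does not show. A minimal standalone sketch of the same Ghostscript-then-upload pipeline, assuming boto 2, a gs binary on PATH, and placeholder bucket and key names (convert_and_upload is a hypothetical helper, not part of the original class):

import subprocess

import boto
from boto.s3.key import Key

def convert_and_upload(pdf_path, page_number, bucket_name, key_name):
    # hypothetical standalone helper mirroring _upload_page
    jpeg_path = '/tmp/page-%d.jpeg' % page_number
    # gs page numbers are one-indexed
    gs_page = page_number + 1
    subprocess.check_call([
        'gs', '-dNOPAUSE', '-q', '-sDEVICE=jpeg', '-r300', '-dJPEGQ=90',
        '-dFirstPage=%d' % gs_page, '-dLastPage=%d' % gs_page,
        '-sOutputFile=%s' % jpeg_path, pdf_path, '-c', 'quit',
    ])

    # upload the result publicly, as the keys above do
    conn = boto.connect_s3()  # credentials come from the environment
    bucket = conn.get_bucket(bucket_name)
    key = Key(bucket)
    key.key = key_name
    key.set_contents_from_filename(jpeg_path)
    key.set_acl('public-read')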
Example #2
    def delete_file(self, entry, path):
        # Move any bucket subdirectories to the filename
        bucket_name, filename = move_bucket_subdirs_to_path(entry.bucket, path)

        # fixme: use temporary token if configured for it
        secret = aes_decrypt(entry.secret)
        conn = connection.S3Connection(entry.access_key, secret)

        bucket = connection.Bucket(conn, bucket_name)

        s3key = connection.Key(bucket)
        s3key.key = filename
        try:
            bucket.delete_key(s3key)
        except boto.exception.BotoServerError as ex:
            raise IOError(
                "Failed to delete '%s' from S3 Cloud Storage "
                "bucket '%s'. %s: %s" %
                (filename, bucket_name, ex.reason, ex.message))
        return {'status': 'OK'}
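The same delete-and-wrap-errors pattern can be reduced to a standalone sketch. The connection setup below is an assumption (the original decrypts stored credentials first), and delete_s3_file is a hypothetical name:

import boto
import boto.exception

def delete_s3_file(access_key, secret, bucket_name, filename):
    # hypothetical standalone version of the method above
    conn = boto.connect_s3(access_key, secret)
    bucket = conn.get_bucket(bucket_name, validate=False)
    try:
        # delete_key also accepts a plain key name
        bucket.delete_key(filename)
    except boto.exception.BotoServerError as ex:
        raise IOError("Failed to delete '%s' from bucket '%s': %s"
                      % (filename, bucket_name, ex))
    return {'status': 'OK'}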
Example #3
def test_uri_put_file(sts_conn):
    bn = 'wal-e.sts.uri.put.file'
    cf = connection.OrdinaryCallingFormat()
    policy_text = make_policy(bn, 'test-prefix', allow_get_location=True)
    fed = sts_conn.get_federation_token('wal-e-test-uri-put-file',
                                        policy=policy_text)

    key_path = 'test-prefix/test-key'

    # temporary STS credentials, including the session token
    creds = Credentials(fed.credentials.access_key, fed.credentials.secret_key,
                        fed.credentials.session_token)

    with FreshBucket(bn,
                     keys=[key_path],
                     calling_format=cf,
                     host='s3-us-west-1.amazonaws.com') as fb:
        fb.create(location='us-west-1')
        uri_put_file(creds, 's3://' + bn + '/' + key_path,
                     StringIO('test-content'))
        # read the object back to verify the upload (boto 2 on Python 3
        # returns bytes, where b'test-content' would be expected instead)
        k = connection.Key(fb.conn.get_bucket(bn, validate=False))
        k.name = key_path
        assert k.get_contents_as_string() == 'test-content'
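For context, the federation-token flow this test exercises can be sketched directly against boto. The policy document below is a hypothetical stand-in for what make_policy produces, and example-bucket is a placeholder:

import json

import boto
from boto.s3.connection import S3Connection

# hypothetical policy limiting access to one prefix of one bucket
policy_text = json.dumps({
    'Version': '2012-10-17',
    'Statement': [{
        'Effect': 'Allow',
        'Action': ['s3:GetObject', 's3:PutObject'],
        'Resource': ['arn:aws:s3:::example-bucket/test-prefix/*'],
    }],
})

sts = boto.connect_sts()
fed = sts.get_federation_token('example-user', policy=policy_text)

# the temporary credentials must be passed along with the session token
conn = S3Connection(
    aws_access_key_id=fed.credentials.access_key,
    aws_secret_access_key=fed.credentials.secret_key,
    security_token=fed.credentials.session_token)
bucket = conn.get_bucket('example-bucket', validate=False)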
Example #4
  def _convert_batch(self, bucket, pdf_path, pages, jpeg_prefixes,
      webhook_url, webhook_data):
    """ Converts the given batch of pages in the provided PDF to JPEGs. """
    # download PDF locally, use first JPEG prefix as its name
    pdf_key = s3.Key(bucket)
    pdf_key.key = pdf_path

    local_jpeg_prefix = jpeg_prefixes[0].replace('/', '-')
    local_pdf_path = '%s/%s.pdf' % (self.working_dir, local_jpeg_prefix)

    pdf_key.get_contents_to_filename(local_pdf_path)
    threads = []

    # convert and upload each page in a separate thread via _upload_page
    for page_number, jpeg_prefix in zip(pages, jpeg_prefixes):
      args = (local_pdf_path, page_number, jpeg_prefix, bucket, webhook_url,
          webhook_data)
      threads.append(threading.Thread(target=self._upload_page, args=args))

    for thread in threads:
      thread.start()

    # wait until all threads have completed
    for thread in threads:
      thread.join()
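The start-then-join fan-out at the end is a general pattern worth isolating; fan_out below is a hypothetical helper, not part of the original class:

import threading

def fan_out(worker, args_list):
    # one thread per argument tuple, then wait for all of them
    threads = [threading.Thread(target=worker, args=args)
               for args in args_list]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

# e.g. run a trivial worker over three argument tuples
fan_out(print, [('page', n) for n in range(3)])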
Example #5
def test_policy(sts_conn, monkeypatch):
    """Sanity checks for the intended ACLs of the policy"""
    monkeypatch.setenv('AWS_REGION', 'us-west-1')
    # Use periods to force OrdinaryCallingFormat when using
    # calling_format.from_store_name.
    bn = bucket_name_mangle('wal-e.sts.list.test')
    h = 's3-us-west-1.amazonaws.com'
    cf = connection.OrdinaryCallingFormat()

    fed = sts_conn.get_federation_token('wal-e-test-list-bucket',
                                        policy=make_policy(bn, 'test-prefix'))
    test_payload = 'wal-e test'

    keys = [
        'test-prefix/hello', 'test-prefix/world', 'not-in-prefix/goodbye',
        'not-in-prefix/world'
    ]
    creds = Credentials(fed.credentials.access_key, fed.credentials.secret_key,
                        fed.credentials.session_token)

    with FreshBucket(bn, keys=keys, calling_format=cf, host=h) as fb:
        # Superuser creds, for testing keys not in the prefix.
        bucket_superset_creds = fb.create(location='us-west-1')

        cinfo = calling_format.from_store_name(bn)
        conn = cinfo.connect(creds)
        conn.host = h

        # Bucket using the token, subject to the policy.
        bucket = conn.get_bucket(bn, validate=False)

        for name in keys:
            if name.startswith('test-prefix/'):
                # Test the PUT privilege.
                k = connection.Key(bucket)
            else:
                # Not in the prefix, so PUT will not work.
                k = connection.Key(bucket_superset_creds)

            k.key = name
            k.set_contents_from_string(test_payload)

        # Test listing keys within the prefix.
        prefix_fetched_keys = list(bucket.list(prefix='test-prefix/'))
        assert len(prefix_fetched_keys) == 2

        # Test the GET privilege.
        for key in prefix_fetched_keys:
            assert key.get_contents_as_string() == b'wal-e test'

        # Try a bogus listing outside the valid prefix.
        with pytest.raises(exception.S3ResponseError) as e:
            list(bucket.list(prefix=''))

        assert e.value.status == 403

        # Test the rejection of PUT outside of prefix.
        k = connection.Key(bucket)
        k.key = 'not-in-prefix/world'

        with pytest.raises(exception.S3ResponseError) as e:
            k.set_contents_from_string(test_payload)

        assert e.value.status == 403
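The repeated 403 assertions all follow one shape, which could be factored into a small helper; check_denied below is a hypothetical sketch, not part of the original test:

import pytest
import boto.exception

def check_denied(operation):
    # assert that operation() is rejected with an S3 403, as above
    with pytest.raises(boto.exception.S3ResponseError) as excinfo:
        operation()
    assert excinfo.value.status == 403

# e.g. check_denied(lambda: k.set_contents_from_string(test_payload))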