def _upload_page(self, local_pdf_path, page_number, jpeg_prefix, bucket,
                 webhook_url, webhook_data):
    """ Converts a page of the given PDF to JPEGs. Uploads the JPEGs to S3. """
    local_jpeg_prefix = jpeg_prefix.replace('/', '-')
    local_large_jpeg_path = '%s/%s-large.jpeg' % (self.working_dir, local_jpeg_prefix)
    local_small_jpeg_path = '%s/%s-small.jpeg' % (self.working_dir, local_jpeg_prefix)
    local_jpeg_path = '%s/%s.jpeg' % (self.working_dir, local_jpeg_prefix)

    # subprocess.check_call(['convert', '-density', '300', '%s[%d]' %
    #     (local_pdf_path, page_number), local_large_jpeg_path])

    # gs is one indexed
    gs_page_number = page_number + 1
    subprocess.check_call(['gs', '-dNOPAUSE', '-sDEVICE=jpeg',
                           '-dFirstPage=%d' % gs_page_number,
                           '-dLastPage=%d' % gs_page_number,
                           '-sOutputFile=%s' % local_large_jpeg_path,
                           '-dJPEGQ=90', '-r300', '-q', local_pdf_path,
                           '-c', 'quit'])

    subprocess.check_call(['convert', '-resize', '800x800',
                           local_large_jpeg_path, local_jpeg_path])
    subprocess.check_call(['convert', '-resize', '300x300',
                           local_large_jpeg_path, local_small_jpeg_path])

    self._log('Finished converting page %d' % page_number)

    # store converted pages in S3
    large_jpeg_key = s3.Key(bucket)
    jpeg_key = s3.Key(bucket)
    small_jpeg_key = s3.Key(bucket)

    large_jpeg_key.key = '%s-large.jpeg' % jpeg_prefix
    jpeg_key.key = '%s.jpeg' % jpeg_prefix
    small_jpeg_key.key = '%s-small.jpeg' % jpeg_prefix

    large_jpeg_key.set_contents_from_filename(local_large_jpeg_path)
    jpeg_key.set_contents_from_filename(local_jpeg_path)
    small_jpeg_key.set_contents_from_filename(local_small_jpeg_path)

    large_jpeg_key.set_acl('public-read')
    jpeg_key.set_acl('public-read')
    small_jpeg_key.set_acl('public-read')

    self._log('Uploaded page %d' % page_number)
    self._call_webhook(webhook_url, webhook_data, local_jpeg_path, page_number)
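
# A minimal standalone sketch that isolates the page-rendering step above from
# the S3 upload and webhook logic. The helper name and output paths are
# hypothetical (not part of the original code); it assumes the `gs` and
# `convert` binaries are on PATH, as the method above already does.
import subprocess

def render_page_to_jpegs(pdf_path, page_number, output_prefix):
    """Render zero-indexed `page_number` of `pdf_path` to large/medium/small JPEGs."""
    large_path = '%s-large.jpeg' % output_prefix
    medium_path = '%s.jpeg' % output_prefix
    small_path = '%s-small.jpeg' % output_prefix

    # Ghostscript numbers pages from 1, so shift the zero-indexed page number.
    gs_page = page_number + 1
    subprocess.check_call(['gs', '-dNOPAUSE', '-sDEVICE=jpeg',
                           '-dFirstPage=%d' % gs_page, '-dLastPage=%d' % gs_page,
                           '-sOutputFile=%s' % large_path,
                           '-dJPEGQ=90', '-r300', '-q', pdf_path, '-c', 'quit'])

    # Downscale the 300 DPI render with ImageMagick for web-sized variants.
    subprocess.check_call(['convert', '-resize', '800x800', large_path, medium_path])
    subprocess.check_call(['convert', '-resize', '300x300', large_path, small_path])
    return large_path, medium_path, small_path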
def delete_file(self, entry, path):
    # Move any bucket subdirectories to the filename
    bucket_name, filename = move_bucket_subdirs_to_path(entry.bucket, path)

    # fixme: use temporary token if configured for it
    secret = aes_decrypt(entry.secret)
    conn = connection.S3Connection(entry.access_key, secret)
    bucket = connection.Bucket(conn, bucket_name)

    s3key = connection.Key(bucket)
    s3key.key = filename
    try:
        bucket.delete_key(s3key)
    except boto.exception.BotoServerError as ex:
        raise IOError(
            ("Failed to delete '%s' from S3 Cloud Storage "
             "bucket '%s'. %s: %s") %
            (filename, bucket_name, ex.reason, ex.message))
    return {'status': 'OK'}
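
# Hypothetical usage sketch for delete_file above. The CloudEntry namedtuple
# and the example values are illustrative assumptions: the real `entry` only
# needs `bucket`, `access_key`, and `secret` attributes, with `secret` stored
# AES-encrypted (it is passed through aes_decrypt above). The bucket/key split
# described in the comment below is an assumption about what
# move_bucket_subdirs_to_path does, inferred from its name and the comment above.
from collections import namedtuple

CloudEntry = namedtuple('CloudEntry', ['bucket', 'access_key', 'secret'])

def example_delete(storage):
    entry = CloudEntry(bucket='backups/site-exports',
                       access_key='AKIA-EXAMPLE',
                       secret='<aes-encrypted-secret>')
    # Assuming move_bucket_subdirs_to_path('backups/site-exports', 'dump.tgz')
    # yields ('backups', 'site-exports/dump.tgz'), this deletes that key.
    return storage.delete_file(entry, 'dump.tgz')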
def test_uri_put_file(sts_conn):
    bn = 'wal-e.sts.uri.put.file'
    cf = connection.OrdinaryCallingFormat()
    policy_text = make_policy(bn, 'test-prefix', allow_get_location=True)
    fed = sts_conn.get_federation_token('wal-e-test-uri-put-file',
                                        policy=policy_text)

    key_path = 'test-prefix/test-key'

    creds = Credentials(fed.credentials.access_key,
                        fed.credentials.secret_key,
                        fed.credentials.session_token)

    with FreshBucket(bn, keys=[key_path], calling_format=cf,
                     host='s3-us-west-1.amazonaws.com') as fb:
        fb.create(location='us-west-1')

        uri_put_file(creds, 's3://' + bn + '/' + key_path,
                     StringIO('test-content'))

        k = connection.Key(fb.conn.get_bucket(bn, validate=False))
        k.name = key_path

        assert k.get_contents_as_string() == 'test-content'
def _convert_batch(self, bucket, pdf_path, pages, jpeg_prefixes, webhook_url,
                   webhook_data):
    """ Converts the given batch of pages in the provided PDF to JPEGs. """
    # download PDF locally, use first JPEG prefix as its name
    pdf_key = s3.Key(bucket)
    pdf_key.key = pdf_path

    local_jpeg_prefix = jpeg_prefixes[0].replace('/', '-')
    local_pdf_path = '%s/%s.pdf' % (self.working_dir, local_jpeg_prefix)
    pdf_key.get_contents_to_filename(local_pdf_path)

    threads = []

    # convert each page in a separate thread (Ghostscript render, then
    # ImageMagick resize; see _upload_page)
    for page_number, jpeg_prefix in zip(pages, jpeg_prefixes):
        args = (local_pdf_path, page_number, jpeg_prefix, bucket,
                webhook_url, webhook_data)
        threads.append(threading.Thread(target=self._upload_page, args=args))

    for thread in threads:
        thread.start()

    # wait until all threads have completed
    for thread in threads:
        thread.join()
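
# Hypothetical driver for _convert_batch above, showing one way a caller might
# split a document into fixed-size batches. PAGES_PER_BATCH, the `converter`
# object, and the prefix scheme are illustrative assumptions; only the
# (bucket, pdf_path, pages, jpeg_prefixes, webhook_url, webhook_data) call
# signature comes from the method above.
PAGES_PER_BATCH = 10

def convert_pdf(converter, bucket, pdf_path, num_pages, webhook_url, webhook_data):
    for start in range(0, num_pages, PAGES_PER_BATCH):
        pages = list(range(start, min(start + PAGES_PER_BATCH, num_pages)))
        jpeg_prefixes = ['%s/page-%d' % (pdf_path, page) for page in pages]
        converter._convert_batch(bucket, pdf_path, pages, jpeg_prefixes,
                                 webhook_url, webhook_data)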
def test_policy(sts_conn, monkeypatch):
    """Sanity checks for the intended ACLs of the policy"""
    monkeypatch.setenv('AWS_REGION', 'us-west-1')

    # Use periods to force OrdinaryCallingFormat when using
    # calling_format.from_store_name.
    bn = bucket_name_mangle('wal-e.sts.list.test')
    h = 's3-us-west-1.amazonaws.com'
    cf = connection.OrdinaryCallingFormat()

    fed = sts_conn.get_federation_token('wal-e-test-list-bucket',
                                        policy=make_policy(bn, 'test-prefix'))

    test_payload = 'wal-e test'
    keys = ['test-prefix/hello', 'test-prefix/world',
            'not-in-prefix/goodbye', 'not-in-prefix/world']
    creds = Credentials(fed.credentials.access_key,
                        fed.credentials.secret_key,
                        fed.credentials.session_token)

    with FreshBucket(bn, keys=keys, calling_format=cf, host=h) as fb:
        # Superuser creds, for testing keys not in the prefix.
        bucket_superset_creds = fb.create(location='us-west-1')

        cinfo = calling_format.from_store_name(bn)
        conn = cinfo.connect(creds)
        conn.host = h

        # Bucket using the token, subject to the policy.
        bucket = conn.get_bucket(bn, validate=False)

        for name in keys:
            if name.startswith('test-prefix/'):
                # Test the PUT privilege.
                k = connection.Key(bucket)
            else:
                # Not in the prefix, so PUT will not work.
                k = connection.Key(bucket_superset_creds)

            k.key = name
            k.set_contents_from_string(test_payload)

        # Test listing keys within the prefix.
        prefix_fetched_keys = list(bucket.list(prefix='test-prefix/'))
        assert len(prefix_fetched_keys) == 2

        # Test the GET privilege.
        for key in prefix_fetched_keys:
            assert key.get_contents_as_string() == b'wal-e test'

        # Try a bogus listing outside the valid prefix.
        with pytest.raises(exception.S3ResponseError) as e:
            list(bucket.list(prefix=''))
        assert e.value.status == 403

        # Test the rejection of PUT outside of prefix.
        k = connection.Key(bucket)
        k.key = 'not-in-prefix/world'
        with pytest.raises(exception.S3ResponseError) as e:
            k.set_contents_from_string(test_payload)
        assert e.value.status == 403
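
# The test above exercises a policy produced by make_policy(bn, 'test-prefix'):
# GET/PUT and listing succeed under 'test-prefix/', while listing or writing
# outside that prefix fails with 403. A policy document with that effect might
# look roughly like the sketch below; this is an illustration of the intended
# ACLs, not the actual output of make_policy.
import json

def sketch_prefix_policy(bucket_name, prefix):
    return json.dumps({
        'Version': '2012-10-17',
        'Statement': [
            {
                # Object-level access only under the prefix.
                'Effect': 'Allow',
                'Action': ['s3:GetObject', 's3:PutObject'],
                'Resource': 'arn:aws:s3:::%s/%s/*' % (bucket_name, prefix),
            },
            {
                # Listing is allowed only when restricted to the prefix.
                'Effect': 'Allow',
                'Action': ['s3:ListBucket'],
                'Resource': 'arn:aws:s3:::%s' % bucket_name,
                'Condition': {'StringLike': {'s3:prefix': '%s/*' % prefix}},
            },
        ],
    })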