def read_from_s3(aws_env):
    """Read an S3 object and return its content, decoded to text when possible.

    The encoding is guessed with ``chardet``. If decoding fails, a second
    attempt is made with the last byte removed; if that fails too, the raw
    bytes are returned unchanged.

    Args:
        aws_env: Mapping providing the keys ``bucketName``, ``objectName``
            and ``awsRegion``.

    Returns:
        ``str`` when the content could be decoded, ``bytes`` when both
        decoding attempts failed, or ``None`` when the object could not be
        read (the error is printed).

    Raises:
        KeyError: If ``aws_env`` lacks one of the required keys.
    """
    bucket_name = aws_env['bucketName']
    s3_file_name = aws_env['objectName']
    aws_region = aws_env['awsRegion']
    s3 = AwsHelper().getResource('s3', aws_region)
    obj = s3.Object(bucket_name, s3_file_name)

    try:
        content = obj.get()['Body'].read()
    except Exception as e:
        # Best-effort read: report the failure and signal it with None.
        print(e)
        return None

    # chardet reports an encoding of None when it cannot make a guess (for
    # example on an empty payload). bytes.decode(None) raises TypeError,
    # which the handlers below would not catch, so fall back to UTF-8.
    encoding = chardet.detect(content)['encoding'] or "utf-8"

    try:
        print("Trying to decode with {}".format(encoding))
        return content.decode(encoding)
    except UnicodeDecodeError as e:
        print("Failing to decode with encoding {0}: {1}".format(encoding, e))

    try:
        # Second attempt: drop the final byte and decode the remainder.
        print("Trying by removing the last character")
        return content[:-1].decode(encoding)
    except UnicodeDecodeError as e:
        print("Failing to decode: {}".format(e))
        print("Returning content in bytes")
        return content
def convert_html_to_pdf(html_str, aws_env):
    """Render HTML to PDF with ``pisa`` and upload the result to S3.

    Args:
        html_str: HTML markup to convert. ``None`` is rejected without
            touching S3.
        aws_env: Mapping providing the keys ``awsRegion``, ``outputBucket``
            and ``outputName`` (the destination of the generated PDF).

    Returns:
        ``{"status": 1, "errorMessage": None}`` on success, or
        ``{"status": -1, "errorMessage": <reason>}`` when ``html_str`` is
        ``None`` or a ``ValueError`` is raised during conversion/upload.

    Raises:
        KeyError: If ``aws_env`` lacks one of the required keys.
    """
    aws_region = aws_env['awsRegion']
    output_bucket = aws_env['outputBucket']
    output_file = aws_env['outputName']

    if html_str is None:
        return {"status": -1, "errorMessage": "PDF is empty"}

    print("Writing s3://%s/%s in %s" % (output_bucket, output_file, aws_region))
    s3 = AwsHelper().getResource('s3', aws_region)
    s3_obj = s3.Object(output_bucket, output_file)
    output_content = BytesIO()

    try:
        pisa.CreatePDF(html_str, dest=output_content)
        # Upload the rendered document as raw bytes. A PDF is a binary
        # format: decoding it as UTF-8 with errors="ignore" silently drops
        # every byte that is not valid UTF-8, which changes the file length
        # and invalidates the byte offsets stored inside the document.
        s3_obj.put(Body=output_content.getvalue())
    except ValueError as e:
        print(e)
        return {"status": -1, "errorMessage": "PDF format not supported."}

    return {"status": 1, "errorMessage": None}
def read_bytes_from_s3(bucketName, s3FileName, awsRegion=None):
    """Fetch an S3 object and return its body as an in-memory binary stream.

    Args:
        bucketName: Name of the bucket holding the object.
        s3FileName: Key of the object inside the bucket.
        awsRegion: Region passed through to ``AwsHelper().getResource``;
            defaults to ``None``.

    Returns:
        A ``BytesIO`` holding the complete object body, positioned at the
        start of the data.
    """
    resource = AwsHelper().getResource('s3', awsRegion)
    response = resource.Object(bucketName, s3FileName).get()
    return BytesIO(response['Body'].read())