Python S3Helper.getS3FileSize Examples

Programming Language: Python

Namespace/Package Name: helper

Class/Type: S3Helper

Method/Function: getS3FileSize

Examples at hotexamples.com: 2

Python S3Helper.getS3FileSize - 2 examples found. These are the top rated real world Python examples of helper.S3Helper.getS3FileSize extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

writeToS3(21)

S3Helper(14)

readFromS3(7)

writeCSVRaw(4)

writeCSV(3)

getS3BucketRegion(2)

getS3FileSize(2)

getFileNames(1)

readBytesFromS3(1)

Example #1

Show file

File: lambda_function.py Project: LouisLoison/deepblooGit

def lambda_handler(event, context):
    """Extract text bounding boxes from a PDF and describe the output in the event.

    The incoming ``event`` is merged with settings read from the environment
    variables ``DOCUMENTS_BUCKET``, ``TEXTRACT_ONLY``, ``MIN_CHAR_NEEDED`` and
    ``EXTRACT_PDF_LINES``. Depending on those settings the PDF is processed
    either line-by-line (``Pdf`` helper) or word-by-word
    (``execute_pdf_to_bbox``); the textract path is currently a no-op because
    its call is commented out.

    Args:
        event: Mapping that must contain ``status``, ``objectName`` and
            ``s3Url``. Processing is skipped when ``status`` is not positive.
        context: Lambda context object; not used here.

    Returns:
        When ``event['status'] <= 0``: a copy of ``event`` with an
        ``errorMessage`` entry. Otherwise whatever ``update_event(aws_env,
        event)`` returns, where ``aws_env`` carries ``status`` 1 on success or
        ``status`` -1 plus an ``errorMessage`` when the word-level extraction
        reported an error.

    Raises:
        TypeError: If ``MIN_CHAR_NEEDED`` is not set in the environment
            (``int(None)``).
    """
    if (event['status'] <= 0):
        return {**event, "errorMessage": "Status isnt positive"}
    aws_env = {
        **event,
        "bucketName": os.environ.get('DOCUMENTS_BUCKET'),
        "awsRegion": 'eu-west-1',
        "tmpJsonOutput": "/tmp/tmp_result.json",
        "tmpTxtOutput": "/tmp/tmp_result.txt",
        "outputBucket": os.environ.get('DOCUMENTS_BUCKET'),
        "outputNameJson": get_bbox_filename(event['objectName'], ".json"),
        "outputNameTxt": get_bbox_filename(event['objectName'], ".txt"),
        "textractOnly": os.environ.get('TEXTRACT_ONLY'),
        "minCharNeeded": int(os.environ.get('MIN_CHAR_NEEDED')),
        "extract_pdf_lines": os.environ.get('EXTRACT_PDF_LINES'),
    }
    extract_pdf_lines = aws_env['extract_pdf_lines']
    textract_only = aws_env['textractOnly']
    tmp_folder = "/tmp/pdfToBbox"
    pdf_tmp_path = copy_pdf_to_tmp(tmp_folder, aws_env)
    # Remembers whether the word-level extraction reported an error, so the
    # success status written further down does not clobber the error status.
    extraction_failed = False

    print("==> aws_env: ", aws_env)
    # The flags are compared as the literal strings "false"/"true" because
    # they come straight from environment variables.
    if textract_only == "false" and is_pdf_has_enough_characters(
            pdf_tmp_path, aws_env['minCharNeeded']) is True:
        print("=> Extracting bounding box with pdfplumber")
        if extract_pdf_lines == "true":
            print("=> Extracting pdf lines bbox")
            pdf = Pdf(pdf_tmp_path, aws_env['tmpJsonOutput'],
                      aws_env['tmpTxtOutput'])
            pdf.parse_pdf()
            pdf.save_in_json()
            pdf.save_in_txt()
            write_bbox_to_s3(aws_env)
        else:
            print("=> Extracting pdf words bbox")
            # A truthy return value from execute_pdf_to_bbox signals failure.
            if execute_pdf_to_bbox(pdf_tmp_path, aws_env['tmpJsonOutput']):
                print("=> Error while trying to get pdf information")
                extraction_failed = True
                aws_env["status"] = -1
                aws_env["errorMessage"] = "PDF format not supported."
            else:
                write_bbox_to_s3(aws_env)
    else:
        print("Extracting bounding box with textract")
        #send_to_textract(aws_env)
    # NOTE(review): on the failure and textract paths nothing in this function
    # writes the .txt output, yet its size is still queried — confirm how
    # S3Helper.getS3FileSize behaves for a missing object.
    aws_env['size'] = S3Helper.getS3FileSize(aws_env['bucketName'],
                                             aws_env['outputNameTxt'],
                                             aws_env['awsRegion'])
    aws_env["s3Url"] = get_new_s3_url(aws_env['s3Url'], "txt")
    if not extraction_failed:
        # Only report success when no error was recorded above; previously
        # these two assignments ran unconditionally and hid the failure.
        aws_env["status"] = 1
        aws_env["errorMessage"] = None
    aws_env["contentType"] = "text/txt"
    aws_env['objectName'] = aws_env['outputNameTxt']
    aws_env["sourceUrl"] = aws_env["s3Url"]
    AwsHelper.refreshTmpFolder(tmp_folder)
    return update_event(aws_env, event)

Example #2

Show file

File: lambda_function.py Project: LouisLoison/deepblooGit

def lambda_handler(event, context):
    """Turn the HTML document referenced by ``event`` into a PDF result event.

    The event is copied into a working dict and extended with the bucket taken
    from the ``DOCUMENTS_BUCKET`` environment variable, a fixed region and the
    output object name. The content returned by ``read_from_s3`` is passed
    through ``sanitize_html_content`` and ``convert_html_to_pdf``; the working
    dict is then updated with the output size, new URL, status and content
    type before being handed to ``update_event``.

    Args:
        event: Mapping that must contain ``objectName`` and ``s3Url``.
        context: Lambda context object; not used here.

    Returns:
        The value returned by ``update_event(aws_env, event)``.

    Raises:
        KeyError: If ``DOCUMENTS_BUCKET`` is missing from the environment or
            a required key is missing from ``event``.
    """
    # Build the working environment step by step; keys already present in the
    # event are overwritten, exactly as a trailing entry in a dict literal
    # would overwrite an unpacked one.
    aws_env = {**event}
    aws_env["bucketName"] = os.environ['DOCUMENTS_BUCKET']
    aws_env["awsRegion"] = 'eu-west-1'
    aws_env["outputBucket"] = os.environ['DOCUMENTS_BUCKET']
    aws_env["outputName"] = get_pdf_filename(event["objectName"], "")

    env_dump = json.dumps(aws_env)
    print("==> Aws Env: {0}".format(env_dump))

    raw_html = read_from_s3(aws_env)
    clean_html = sanitize_html_content(raw_html)
    conversion = convert_html_to_pdf(clean_html, aws_env)

    output_size = S3Helper.getS3FileSize(
        aws_env['bucketName'],
        aws_env['outputName'],
        aws_env['awsRegion'],
    )
    pdf_url = get_new_s3_url(aws_env['s3Url'], "pdf")

    # The source URL mirrors the freshly computed PDF URL.
    aws_env.update({
        "size": output_size,
        "s3Url": pdf_url,
        "status": conversion['status'],
        "errorMessage": conversion["errorMessage"],
        "contentType": "text/pdf",
        "objectName": aws_env["outputName"],
        "sourceUrl": pdf_url,
    })
    return update_event(aws_env, event)