def test_start_analysis_job(make_stubber, error_code):
    textract_client = boto3.client('textract')
    textract_stubber = make_stubber(textract_client)
    twrapper = TextractWrapper(textract_client, None, None)
    bucket_name = 'test-bucket_name'
    file_name = 'test-file_name'
    feature_types = ['TABLES', 'FORMS']
    topic_arn = 'arn:aws:sns:REGION:123456789012:topic/test-topic'
    role_arn = 'arn:aws:iam:REGION:123456789012:role/test-role'
    job_id = 'test-job_id'

    textract_stubber.stub_start_document_analysis(bucket_name,
                                                  file_name,
                                                  feature_types,
                                                  job_id,
                                                  topic_arn=topic_arn,
                                                  role_arn=role_arn,
                                                  error_code=error_code)

    if error_code is None:
        got_job_id = twrapper.start_analysis_job(bucket_name, file_name,
                                                 feature_types, topic_arn,
                                                 role_arn)
        assert got_job_id == job_id
    else:
        with pytest.raises(ClientError) as exc_info:
            twrapper.start_analysis_job(bucket_name, file_name, feature_types,
                                        topic_arn, role_arn)
        assert exc_info.value.response['Error']['Code'] == error_code
def test_check_job_queue(make_stubber, error_code):
    sqs_resource = boto3.resource('sqs')
    sqs_stubber = make_stubber(sqs_resource.meta.client)
    twrapper = TextractWrapper(None, None, sqs_resource)
    queue_url = 'test-queue_url'
    job_id = 'test-job_id'
    status = 'test-status'
    messages = [{
        'body':
        json.dumps(
            {'Message': json.dumps({
                'JobId': job_id,
                'Status': status
            })})
    }]

    sqs_stubber.stub_receive_messages(queue_url,
                                      messages,
                                      None,
                                      omit_wait_time=True,
                                      message_attributes=None,
                                      error_code=error_code)
    if error_code is None:
        sqs_stubber.stub_delete_message(queue_url, receipt_handle='Receipt-0')

    got_status = twrapper.check_job_queue(queue_url, job_id)
    if error_code is None:
        assert got_status == status
    else:
        assert got_status is None
Example #3
0
def test_get_analysis_job(make_stubber, error_code):
    textract_client = boto3.client('textract')
    textract_stubber = make_stubber(textract_client)
    twrapper = TextractWrapper(textract_client, None, None)
    job_id = 'test-job_id'
    job_status = 'SUCCEEDED'

    textract_stubber.stub_get_document_analysis(
        job_id, job_status, error_code=error_code)

    if error_code is None:
        got_job_status = twrapper.get_analysis_job(job_id)
        assert got_job_status['JobStatus'] == job_status
    else:
        with pytest.raises(ClientError) as exc_info:
            twrapper.get_analysis_job(job_id)
        assert exc_info.value.response['Error']['Code'] == error_code
def test_analyze_file(make_stubber, func_kwargs, doc_bytes, error_code):
    textract_client = boto3.client('textract')
    textract_stubber = make_stubber(textract_client)
    twrapper = TextractWrapper(textract_client, None, None)
    feature_types = ['TABLES', 'FORMS']
    blocks = [{'BlockType': 'TEST'}]

    textract_stubber.stub_analyze_document(doc_bytes,
                                           feature_types,
                                           blocks,
                                           error_code=error_code)

    if error_code is None:
        if list(func_kwargs.keys())[0] == 'document_file_name':
            with patch('builtins.open',
                       mock_open(read_data=doc_bytes)) as mock_file:
                got_blocks = twrapper.analyze_file(feature_types,
                                                   **func_kwargs)
            mock_file.assert_called_once_with(
                func_kwargs['document_file_name'], 'rb')
        else:
            got_blocks = twrapper.analyze_file(feature_types, **func_kwargs)
        assert got_blocks['Blocks'] == blocks
    else:
        with pytest.raises(ClientError) as exc_info:
            twrapper.analyze_file(feature_types, **func_kwargs)
        assert exc_info.value.response['Error']['Code'] == error_code
Example #5
0
def usage_demo(outputs):
    """
    Launches the Textract Explorer Tkinter application with a default document image.
    """
    s3_resource = boto3.resource('s3')
    bucket = s3_resource.Bucket('textract-public-assets-' +
                                s3_resource.meta.client.meta.region_name)
    default_image_name = 'default_document_3.png'
    default_image_bytes = BytesIO()
    bucket.download_fileobj(default_image_name, default_image_bytes)
    twrapper = TextractWrapper(boto3.client('textract'), boto3.resource('s3'),
                               boto3.resource('sqs'))
    TextractExplorer(twrapper, outputs, default_image_name,
                     default_image_bytes)
def test_prepare_job(monkeypatch, error_code):
    s3_resource = boto3.resource('s3')
    twrapper = TextractWrapper(None, s3_resource, None)
    bucket_name = 'test-bucket_name'
    document_name = 'test-document_name'
    doc_bytes = BytesIO(b'test-doc-bytes')

    def mock_upload(Fileobj, Bucket, Key, ExtraArgs, Callback, Config):
        assert Bucket == bucket_name
        assert Fileobj == doc_bytes
        assert Key == document_name
        if error_code is not None:
            raise ClientError({'Error': {'Code': error_code}}, 'test-op')

    monkeypatch.setattr(s3_resource.meta.client, 'upload_fileobj', mock_upload)

    if error_code is None:
        twrapper.prepare_job(bucket_name, document_name, doc_bytes)
    else:
        with pytest.raises(ClientError) as exc_info:
            twrapper.prepare_job(bucket_name, document_name, doc_bytes)
        assert exc_info.value.response['Error']['Code'] == error_code
def test_make_page_hierarchy():
    got_blocks = TextractWrapper.make_page_hierarchy(test_input)
    assert got_blocks == test_hierarchy