コード例 #1
0
def test_function_step_with_context():
    """Run a two-argument function step and verify it can read the pipeline context.

    One document is added to a JSON store at ``/tmp/test-json-store`` and a
    second JSON store at ``/tmp/test-json-store2`` is set as the pipeline sink.
    The step copies ``context.transaction_id`` onto the document metadata; the
    test then checks the run statistics, the sink count, and that the document
    read back from the sink carries the pipeline's transaction id.
    """
    source_store = JsonDocumentStore("/tmp/test-json-store", force_initialize=True)
    source_store.add(create_document())
    sink_store = JsonDocumentStore("/tmp/test-json-store2", force_initialize=True)

    def my_function(doc, context):
        # Stamp the document with the id taken from the context it was handed,
        # and emit an error-level log record along the way.
        doc.metadata.cheese = context.transaction_id
        logging.error("Hello")
        return doc

    # The sink has to start empty so the final count is attributable to this run.
    assert sink_store.count() == 0

    pipeline = Pipeline(source_store)
    pipeline.add_step(my_function)
    pipeline.set_sink(sink_store)
    run_result = pipeline.run()
    stats = run_result.statistics

    assert stats.documents_processed == 1
    assert stats.document_exceptions == 0
    assert sink_store.count() == 1

    written_document = sink_store.get_document(0)
    assert written_document.metadata.cheese == pipeline.context.transaction_id

    # Dump the log attached to the stored document (visible with pytest -s).
    print(sink_store.get_document(0).log)
コード例 #2
0
def test_function_step_with_exception():
    """Check that a failing function step is recorded rather than aborting the run.

    The step sets a metadata value and then raises. The pipeline is created
    with ``stop_on_exception=False``, and the test expects the run to complete
    with one document processed, one document exception counted, one document
    in the sink, and exactly one entry in that document's ``exceptions``.
    """
    document_store = JsonDocumentStore("/tmp/test-json-store",
                                       force_initialize=True)
    document_store.add(create_document())
    new_document_store = JsonDocumentStore("/tmp/test-json-store2",
                                           force_initialize=True)

    def my_function(doc):
        # Mutate the document first, then fail. The step always raises, so it
        # intentionally has no return statement: one placed after the raise
        # could never execute.
        doc.metadata.cheese = "fishstick"
        raise Exception("hello world")

    # The sink has to start empty so the final count is attributable to this run.
    assert new_document_store.count() == 0
    pipeline = Pipeline(document_store, stop_on_exception=False)
    pipeline.add_step(my_function)
    pipeline.set_sink(new_document_store)
    stats = pipeline.run().statistics

    assert stats.documents_processed == 1
    assert stats.document_exceptions == 1
    assert new_document_store.count() == 1

    assert len(new_document_store.get_document(0).exceptions) == 1

    # Dump the recorded exceptions (visible with pytest -s).
    print(new_document_store.get_document(0).exceptions)
コード例 #3
0
def test_table_stores_with_extractor():
    """Verify a step can add to a table store registered on the pipeline.

    A ``TableDataStore`` with the single column ``cheese`` is registered under
    the name ``output``. The step looks that store up through the context and
    adds ``['test']`` to it; after the run the store, read from the context
    returned by ``run()``, must report a count of 1.
    """
    source_store = LocalDocumentStore()
    source_store.put("test.doc", create_document())

    pipeline = Pipeline(source_store, stop_on_exception=False)
    output_table = TableDataStore(columns=['cheese'])
    pipeline.add_store('output', output_table)

    def extractor(document, context):
        # Example extraction: push one list value into the store named
        # 'output', then hand the document on unchanged.
        target_store = context.get_store('output')
        target_store.add(['test'])
        return document

    pipeline.add_step(extractor)
    run_context = pipeline.run()

    populated_store = run_context.get_store('output')
    assert populated_store.count() == 1
コード例 #4
0
def test_table_stores_with_extractor():
    """Verify a step can add to a table store registered on the pipeline.

    One document is added to a JSON store at ``/tmp/test-json-store`` and a
    ``TableDataStore`` with the single column ``cheese`` is registered under
    the name ``output``. The step adds ``['test']`` to that store; after the
    run the store, read through ``pipeline.context``, must report a count of 1.
    """
    document_store = JsonDocumentStore("/tmp/test-json-store",
                                       force_initialize=True)
    document_store.add(create_document())
    pipeline = Pipeline(document_store, stop_on_exception=False)
    pipeline.add_store('output', TableDataStore(columns=['cheese']))

    def extractor(document, context):
        # Example extraction into the table store: add one single-value list
        # (presumably one row for the 'cheese' column -- confirm against
        # TableDataStore), then hand the document on unchanged.
        context.get_store('output').add(['test'])

        return document

    pipeline.add_step(extractor)

    # The value returned by run() is not needed here: the assertion below
    # reaches the store through pipeline.context instead.
    pipeline.run()

    assert pipeline.context.get_store('output').count() == 1
コード例 #5
0
def test_dict_stores_with_extractor():
    """Verify a step can add a dict to a dict store registered on the pipeline.

    One document is added to a JSON store at ``/tmp/test-json-store`` and a
    ``DictDataStore`` is registered under the name ``output``. The step adds
    ``{'cheese': 'test'}`` to that store; after the run the store, read through
    ``pipeline.context``, must report a count of 1.
    """
    document_store = JsonDocumentStore("/tmp/test-json-store",
                                       force_initialize=True)
    document_store.add(create_document())
    pipeline = Pipeline(document_store, stop_on_exception=False)
    pipeline.add_store('output', DictDataStore())

    def extractor(document, context):
        # Example extraction into a dict: add one mapping to the store named
        # 'output', then hand the document on unchanged.
        context.get_store('output').add({'cheese': 'test'})

        return document

    pipeline.add_step(extractor)

    # Run statistics are not inspected by this test, so the result of run()
    # is not kept; the store is read back through pipeline.context.
    pipeline.run()

    assert pipeline.context.get_store('output').count() == 1
コード例 #6
0
def test_function_step_with_context():
    """Run a two-argument function step and verify it can read the pipeline context.

    A document is put into a ``LocalDocumentStore`` as ``test.doc``. The step
    copies ``context.execution_id`` onto the document metadata and a
    ``DocumentStoreWriter`` step writes into a second store. The test checks
    the run statistics, the second store's count, and that the latest
    ``test.doc`` in it carries the pipeline's execution id.
    """
    source_store = LocalDocumentStore()
    source_store.put("test.doc", create_document())
    target_store = LocalDocumentStore()

    def my_function(doc, context):
        # Stamp the document with the id taken from the context it was handed,
        # and emit an error-level log record along the way.
        doc.metadata.cheese = context.execution_id
        logging.error("Hello")
        return doc

    # The target has to start empty so the final count is attributable to this run.
    assert target_store.count() == 0

    pipeline = Pipeline(source_store)
    pipeline.add_step(my_function)
    store_writer = DocumentStoreWriter(target_store)
    pipeline.add_step(store_writer)
    run_result = pipeline.run()
    stats = run_result.statistics

    assert stats.documents_processed == 1
    assert stats.document_exceptions == 0
    assert target_store.count() == 1

    latest_document = target_store.get_latest_document("test.doc")
    assert latest_document.metadata.cheese == pipeline.context.execution_id
コード例 #7
0
def test_function_step_with_exception():
    """Check that a failing function step is recorded rather than aborting the run.

    A document is put into a ``LocalDocumentStore`` as ``test.doc``. The first
    step sets a metadata value and then raises; a ``DocumentStoreWriter`` step
    follows it. With ``stop_on_exception=False`` the test expects one document
    processed, one document exception counted, one document in the second
    store, and exactly one entry in that document's ``exceptions``.
    """
    source_store = LocalDocumentStore()
    source_store.put("test.doc", create_document())
    target_store = LocalDocumentStore()

    def my_function(doc):
        # Mutate the document first, then fail unconditionally.
        doc.metadata.cheese = "fishstick"
        raise Exception("hello world")

    # The target has to start empty so the final count is attributable to this run.
    assert target_store.count() == 0

    pipeline = Pipeline(source_store, stop_on_exception=False)
    pipeline.add_step(my_function)
    store_writer = DocumentStoreWriter(target_store)
    pipeline.add_step(store_writer)
    run_result = pipeline.run()
    stats = run_result.statistics

    assert stats.documents_processed == 1
    assert stats.document_exceptions == 1
    assert target_store.count() == 1

    latest_document = target_store.get_latest_document("test.doc")
    assert len(latest_document.exceptions) == 1
コード例 #8
0
def test_class_step_step_with_context():
    """Run a class-based step and verify its ``process`` method can read the context.

    The step object exposes ``get_name()`` and ``process(doc, context)``; its
    ``process`` copies ``context.execution_id`` onto the document metadata. A
    ``DocumentStoreWriter`` step then writes into a second store. The test
    checks the run statistics and that the latest ``test.doc`` in the second
    store carries the pipeline's execution id.
    """

    class MyProcessingStep:

        def get_name(self):
            return "test-step"

        def process(self, doc, context):
            # Stamp the document with the id taken from the context it was
            # handed, and emit an error-level log record along the way.
            doc.metadata.cheese = context.execution_id
            logging.error("Hello")
            return doc

    source_store = LocalDocumentStore()
    source_store.put('test.doc', create_document())
    target_store = LocalDocumentStore()

    pipeline = Pipeline(source_store)
    processing_step = MyProcessingStep()
    pipeline.add_step(processing_step)
    store_writer = DocumentStoreWriter(target_store)
    pipeline.add_step(store_writer)
    run_context = pipeline.run()

    stats = run_context.statistics
    assert stats.documents_processed == 1
    assert stats.document_exceptions == 0

    latest_document = target_store.get_latest_document("test.doc")
    assert latest_document.metadata.cheese == pipeline.context.execution_id