Ejemplo n.º 1
0
def test_basic_local_document_store():
    """Check that a second store opened on the same path sees a stored document.

    One document is put into a ``LocalDocumentStore`` created with
    ``force_initialize=True``; a second store is then opened on the same
    ``store_path`` and must report a count of 1.

    The store path sits inside a temporary directory created for this run
    instead of a fixed ``/tmp`` location, so the test does not assume a
    POSIX-style filesystem layout, cannot collide with another run that uses
    the same path, and removes its files when it finishes.
    """
    # Function-scope imports keep this test self-contained.
    import os
    import tempfile

    with tempfile.TemporaryDirectory() as scratch_dir:
        # The store path itself does not exist yet; only its parent does.
        store_path = os.path.join(scratch_dir, "test-json-store.jsonkey")

        document_store = LocalDocumentStore(store_path=store_path, force_initialize=True)
        document_store.put("test.doc", create_document())

        # Re-open the same path without force_initialize.
        new_document_store = LocalDocumentStore(store_path=store_path)

        assert new_document_store.count() == 1
Ejemplo n.º 2
0
def test_function_step_with_context():
    """Run a two-argument function step and check it can read the pipeline context.

    The step copies ``context.execution_id`` onto the document metadata. After
    the run, the document read back from the target store must carry the same
    value as ``pipeline.context.execution_id``.
    """
    source_store = LocalDocumentStore()
    source_store.put("test.doc", create_document())

    target_store = LocalDocumentStore()
    # Precondition: nothing has been written to the target store yet.
    assert target_store.count() == 0

    def my_function(doc, context):
        # Stamp the document with the id taken from the supplied context.
        doc.metadata.cheese = context.execution_id
        logging.error("Hello")
        return doc

    pipeline = Pipeline(source_store)
    pipeline.add_step(my_function)
    writer_step = DocumentStoreWriter(target_store)
    pipeline.add_step(writer_step)

    run_result = pipeline.run()
    stats = run_result.statistics

    assert stats.documents_processed == 1
    assert stats.document_exceptions == 0
    assert target_store.count() == 1

    written_doc = target_store.get_latest_document("test.doc")
    assert written_doc.metadata.cheese == pipeline.context.execution_id
Ejemplo n.º 3
0
def test_function_step_with_exception():
    """Run a failing function step with ``stop_on_exception=False``.

    The step always raises. The test expects the run statistics to report one
    processed document and one document exception, and the document read back
    from the target store to have exactly one entry in ``exceptions``.
    """
    source_store = LocalDocumentStore()
    source_store.put("test.doc", create_document())

    target_store = LocalDocumentStore()
    # Precondition: nothing has been written to the target store yet.
    assert target_store.count() == 0

    def my_function(doc):
        # Touch the metadata first, then fail unconditionally.
        doc.metadata.cheese = "fishstick"
        raise Exception("hello world")

    pipeline = Pipeline(source_store, stop_on_exception=False)
    pipeline.add_step(my_function)
    writer_step = DocumentStoreWriter(target_store)
    pipeline.add_step(writer_step)

    run_result = pipeline.run()
    stats = run_result.statistics

    assert stats.documents_processed == 1
    assert stats.document_exceptions == 1
    assert target_store.count() == 1

    written_doc = target_store.get_latest_document("test.doc")
    assert len(written_doc.exceptions) == 1
Ejemplo n.º 4
0
def test_url_pipeline():
    """Build a document from a URL, run it through a text parser and store it.

    The pipeline is assembled fluently: each ``add_step`` return value is used
    for the next call. The test checks the run statistics and the target store
    count, then prints the content of the stored document.

    NOTE(review): ``Document.from_url`` is given a live address, so this test
    presumably needs network access -- confirm before running it offline.
    """
    source_url = "http://www.google.com"
    url_document = Document.from_url(source_url)
    target_store = LocalDocumentStore()

    # Same fluent chain as a one-liner, with each link bound to a name.
    pipeline = Pipeline(url_document)
    pipeline = pipeline.add_step(TextParser(encoding='ISO-8859-1'))
    pipeline = pipeline.add_step(DocumentStoreWriter(target_store))
    stats = pipeline.run().statistics

    assert stats.documents_processed == 1
    assert stats.document_exceptions == 0
    assert target_store.count() == 1

    # The stored document is looked up under the URL it was created from.
    stored_doc = target_store.get_latest_document(source_url)
    print(stored_doc.content_node.get_all_content())
Ejemplo n.º 5
0
def test_fluent_pipeline():
    """Assemble a pipeline fluently from a single document and run it.

    The same function step is added twice, followed by a store writer. The
    test checks the run statistics, the target store count, and that the
    stored document carries the metadata value set by the step.
    """
    def my_function(doc):
        # Tag the document and hand it on to the next step.
        doc.metadata.cheese = "fishstick"
        logging.error("Hello")
        return doc

    source_document = create_document()
    target_store = LocalDocumentStore()

    # Same fluent chain as a one-liner, with each link bound to a name.
    pipeline = Pipeline(source_document)
    pipeline = pipeline.add_step(my_function)
    pipeline = pipeline.add_step(my_function)
    pipeline = pipeline.add_step(DocumentStoreWriter(target_store))
    stats = pipeline.run().statistics

    assert stats.documents_processed == 1
    assert stats.document_exceptions == 0
    assert target_store.count() == 1

    stored_doc = target_store.get_latest_document("test.doc")
    assert stored_doc.metadata.cheese == 'fishstick'