def test_table_stores_with_extractor():
    """Check that a pipeline step can write to a table store via the context.

    One document is put into a LocalDocumentStore, a single extractor step
    adds ``['test']`` to the TableDataStore registered as 'output', and the
    context returned by ``run()`` must report a count of 1 for that store.
    """
    store = LocalDocumentStore()
    store.put("test.doc", create_document())

    pipe = Pipeline(store, stop_on_exception=False)
    table_store = TableDataStore(columns=['cheese'])
    pipe.add_store('output', table_store)

    def extractor(document, context):
        # Example extraction step: look up the store registered under
        # 'output', add a one-value entry to it, and hand the document back.
        output = context.get_store('output')
        output.add(['test'])
        return document

    pipe.add_step(extractor)
    run_context = pipe.run()

    output_store = run_context.get_store('output')
    assert output_store.count() == 1
def test_table_stores_with_extractor():
    """Check that a pipeline step can write to a table store via the context.

    A document is added to a JsonDocumentStore (re-initialized at
    ``/tmp/test-json-store``), a single extractor step adds ``['test']`` to
    the TableDataStore registered as 'output', and after the run the store
    reached through ``pipeline.context`` must report a count of 1.
    """
    document_store = JsonDocumentStore("/tmp/test-json-store", force_initialize=True)
    document_store.add(create_document())

    pipeline = Pipeline(document_store, stop_on_exception=False)
    pipeline.add_store('output', TableDataStore(columns=['cheese']))

    def extractor(document, context):
        # An example of how we might extract into a table store: add a
        # one-value entry to the store registered under 'output'.
        context.get_store('output').add(['test'])
        return document

    pipeline.add_step(extractor)

    # The value returned by run() is deliberately not bound: the assertion
    # below reads the context from the pipeline object itself.
    pipeline.run()

    assert pipeline.context.get_store('output').count() == 1
def test_dict_stores_with_extractor():
    """Check that a pipeline step can write to a dict store via the context.

    A document is added to a JsonDocumentStore (re-initialized at
    ``/tmp/test-json-store``), a single extractor step adds
    ``{'cheese': 'test'}`` to the DictDataStore registered as 'output', and
    after the run the store reached through ``pipeline.context`` must report
    a count of 1.
    """
    document_store = JsonDocumentStore("/tmp/test-json-store", force_initialize=True)
    document_store.add(create_document())

    pipeline = Pipeline(document_store, stop_on_exception=False)
    pipeline.add_store('output', DictDataStore())

    def extractor(document, context):
        # An example of how we might extract into a dict: add one mapping
        # to the store registered under 'output'.
        context.get_store('output').add({'cheese': 'test'})
        return document

    pipeline.add_step(extractor)

    # The statistics are still read from the run result (so a missing
    # attribute would still fail the test), but the value is not checked,
    # so it is discarded explicitly rather than bound to an unused name.
    _ = pipeline.run().statistics

    assert pipeline.context.get_store('output').count() == 1