Ejemplo n.º 1
0
def test_es_insert(models, config, corpus_index):

    """
    CorpusIndex.index() should index all rows in Elasticsearch.
    """

    # Index 100 documents.
    for i in range(10):
        doc = Document.create(path=str(i))
        Document_Text.create(document=doc, text=str(i))

    Document_Text.es_insert()

    # Should insert 10 docs.
    assert Document_Text.es_count() == 10

    # For each text row:
    for t in Document_Text.select():

        # A document should exist.
        doc = config.es.get('osp', t.document.path)

        # Should index text / doc ID.
        assert doc['_source']['doc_id'] == t.document.id
        assert doc['_source']['body']   == t.document.path
Ejemplo n.º 2
0
def test_text_extraction_fails(models, mock_osp):

    """
    If no text can be extracted, don't write the row.
    """

    # Add an empty file.
    path = mock_osp.add_file(content="")
    document = Document.create(path=path)

    ext_text(document.id)

    # Shouldn't write a row.
    assert Document_Text.select().count() == 0