def test_es_insert(models, config, corpus_index):

    """
    Document_Text.es_insert() should index all text rows in Elasticsearch.
    """

    # Index 10 documents, using the loop counter as both path and text.
    for i in range(10):
        doc = Document.create(path=str(i))
        Document_Text.create(document=doc, text=str(i))

    Document_Text.es_insert()

    # Should insert 10 docs.
    assert Document_Text.es_count() == 10

    # For each text row:
    for t in Document_Text.select():

        # An entry should exist in Elasticsearch; it is fetched with the
        # document path as the lookup value.
        hit = config.es.get('osp', t.document.path)

        # Should index doc ID / text. The body is compared against the
        # row's own text rather than the document path: the two only
        # coincide here because both were created from str(i).
        assert hit['_source']['doc_id'] == t.document.id
        assert hit['_source']['body'] == t.text
def test_matches(corpus_index, mock_hlom, add_doc, add_hlom):

    """
    Documents that match the record's query should each get a citation
    link row, and only those documents.
    """

    # Three documents that carry the queried title and author.
    matching = [
        add_doc(content) for content in (
            'War and Peace, Leo Tolstoy 1',
            'War and Peace, Leo Tolstoy 2',
            'War and Peace, Leo Tolstoy 3',
        )
    ]

    # Two documents for a different title by the same author.
    for content in (
        'Anna Karenina, Leo Tolstoy 1',
        'Anna Karenina, Leo Tolstoy 2',
    ):
        add_doc(content)

    Document_Text.es_insert()

    record = add_hlom('War and Peace', 'Leo Tolstoy')
    query(record.id)

    # Exactly 3 citation links should have been written.
    link_count = HLOM_Citation.select().count()
    assert link_count == 3

    # Each matching document should be linked to the record.
    for doc in matching:
        links = HLOM_Citation.select().where(
            HLOM_Citation.document == doc,
            HLOM_Citation.record == record,
        )
        assert links
def insert():

    """
    Index documents by delegating to Document_Text.es_insert().
    """

    Document_Text.es_insert()
def test_no_matches(corpus_index, add_doc, add_hlom):

    """
    A record that matches no documents should produce no citation rows.
    """

    # Index a single document whose title differs from the record's.
    add_doc('War and Peace, Leo Tolstoy')
    Document_Text.es_insert()

    # Run the query for a record with a different title.
    unmatched = add_hlom('Master and Man', 'Leo Tolstoy')
    query(unmatched.id)

    # No citation links should have been written.
    citation_count = HLOM_Citation.select().count()
    assert citation_count == 0