def test_es_insert(models, config, corpus_index):

    """
    Document_Text.es_insert() should index all text rows in Elasticsearch.
    """

    # Index 10 documents, each with one text row. The text and the path are
    # both str(i).
    for i in range(10):
        doc = Document.create(path=str(i))
        Document_Text.create(document=doc, text=str(i))

    Document_Text.es_insert()

    # Should insert 10 docs.
    assert Document_Text.es_count() == 10

    # For each text row:
    for t in Document_Text.select():

        # A document should exist, looked up by the document path.
        doc = config.es.get('osp', t.document.path)

        # Should index the doc ID and the row's text. Compare against the
        # text itself rather than the path, which only happens to be equal.
        assert doc['_source']['doc_id'] == t.document.id
        assert doc['_source']['body'] == t.text
def test_es_doc(models):

    """
    The `es_doc` of a Document_Text row should carry the document path as
    `_id`, the document id as `doc_id`, and the row text as `body`.
    """

    document = Document.create(path='000/abc')
    row = Document_Text.create(document=document, text='text')

    # Expected value for each key of the Elasticsearch document.
    expected = (
        ('_id', '000/abc'),
        ('doc_id', document.id),
        ('body', 'text'),
    )

    for key, value in expected:
        assert row.es_doc[key] == value
def ext_text(doc_id):

    """
    Store a document's syllabus text as a Document_Text row.

    Args:
        doc_id (int): The document id.

    Returns:
        The row returned by Document_Text.create(), or None when the
        document's syllabus text is empty / falsy (no row is created).
    """

    doc = Document.get(Document.id == doc_id)

    # Read the text once, so the value that is checked is the value that is
    # stored and any work behind `syllabus.text` is not repeated.
    text = doc.syllabus.text

    if not text:
        return None

    return Document_Text.create(
        text=text,
        document=doc
    )
def test_link_with_document(models, mock_osp):

    """
    The row produced by ext_semester() should point at the document whose
    id was handed to the job.
    """

    # Two document rows; the first only exists so that the second document's
    # id and the text row's id diverge.
    Document.create(path='path1')
    target = Document.create(path='path2')

    # A single text row, attached to the second document.
    text_row = Document_Text.create(document=target, text='Fall 2012')

    # Guard: the text row id must differ from its document id.
    assert text_row.id != text_row.document.id

    result = ext_semester(target.id)

    assert result.document == target