def test_queue(api_client):
    """
    POSTing a work order to /queue should enqueue a single meta-job.
    Performing that meta-job should spool an `ext_text` job, with the
    document id as its only argument, for each document yielded by
    `Document.page_cursor(20, 10)`.
    """
    # Seed 100 document rows with paths '0'..'99'.
    for n in range(100):
        Document.create(path=str(n))

    # The response is not inspected; only the queue state is checked below.
    api_client.post('/queue', data={
        'model_import': 'osp.corpus.models.Document',
        'job_import': 'osp.corpus.jobs.ext_text',
        'worker_count': 20,
        'offset': 10,
    })

    # Should queue meta-job.
    assert config.rq.count == 1

    # Run the queue-job.
    meta = config.rq.dequeue()
    meta.perform()

    # Should spool the work jobs, in the same order as the page cursor.
    for i, doc in enumerate(Document.page_cursor(20, 10)):
        job = config.rq.jobs[i]
        assert job.func == ext_text
        assert job.args == (doc.id,)
def test_queue(api_client):
    """
    POSTing a work order to /queue should enqueue a single meta-job.
    Performing that meta-job should spool an `ext_text` job, with the
    document id as its only argument, for each document yielded by
    `Document.page_cursor(20, 10)`.
    """
    # Seed 100 document rows with paths '0'..'99'.
    for n in range(100):
        Document.create(path=str(n))

    work_order = {
        'model_import': 'osp.corpus.models.Document',
        'job_import': 'osp.corpus.jobs.ext_text',
        'worker_count': 20,
        'offset': 10,
    }

    # The response is not inspected; only the queue state is checked below.
    api_client.post('/queue', data=work_order)

    # Should queue meta-job.
    assert config.rq.count == 1

    # Run the queue-job.
    meta = config.rq.dequeue()
    meta.perform()

    # Should spool the work jobs, in the same order as the page cursor.
    for i, doc in enumerate(Document.page_cursor(20, 10)):
        job = config.rq.jobs[i]
        assert job.func == ext_text
        assert job.args == (doc.id,)
def test_syllabus(mock_osp):
    """
    The `syllabus` attribute of a document row should be a Syllabus
    instance whose path is the file that the row points at.
    """
    # Presumably add_file('000', name='123') places the file at '000/123'
    # inside the mock corpus; the row path below relies on that.
    file_path = mock_osp.add_file('000', name='123')

    row = Document.create(path='000/123')

    assert isinstance(row.syllabus, Syllabus)
    assert row.syllabus.path == file_path
def test_format_counts():
    """
    Document_Format.format_counts() should return (format, count) pairs,
    listed from the most frequent format to the least frequent one.
    """
    # One entry per document: 1 doc with 'format1', 2 docs with 'format2',
    # 3 docs with 'format3'.
    formats = [
        "format1",
        "format2", "format2",
        "format3", "format3", "format3",
    ]

    # Insert every document row first (paths "1".."6"), then one format
    # row per document.
    documents = [
        Document.create(path=str(n))
        for n in range(1, len(formats) + 1)
    ]

    for document, fmt in zip(documents, formats):
        Document_Format.create(document=document, format=fmt)

    assert Document_Format.format_counts() == [
        ("format3", 3),
        ("format2", 2),
        ("format1", 1),
    ]
def test_format_counts():
    """
    Document_Format.format_counts() should return (format, count) pairs,
    listed from the most frequent format to the least frequent one.
    """
    # One entry per document: 1 doc with 'format1', 2 docs with 'format2',
    # 3 docs with 'format3'.
    formats = [
        'format1',
        'format2', 'format2',
        'format3', 'format3', 'format3',
    ]

    # Insert every document row first (paths '1'..'6'), then one format
    # row per document.
    documents = [
        Document.create(path=str(n))
        for n in range(1, len(formats) + 1)
    ]

    for document, fmt in zip(documents, formats):
        Document_Format.create(document=document, format=fmt)

    assert Document_Format.format_counts() == [
        ('format3', 3),
        ('format2', 2),
        ('format1', 1),
    ]
def _doc(*args, **kwargs):
    """
    Write a mock file, insert a document row for it, and run text
    extraction on that row.

    All positional and keyword arguments are forwarded unchanged to
    `mock_osp.add_file`. Note that `mock_osp` is not a parameter: it is
    resolved from the enclosing scope, which is not visible here.

    Returns the newly created Document row.
    """
    # Write a file.
    path = mock_osp.add_file(*args, **kwargs)

    # Insert the document row, keyed on the syllabus' relative path.
    document = Document.create(path=Syllabus(path).relative_path)

    # Extract text. Only the call matters here; its result is not used.
    ext_text(document.id)

    return document
def test_text_extraction_fails(mock_osp):
    """
    Running ext_text() on a file with no content should leave no
    Document_Text rows behind.
    """
    # An empty file gives the extractor nothing to work with.
    empty_path = mock_osp.add_file(content='')

    row = Document.create(path=empty_path)
    ext_text(row.id)

    # Shouldn't write a row.
    written = Document_Text.select().count()
    assert written == 0
def test_read_format(mock_osp):
    """
    ext_format() should store the document's format in the
    `document_format` table.
    """
    # Create a file with the fixture's defaults and a row pointing at it.
    file_path = mock_osp.add_file()
    doc = Document.create(path=file_path)

    ext_format(doc.id)

    # Look up the format row that belongs to this document.
    belongs_to_doc = Document_Format.document == doc
    stored = Document_Format.get(belongs_to_doc)

    assert stored.format == 'text/plain'
def test_read_format(mock_osp):
    """
    ext_format() should store the document's format in the
    `document_format` table.
    """
    # Create a file with the fixture's defaults and a row pointing at it.
    file_path = mock_osp.add_file()
    doc = Document.create(path=file_path)

    ext_format(doc.id)

    # Look up the format row that belongs to this document.
    belongs_to_doc = Document_Format.document == doc
    stored = Document_Format.get(belongs_to_doc)

    assert stored.format == 'text/plain'
def test_text_extraction_succeeds(mock_osp):
    """
    ext_text() should pull the text out of a document's file and store
    it in the `document_text` table.
    """
    # Create a file containing 'text' and a row pointing at it.
    file_path = mock_osp.add_file(content='text')
    doc = Document.create(path=file_path)

    ext_text(doc.id)

    # Look up the text row that belongs to this document.
    belongs_to_doc = Document_Text.document == doc
    stored = Document_Text.get(belongs_to_doc)

    assert stored.text == 'text'
def test_text_extraction_succeeds(mock_osp):
    """
    ext_text() should pull the text out of a document's file and store
    it in the `document_text` table.
    """
    # Create a file containing 'text' and a row pointing at it.
    file_path = mock_osp.add_file(content='text')
    doc = Document.create(path=file_path)

    ext_text(doc.id)

    # Look up the text row that belongs to this document.
    belongs_to_doc = Document_Text.document == doc
    stored = Document_Text.get(belongs_to_doc)

    assert stored.text == 'text'