def test_queue(api_client):

    """
    POSTing a work order to /queue should enqueue a single meta-job. When
    that meta-job is performed, the queue should hold `ext_text` jobs whose
    arguments line up, in order, with `Document.page_cursor(20, 10)`.
    """

    for i in range(100):
        Document.create(path=str(i))

    work_order = {
        'model_import': 'osp.corpus.models.Document',
        'job_import': 'osp.corpus.jobs.ext_text',
        'worker_count': 20,
        'offset': 10,
    }

    # The response is not inspected; only the resulting queue state matters.
    api_client.post('/queue', data=work_order)

    # Should queue meta-job.
    assert config.rq.count == 1

    # Run the queue-job.
    meta = config.rq.dequeue()
    meta.perform()

    # Should spool the work jobs.
    for i, doc in enumerate(Document.page_cursor(20, 10)):
        job = config.rq.jobs[i]
        assert job.func == ext_text
        assert job.args == (doc.id,)
def test_queue(api_client):
    """
    A POST to /queue should put exactly one meta-job on the queue; running
    that meta-job should spool an `ext_text` job for each document yielded by
    `Document.page_cursor(20, 10)`, in the same order.
    """

    for n in range(100):
        Document.create(path=str(n))

    # The response object is deliberately discarded: the assertions below
    # check the queue, not the HTTP reply.
    api_client.post(
        '/queue',
        data={
            'model_import': 'osp.corpus.models.Document',
            'job_import': 'osp.corpus.jobs.ext_text',
            'worker_count': 20,
            'offset': 10,
        },
    )

    # Only the meta-job should be queued at this point.
    assert config.rq.count == 1

    # Pop the meta-job and execute it.
    config.rq.dequeue().perform()

    # Each spooled job should target the matching document id.
    for position, document in enumerate(Document.page_cursor(20, 10)):
        queued = config.rq.jobs[position]
        assert queued.func == ext_text
        assert queued.args == (document.id, )
# Example #3
0
def test_syllabus(mock_osp):

    """
    The `syllabus` attribute of a document row should be a Syllabus instance
    whose path is the file that the row's path refers to.
    """

    # Register the file first, then the row that points at it.
    file_path = mock_osp.add_file('000', name='123')
    row = Document.create(path='000/123')

    assert isinstance(row.syllabus, Syllabus)
    assert row.syllabus.path == file_path
def test_format_counts():

    """
    Document_Format.format_counts() should return (format, count) pairs; the
    expected result lists the formats from most to least frequent.
    """

    # Six documents, with paths "1" through "6".
    docs = [Document.create(path=str(n)) for n in range(1, 7)]

    # 1 doc with 'format1', 2 docs with 'format2', 3 docs with 'format3'.
    formats = ["format1"] + ["format2"] * 2 + ["format3"] * 3

    for doc, fmt in zip(docs, formats):
        Document_Format.create(document=doc, format=fmt)

    assert Document_Format.format_counts() == [
        ("format3", 3),
        ("format2", 2),
        ("format1", 1),
    ]
def test_format_counts():
    """
    Document_Format.format_counts() should report how many documents carry
    each format, as (format, count) tuples. The expected list below runs from
    the highest count to the lowest.
    """

    d1 = Document.create(path='1')
    d2 = Document.create(path='2')
    d3 = Document.create(path='3')
    d4 = Document.create(path='4')
    d5 = Document.create(path='5')
    d6 = Document.create(path='6')

    # The created format rows are never read back directly, so they are not
    # bound to names.

    # 1 doc with 'format1'.
    Document_Format.create(document=d1, format='format1')

    # 2 docs with 'format2'.
    for doc in (d2, d3):
        Document_Format.create(document=doc, format='format2')

    # 3 docs with 'format3'.
    for doc in (d4, d5, d6):
        Document_Format.create(document=doc, format='format3')

    expected = [('format3', 3), ('format2', 2), ('format1', 1)]
    assert Document_Format.format_counts() == expected
    def _doc(*args, **kwargs):

        """
        Write a file via `mock_osp.add_file`, insert a `Document` row for its
        relative path, run `ext_text` on that row, and return the row.

        All positional and keyword arguments are forwarded unchanged to
        `mock_osp.add_file`.
        """

        # Write a file.
        path = mock_osp.add_file(*args, **kwargs)
        syllabus = Syllabus(path)

        # Insert the document row.
        document = Document.create(path=syllabus.relative_path)

        # Extract text. The return value is not needed here.
        ext_text(document.id)

        return document
# Example #7
0
    def _doc(*args, **kwargs):
        """
        Create a file and a matching `Document` row, then run `ext_text` on
        the row.

        The arguments are passed straight through to `mock_osp.add_file`; the
        row is created with the `relative_path` of a `Syllabus` built from the
        returned file path. Returns the new `Document` row.
        """

        # Write the file and wrap it so the relative path can be read.
        file_path = mock_osp.add_file(*args, **kwargs)
        relative_path = Syllabus(file_path).relative_path

        # Insert the document row.
        document = Document.create(path=relative_path)

        # Run text extraction; its result is intentionally discarded.
        ext_text(document.id)

        return document
def test_text_extraction_fails(mock_osp):
    """
    Running `ext_text` on a document whose file has empty content should
    leave the `Document_Text` table without any rows.
    """

    # Register a file with empty content and point a document row at it.
    empty_file = mock_osp.add_file(content='')
    row = Document.create(path=empty_file)

    ext_text(row.id)

    # No text row should have been written.
    written = Document_Text.select().count()
    assert written == 0
def test_text_extraction_fails(mock_osp):

    """
    When the file behind a document has empty content, `ext_text` should not
    add anything to `Document_Text`.
    """

    # An empty file, and the document row that references it.
    document = Document.create(path=mock_osp.add_file(content=''))

    ext_text(document.id)

    # The text table should still be empty.
    text_rows = Document_Text.select()
    assert text_rows.count() == 0
# Example #10
0
def test_read_format(mock_osp):
    """
    `ext_format` should record the document's format as a `Document_Format`
    row; for the default mock file the expected format is 'text/plain'.
    """

    # Register a default file and a document row pointing at it.
    file_path = mock_osp.add_file()
    doc = Document.create(path=file_path)

    ext_format(doc.id)

    # Look up the format row that belongs to the document.
    belongs_to_doc = Document_Format.document == doc
    format_row = Document_Format.get(belongs_to_doc)
    assert format_row.format == 'text/plain'
def test_read_format(mock_osp):

    """
    After `ext_format` runs on a document, a `Document_Format` row for that
    document should exist with format 'text/plain'.
    """

    # A document row backed by a default mock file.
    document = Document.create(path=mock_osp.add_file())

    ext_format(document.id)

    # Fetch the row written for this document and check its format.
    written = Document_Format.get(Document_Format.document == document)
    detected = written.format
    assert detected == 'text/plain'
def test_text_extraction_succeeds(mock_osp):
    """
    `ext_text` should extract the text of a document's file and store it as
    a `Document_Text` row linked to that document.
    """

    # Register a file whose content is 'text', plus its document row.
    file_path = mock_osp.add_file(content='text')
    doc = Document.create(path=file_path)

    ext_text(doc.id)

    # Look up the text row that belongs to the document.
    belongs_to_doc = Document_Text.document == doc
    text_row = Document_Text.get(belongs_to_doc)
    assert text_row.text == 'text'
def test_text_extraction_succeeds(mock_osp):

    """
    After `ext_text` runs on a document whose file contains 'text', the
    `Document_Text` row for that document should hold the same text.
    """

    # A document row backed by a file with known content.
    document = Document.create(path=mock_osp.add_file(content='text'))

    ext_text(document.id)

    # Fetch the row written for this document and compare its text.
    written = Document_Text.get(Document_Text.document == document)
    extracted = written.text
    assert extracted == 'text'