Esempio n. 1
0
def test_queue_failed_task(app, http_client, base_url):
    """Check that /queue/<task_id> reports a pending task's FAILED state.

    Registers a task whose state is 'FAILED' in the service manager's
    pending tasks, fetches its queue URL, and expects a 200 response whose
    JSON body carries the same state.
    """
    failed_id = 'my_task'
    app.service_manager.pending_tasks[failed_id] = {'state': 'FAILED'}

    queue_url = base_url + '/queue/{}'.format(failed_id)
    response = yield http_client.fetch(queue_url)

    assert response.code == 200
    payload = json.loads(to_string(response.body))
    assert payload['state'] == 'FAILED'
Esempio n. 2
0
 def poll():
     """Fetch the Content-Location URL repeatedly until the state is FAILED.

     Each response must not be a 303; the loop ends once the JSON body's
     'state' field equals 'FAILED'.
     """
     state = None
     while state != 'FAILED':
         status_url = '{}{}'.format(base_url,
                                    rsp.headers['Content-Location'])
         status_rsp = yield http_client.fetch(status_url,
                                              method='GET',
                                              follow_redirects=False)
         assert status_rsp.code != 303
         body = json.loads(to_string(status_rsp.body))
         state = body['state']
Esempio n. 3
0
def test_get_existing_service(app, http_client, base_url):
    """Check that a stored service can be fetched by its added_id.

    Empties the entity and index tables, stores one PENDING service, looks
    up its added_id by the hex form of the stored row id, and expects the
    service endpoint to return exactly the stored fields.
    """
    db = app.service_manager.db
    db.execute("DELETE FROM entities; "
               "DELETE FROM index_url; "
               "DELETE FROM index_service_id;")
    stored = db.put({'url': 'url', 'state': 'PENDING'})

    lookup_sql = "select added_id from entities where HEX(id)='%s';" % (
        to_string(stored['id']))
    matches = db.query(lookup_sql)

    service_url = base_url+'/api/v1/service/{}'.format(
        matches[0]['added_id'])
    response = yield http_client.fetch(HTTPRequest(url=service_url))

    assert response.code == 200
    expected = {"state": "PENDING", "url": "url"}
    assert json.loads(to_string(response.body)) == expected
Esempio n. 4
0
def test_get_services(app, http_client, base_url):
    """Check that the services listing returns the single stored service.

    Empties the entity and index tables, stores one PENDING service, and
    expects /api/v1/services to answer 200 with that service as the only
    entry under "services".
    """
    db = app.service_manager.db
    db.execute("DELETE FROM entities; "
               "DELETE FROM index_url; "
               "DELETE FROM index_service_id;")
    db.put({'url': 'url', 'state': 'PENDING'})

    listing_request = HTTPRequest(url=base_url+'/api/v1/services')
    response = yield http_client.fetch(listing_request)

    assert response.code == 200
    expected = {"services": [{"url": "url", "state": "PENDING"}]}
    assert json.loads(to_string(response.body)) == expected
Esempio n. 5
0
def create_service(service, config):
    """Download the file at ``service['url']`` and return its OCR'd text.

    The file is fetched with ``curl`` into a fresh temporary directory.
    A PDF is first converted to PNG with ``convert``; JPEG and PNG files are
    used as-is.  ``tesseract`` is then run on the image and the contents of
    the text file it writes are returned.  The temporary directory is removed
    before returning, whether or not the run succeeded.

    Args:
        service: Mapping describing the request; must contain a ``'url'``
            key.  The URL is treated as untrusted and is shell-quoted.
        config: Only logged here; not otherwise used by this function.

    Returns:
        The recognized text read from tesseract's ``.txt`` output file.

    Raises:
        ValueError: If ``'url'`` is missing from ``service`` or the
            downloaded file is not detected as PDF, JPEG or PNG.
        Exception: Whatever ``run_cmd`` raises for a failing command
            (e.g. ``CmdException``) propagates unchanged.
    """
    import shutil
    try:
        from shlex import quote
    except ImportError:  # Python 2 keeps the same helper in ``pipes``
        from pipes import quote

    logging.debug('service: {} config: {}'.format(service, config))

    # sanity checks
    if 'url' not in service:
        raise ValueError("Request doesn't have the {} field".format('url'))

    logging.debug('Adding service: {}'.format(service))
    url = service['url']
    tmpdir = tempfile.mkdtemp()
    workdir = quote(tmpdir)

    def run_helper(cmd, env=None):
        # Log every command before handing it to run_cmd; errors propagate.
        logging.debug('Running cmd: {}'.format(cmd))
        return run_cmd(cmd,
                       env=env,
                       stdout_callback=None,
                       stderr_callback=None)

    try:
        # download file; the URL comes from the request, so it must never
        # reach the shell unquoted
        run_helper('cd {}; curl -O {}'.format(workdir, quote(url)))

        # curl -O names the file after the last URL segment
        filename = tmpdir + '/' + url.split('/')[-1]
        clean_filetype = to_string(magic.from_file(filename)).lower()

        if 'pdf' in clean_filetype:
            # Convert to a uniquely named PNG so the result can neither
            # collide with the download nor depend on dots in the path.
            image = tmpdir + '/' + str(uuid.uuid4()) + '.png'
            run_helper('cd {}; convert {} {}'.format(workdir,
                                                     quote(filename),
                                                     quote(image)))
            filename = image
        elif not ('jpeg' in clean_filetype or 'png' in clean_filetype):
            raise ValueError('File {} unsupported'.format(filename))

        # run tesseract; it writes its result to <output>.txt
        output = tmpdir + '/' + str(uuid.uuid4())
        run_helper('cd {}; tesseract {} {}'.format(workdir,
                                                   quote(filename),
                                                   quote(output)))

        # return recognized text
        with open(output + '.txt') as recognized:
            return recognized.read()
    finally:
        # mkdtemp leaves cleanup to the caller; do not leak the directory
        shutil.rmtree(tmpdir, ignore_errors=True)
Esempio n. 6
0
def create_service(service, config):
    """Download the file at ``service['url']`` and return its OCR'd text.

    The file is fetched with ``curl`` into a fresh temporary directory.
    A PDF is first converted to PNG with ``convert``; JPEG and PNG files are
    used as-is.  ``tesseract`` is then run on the image and the contents of
    the text file it writes are returned.  The temporary directory is removed
    before returning, whether or not the run succeeded.

    Args:
        service: Mapping describing the request; must contain a ``'url'``
            key.  The URL is treated as untrusted and is shell-quoted.
        config: Only logged here; not otherwise used by this function.

    Returns:
        The recognized text read from tesseract's ``.txt`` output file.

    Raises:
        ValueError: If ``'url'`` is missing from ``service`` or the
            downloaded file is not detected as PDF, JPEG or PNG.
        Exception: Whatever ``run_cmd`` raises for a failing command
            (e.g. ``CmdException``) propagates unchanged.
    """
    import shutil
    try:
        from shlex import quote
    except ImportError:  # Python 2 keeps the same helper in ``pipes``
        from pipes import quote

    logging.debug('service: {} config: {}'.format(service, config))

    # sanity checks
    if 'url' not in service:
        raise ValueError("Request doesn't have the {} field".format('url'))

    logging.debug('Adding service: {}'.format(service))
    url = service['url']
    tmpdir = tempfile.mkdtemp()
    workdir = quote(tmpdir)

    def run_helper(cmd, env=None):
        # Log every command before handing it to run_cmd; errors propagate.
        logging.debug('Running cmd: {}'.format(cmd))
        return run_cmd(cmd,
                       env=env,
                       stdout_callback=None,
                       stderr_callback=None)

    try:
        # download file; the URL comes from the request, so it must never
        # reach the shell unquoted
        run_helper('cd {}; curl -O {}'.format(workdir, quote(url)))

        # curl -O names the file after the last URL segment
        filename = tmpdir + '/' + url.split('/')[-1]
        clean_filetype = to_string(magic.from_file(filename)).lower()

        if 'pdf' in clean_filetype:
            # Convert to a uniquely named PNG so the result can neither
            # collide with the download nor depend on dots in the path.
            image = tmpdir + '/' + str(uuid.uuid4()) + '.png'
            run_helper('cd {}; convert {} {}'.format(workdir,
                                                     quote(filename),
                                                     quote(image)))
            filename = image
        elif not ('jpeg' in clean_filetype or 'png' in clean_filetype):
            raise ValueError('File {} unsupported'.format(filename))

        # run tesseract; it writes its result to <output>.txt
        output = tmpdir + '/' + str(uuid.uuid4())
        run_helper('cd {}; tesseract {} {}'.format(workdir,
                                                   quote(filename),
                                                   quote(output)))

        # return recognized text
        with open(output + '.txt') as recognized:
            return recognized.read()
    finally:
        # mkdtemp leaves cleanup to the caller; do not leak the directory
        shutil.rmtree(tmpdir, ignore_errors=True)