def test_queue_failed_task(app, http_client, base_url):
    """A task stored as FAILED must be reported as FAILED by /queue/<id>.

    The task is planted directly in ``app.service_manager.pending_tasks``
    and then fetched over HTTP; the endpoint is expected to answer 200 with
    a JSON body whose ``state`` is ``FAILED``.
    """
    failed_id = 'my_task'
    app.service_manager.pending_tasks[failed_id] = {'state': 'FAILED'}

    queue_url = base_url + '/queue/{}'.format(failed_id)
    response = yield http_client.fetch(queue_url)

    assert response.code == 200
    payload = json.loads(to_string(response.body))
    assert payload['state'] == 'FAILED'
def poll():
    """Poll the task's status URL until its reported state is ``FAILED``.

    ``http_client``, ``base_url`` and ``rsp`` are taken from the enclosing
    scope. Each iteration GETs the URL named by ``rsp``'s
    ``Content-Location`` header without following redirects and asserts the
    answer is not a 303.
    """
    current_state = None
    while current_state != 'FAILED':
        # The URL is rebuilt on every pass, exactly as the header reads now.
        status_url = '{}{}'.format(base_url, rsp.headers['Content-Location'])
        status_rsp = yield http_client.fetch(
            status_url, method='GET', follow_redirects=False)
        assert status_rsp.code != 303
        current_state = json.loads(to_string(status_rsp.body))['state']
def test_get_existing_service(app, http_client, base_url):
    """Fetching a stored service by its ``added_id`` returns its fields.

    The tables are emptied, one PENDING service is inserted, its
    ``added_id`` is looked up by the hex form of the row id, and the
    ``/api/v1/service/<added_id>`` endpoint is expected to answer 200 with
    the stored ``state`` and ``url``.
    """
    db = app.service_manager.db
    db.execute("DELETE FROM entities; "
               "DELETE FROM index_url; "
               "DELETE FROM index_service_id;")

    inserted = db.put({'url': 'url', 'state': 'PENDING'})

    lookup_sql = "select added_id from entities where HEX(id)='%s';" % (
        to_string(inserted['id']))
    matches = db.query(lookup_sql)

    service_url = base_url + '/api/v1/service/{}'.format(
        matches[0]['added_id'])
    response = yield http_client.fetch(HTTPRequest(url=service_url))

    assert response.code == 200
    body = json.loads(to_string(response.body))
    assert body == {"state": "PENDING", "url": "url"}
def test_get_services(app, http_client, base_url):
    """Listing services returns the single service that was stored.

    The tables are emptied, one PENDING service is inserted, and
    ``/api/v1/services`` is expected to answer 200 with a ``services`` list
    holding exactly that entry.
    """
    db = app.service_manager.db
    db.execute("DELETE FROM entities; "
               "DELETE FROM index_url; "
               "DELETE FROM index_service_id;")
    db.put({'url': 'url', 'state': 'PENDING'})

    listing_url = base_url + '/api/v1/services'
    response = yield http_client.fetch(HTTPRequest(url=listing_url))

    assert response.code == 200
    expected = {"services": [{"url": "url", "state": "PENDING"}]}
    assert json.loads(to_string(response.body)) == expected
def create_service(service, config):
    """Download the file at ``service['url']`` and return its OCR text.

    The file is fetched with ``curl`` into a fresh temporary directory,
    its type is sniffed with ``magic``, PDFs are first converted to PNG
    with ``convert``, and the image is then passed to ``tesseract``.

    Args:
        service: Mapping describing the request; must contain ``'url'``.
        config: Only logged here; not otherwise used by this function.

    Returns:
        The contents of the ``.txt`` file written by tesseract.

    Raises:
        ValueError: If ``'url'`` is missing from ``service`` or the
            downloaded file is not detected as PDF, JPEG or PNG.
        Any exception raised by ``run_cmd`` propagates unchanged.
    """
    # Function-scope imports keep this block self-contained.
    import shutil
    try:
        from shlex import quote
    except ImportError:  # Python 2 fallback
        from pipes import quote

    logging.debug('service: {} config: {}'.format(service, config))

    # sanity checks
    def check_field_present(field):
        if field not in service:
            raise ValueError("Request doesn't have the {} field".format(field))

    check_field_present('url')

    logging.debug('Adding service: {}'.format(service))

    def run_helper(cmd, env=None):
        logging.debug('Running cmd: {}'.format(cmd))
        return run_cmd(cmd, env=env, stdout_callback=None,
                       stderr_callback=None)

    tmpdir = tempfile.mkdtemp()
    try:
        # Every value interpolated into a shell string is quoted: the URL
        # comes from the request and must not be able to inject commands.
        workdir = quote(tmpdir)

        # download file
        run_helper('cd {}; curl -O {}'.format(workdir, quote(service['url'])))
        filename = tmpdir + '/' + service['url'].split('/')[-1]

        clean_filetype = to_string(magic.from_file(filename)).lower()
        if 'pdf' in clean_filetype:
            # Appending the suffix guarantees the converted image never
            # collides with the downloaded file, whatever its name is.
            converted = filename + '.png'
            run_helper('cd {}; convert {} {}'.format(
                workdir, quote(filename), quote(converted)))
            filename = converted
        elif not ('jpeg' in clean_filetype or 'png' in clean_filetype):
            raise ValueError('File {} unsupported'.format(filename))

        # run tesseract; it writes its result to "<output>.txt"
        output = tmpdir + '/' + str(uuid.uuid4())
        run_helper('cd {}; tesseract {} {}'.format(
            workdir, quote(filename), quote(output)))

        # return recognized text
        with open(output + '.txt') as result_file:
            return result_file.read()
    finally:
        # The scratch directory is never exposed to the caller, so remove
        # it on success and on failure alike.
        shutil.rmtree(tmpdir, ignore_errors=True)