def post(self):
    """
    Create a fresh batch and report its identifier.

    ** Request **

    .. sourcecode:: http

        POST /batch

    ** Response **

    .. sourcecode:: http

        HTTP/1.1 201 CREATED

        {
            "id": "78a1f1e4-cc76-40ce-8a98-77b54362a00e",
            "url": "/batch/78a1f1e4-cc76-40ce-8a98-77b54362a00e"
        }

    :status 201: Successfully created
    """
    log.debug('Routing to batch with POST')
    new_batch = SimpleBatch()
    # The response carries both the raw identifier and the URL of the batch
    # resource so that clients do not have to assemble the URL themselves.
    batch_url = url_for('api.batch', batch_id=new_batch.id)
    payload = {'id': new_batch.id, 'url': batch_url}
    log.debug('Created batch {}'.format(new_batch.id))
    return payload, 201
def post(self, batch_id):
    """
    Adds a page (really any type of file) to the batch identified by
    *batch_id*.

    ** Request **

        POST /batch/:batch/pages

    ** Response **

        HTTP/1.1 201 CREATED

        [
            {
                "name": "0033.tif",
                "url": "/pages/63ca3ec7-2592-4c7d-9009-913aac42535d/0033.tif"
            }
        ]

    Uploads are written to storage one after another. A stored file is only
    registered as a document of the batch when the parsed ``auxiliary``
    argument is exactly ``False``. A storage violation aborts the request
    immediately with 403; files handled earlier in the same request are not
    rolled back.

    :param batch_id: batch identifier
    :form scans: file(s) to add to the batch
    :status 201: task created
    :status 403: file couldn't be created
    :status 404: batch not found
    """
    args = self.parser.parse_args()
    log.debug('Routing to pages {} of {} (POST)'.format(
        [x.filename for x in args['scans']], batch_id))
    try:
        batch = SimpleBatch(batch_id)
    except Exception:
        # Any failure to open the batch is reported as "not found", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404

    data = []
    for upload in args['scans']:
        try:
            fp = storage.StorageFile(batch_id, upload.filename, 'wb')
        except NidabaStorageViolationException as e:
            log.debug('Failed to write file {}'.format(upload.filename),
                      exc_info=True)
            return {'message': str(e)}, 403
        with fp:
            upload.save(fp)
            upload.close()
            if args['auxiliary'] is False:
                log.debug('Adding {}/{} to {}'.format(
                    fp.storage_path[0], fp.storage_path[1], batch_id))
                batch.add_document(fp.storage_path)
        data.append({
            'name': upload.filename,
            'url': url_for('api.page', batch=batch_id,
                           file=upload.filename),
        })
    return data, 201
def get(self, batch_id):
    """
    Retrieves the state of batch *batch_id*.

    ** Request **

    .. sourcecode:: http

        GET /batch/:batch_id

    ** Response **

    .. sourcecode:: http

        HTTP/1.1 200 OK

    The response object always contains the URLs of the batch's page and
    task collections under ``pages`` and ``tasks``. While the batch reports
    itself as running, the extended state is added under ``chains`` with
    all document references rewritten into page URLs.

    :param batch_id: batch identifier
    :type batch_id: string
    :status 200: No error
    :status 404: No such batch
    """
    log.debug('Routing to batch {} (GET)'.format(batch_id))
    try:
        batch = SimpleBatch(batch_id)
    except Exception:
        # Any failure to open the batch is reported as "not found", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404

    res = {
        'pages': url_for('api.batchpages', batch_id=batch_id),
        'tasks': url_for('api.batchtasks', batch_id=batch_id),
    }
    if batch.is_running():
        doc_keys = ('root_document', 'result', 'doc')

        def replace_docs(state):
            # Replace all document tuples with URLs to the page resource,
            # descending into nested dictionaries. Only values of existing
            # keys are reassigned, so iterating the dict directly is safe.
            for key in state:
                value = state[key]
                # Empty values (None, [], ...) carry no document reference
                # and are left untouched instead of being indexed.
                if key in doc_keys and value:
                    if isinstance(value[0], list):
                        # a list of documents
                        state[key] = [
                            url_for('api.page', batch=doc[0], file=doc[1])
                            for doc in value
                        ]
                    else:
                        # a single document
                        state[key] = url_for('api.page', batch=value[0],
                                             file=value[1])
                if isinstance(state[key], dict):
                    replace_docs(state[key])

        res['chains'] = batch.get_extended_state()
        replace_docs(res['chains'])
    return res, 200
def get(self, batch_id):
    """
    Returns the list of pages associated with the batch with *batch_id*.

    ** Request **

    .. sourcecode:: http

        GET /batch/:batch/pages

    ** Response **

    .. sourcecode:: http

        HTTP/1.1 200 OK

        [
            {
                "name": "0033.tif",
                "url": "/pages/63ca3ec7-2592-4c7d-9009-913aac42535d/0033.tif"
            },
            {
                "name": "0072.tif",
                "url": "/pages/63ca3ec7-2592-4c7d-9009-913aac42535d/0072.tif"
            },
            {
                "name": "0014.tif",
                "url": "/pages/63ca3ec7-2592-4c7d-9009-913aac42535d/0014.tif"
            }
        ]

    :param batch_id: batch identifier
    :status 200: success
    :status 404: batch not found
    """
    log.debug('Routing to pages of {} (GET)'.format(batch_id))
    try:
        batch = SimpleBatch(batch_id)
    except Exception:
        # Any failure to open the batch is reported as "not found", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404

    # Each document is indexed as (batch, file name).
    data = [
        {
            'name': doc[1],
            'url': url_for('api.page', batch=doc[0], file=doc[1]),
        }
        for doc in batch.get_documents()
    ]
    return data, 200
def post(self, batch_id):
    """
    Executes batch with identifier *batch_id*

    ** Request **

    .. sourcecode:: http

        POST /batch/:batch_id

    ** Response **

    .. sourcecode:: http

        HTTP/1.1 202 ACCEPTED

    On success the response body contains the batch ``id`` and the ``url``
    of the batch resource.

    :param batch_id: batch's unique id
    :type batch_id: string
    :status 202: Successfully executed
    :status 400: Batch could not be executed
    :status 404: No such batch
    :status 409: Trying to reexecute an already executed batch
    """
    log.debug('Routing to batch {} (POST)'.format(batch_id))
    try:
        batch = SimpleBatch(batch_id)
    except Exception:
        # Any failure to open the batch is reported as "not found", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        log.debug('Batch {} not found'.format(batch_id))
        return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404

    # Only a batch whose state is still 'NONE' may be started.
    if batch.get_state() != 'NONE':
        log.debug('Batch {} already executed'.format(batch_id))
        return {'message': 'Batch already executed'}, 409

    try:
        batch.run()
        return {
            'id': batch_id,
            'url': url_for('api.batch', batch_id=batch_id),
        }, 202
    except Exception:
        log.debug('Batch {} could not be executed'.format(batch_id),
                  exc_info=True)
        return {'message': 'Batch could not be executed'}, 400
def status(verbose, host, job_id):
    """
    Displays the status and results of jobs.
    """
    # NOTE(review): presumably registered as a click command, in which case
    # the docstring above doubles as the help text -- keep it short.
    #
    # verbose -- compared against 0 and 1: above 0 the third element of a
    #            failed task's 'errors' entry is printed, above 1 its first
    #            element (minus the 'method' key) is printed as well.
    # host    -- if truthy the job is queried through NetworkSimpleBatch,
    #            otherwise through a local SimpleBatch.
    # job_id  -- identifier of the batch to inspect.
    if host:
        batch = NetworkSimpleBatch(host, job_id)
    else:
        batch = SimpleBatch(job_id)
    state = batch.get_extended_state()

    click.secho('Status:', underline=True, nl=False)
    if not state:
        click.echo(' UNKNOWN')
        return

    bs = 'success'
    done = 0
    running = 0
    failed = 0
    results = []
    errors = []
    expected = len(state)
    failed_children = set()

    for task_id, subtask in state.iteritems():
        task_state = subtask['state']
        if task_state == 'SUCCESS':
            done += 1
        elif task_state == 'RUNNING':
            running += 1
            if bs == 'success':
                bs = 'pending'
        elif task_state == 'PENDING':
            if bs == 'success':
                bs = 'pending'
        elif task_state == 'FAILURE':
            failed += 1
            # 'children' may hold a single value instead of a list;
            # normalize it in place (for the failed task and its direct
            # children) because the leaf check below takes its length.
            if not isinstance(subtask['children'], list):
                subtask['children'] = [subtask['children']]
            for child in subtask['children']:
                if not isinstance(state[child]['children'], list):
                    state[child]['children'] = [state[child]['children']]
                failed_children.add(child)
            errors.append(subtask)
            bs = 'failed'

        # Leaf tasks that are not housekeeping and carry a result are the
        # output files of the batch.
        if (len(subtask['children']) == 0 and not subtask['housekeeping']
                and subtask['result'] is not None):
            # try to find statistics results
            # The list is deliberately extended while being iterated so
            # that the walk continues through all ancestors until one with
            # a 'misc' entry is found.
            parents = [task_id] + subtask['parents']
            misc = None
            for parent in parents:
                parents.extend(state[parent]['parents'])
                if 'misc' in state[parent]:
                    misc = state[parent]['misc']
                    break
            results.append((subtask['result'], subtask['root_document'],
                            misc))

    # Nothing is left to wait for once every task has either completed,
    # failed, or is the direct child of a failed task.
    remaining = expected - failed - done - len(failed_children)
    final = '(final)' if not remaining else ''
    click.echo(' {} {}\n'.format(bs, final))
    click.echo('{}/{} tasks completed. {} running.\n'.format(
        done, len(state), running))
    click.secho('Output files:\n', underline=True)

    results = sorted(results, key=lambda x: x[0][1])
    if results and host:
        for doc in results:
            if doc[2] is not None:
                click.echo(u'{} \u2192 {} ({:.1f}% / {})'.format(
                    doc[1], doc[0], 100 * doc[2]['edit_ratio'],
                    doc[2]['ground_truth'][1]))
            else:
                click.echo(u'{} \u2192 {}'.format(doc[1], doc[0]))
    elif results:
        from nidaba import storage
        for doc in results:
            output = click.format_filename(storage.get_abs_path(*doc[0]))
            if doc[2] is not None:
                click.echo(u'{} \u2192 {} ({:.1f}% / {})'.format(
                    doc[1][1], output, 100 * doc[2]['edit_ratio'],
                    doc[2]['ground_truth'][1]))
            else:
                click.echo(u'{} \u2192 {}'.format(doc[1][1], output))

    if errors:
        click.secho('\nErrors:\n', underline=True)
        for task in errors:
            tb = ''
            args = ''
            if verbose > 0:
                tb = task['errors'][2]
            if verbose > 1:
                # Drop 'method' from the printed arguments; a missing key
                # must not abort the error report.
                task['errors'][0].pop('method', None)
                args = ', ' + str(task['errors'][0])
            click.echo('{0} ({1}{2}): {3}{4}'.format(
                task['task'][0], task['root_document'][1], args, tb,
                task['errors'][1]))
def batch(files, host, preprocessing, binarize, ocr, segmentation, stats,
          postprocessing, output, grayscale, help_tasks):
    """
    Add a new job to the pipeline.
    """
    # NOTE(review): presumably registered as a click command, in which case
    # the docstring above doubles as the help text -- keep it short.
    #
    # files      -- paths of the input documents.
    # host       -- if truthy the batch is created on that host through
    #               NetworkSimpleBatch and the files are uploaded;
    #               otherwise the files are copied into local storage.
    # grayscale  -- if falsy an 'img'/'rgb_to_gray' task is added first.
    # help_tasks -- not used inside this function.
    # The remaining arguments are iterables of (name, [kwargs, ...]) pairs,
    # one per task group; every kwargs dict yields one task.
    import sys

    if host:
        batch = NetworkSimpleBatch(host)
        click.echo(u'Preparing filestore\t\t[', nl=False)
        try:
            batch.create_batch()
        except Exception:
            click.secho(u'\u2717', fg='red', nl=False)
            click.echo(']')
            # Signal the failure to the calling shell with a non-zero exit
            # status.
            sys.exit(1)
        click.secho(u'\u2713', fg='green', nl=False)
        click.echo(']')
        for doc in files:
            def callback(monitor):
                spin(u'Uploading {}'.format(doc))
            batch.add_document(doc, callback)
            click.secho(u'\b\u2713', fg='green', nl=False)
            click.echo('\033[?25h\n', nl=False)
    else:
        from nidaba import storage
        click.echo(u'Preparing filestore\t\t[', nl=False)
        try:
            batch = SimpleBatch()
        except Exception:
            click.secho(u'\u2717', fg='red', nl=False)
            click.echo(']')
            sys.exit(1)
        for doc in files:
            name = os.path.basename(doc)
            shutil.copy2(doc, storage.get_abs_path(batch.id, name))
            batch.add_document((batch.id, name))
        click.secho(u'\u2713', fg='green', nl=False)
        click.echo(']')

    click.echo(u'Building batch\t\t\t[', nl=False)
    if not grayscale:
        batch.add_task('img', 'rgb_to_gray')

    # Task groups in the order in which they are added to the batch.
    stages = (
        ('img', preprocessing),
        ('binarize', binarize),
        ('segmentation', segmentation),
        ('ocr', ocr),
        ('stats', stats),
        ('postprocessing', postprocessing),
        ('output', output),
    )
    for group, algorithms in stages:
        # An option that was not given may be None or empty.
        for alg in algorithms or ():
            for kwargs in alg[1]:
                kwargs = move_to_storage(batch, kwargs)
                batch.add_task(group, alg[0], **kwargs)

    batch.run()
    click.secho(u'\u2713', fg='green', nl=False)
    click.echo(']')
    click.echo(batch.id)
def post(self, batch_id, group=None, task=None):
    """
    Adds a particular configuration of a task to the batch identified by
    *batch_id*.

    ** Request **

        POST /batch/:batch_id/:group/:task

        {
            kwarg_1: "value",
            kwarg_2: 10,
            kwarg_3: 'true',
            kwarg_4: ["a", "b"],
            kwarg_5: '/pages/:batch_id/path'
        }

    ** Response **

    .. sourcecode:: http

        HTTP/1.1 201 CREATED

    To post files as arguments use their URL returned by the call that
    created them on the batch. Booleans are strings containing either the
    values 'True'/'true' or 'False'/'false'.

    :param batch_id: batch identifier
    :param group: task group the task belongs to
    :param task: name of the task
    :status 201: task created
    :status 404: batch not found
    :status 422: the request body could not be converted or the task could
                 not be added to the batch
    """
    log.debug('Routing to task {}.{} of {} (POST)'.format(
        group, task, batch_id))
    try:
        batch = SimpleBatch(batch_id)
    except Exception:
        # Any failure to open the batch is reported as "not found", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404

    def arg_conversion(s):
        # Booleans are accepted in their string spelling.
        if s in ['True', 'true']:
            return True
        if s in ['False', 'false']:
            return False
        # XXX: find a nicer way to rewrite page URLs
        # A page URL of this batch is turned back into a (batch, path)
        # tuple.
        base_url = url_for('api.page', batch=batch_id, file='')
        if isinstance(s, basestring) and s.startswith(base_url):
            return (batch_id, s[len(base_url):])
        return s

    try:
        kwargs = {
            k: arg_conversion(v)
            for k, v in request.get_json().iteritems()
        }
        batch.add_task(group, task, **kwargs)
    except Exception as e:
        log.debug('Adding task {} to {} failed: {}'.format(
            task, batch_id, str(e)))
        return {'message': str(e)}, 422
    return {}, 201
def get(self, batch_id, group=None, task=None):
    """
    Retrieves the list of tasks and their argument values associated with a
    batch, optionally limited to a specific group.

    ** Request **

    .. sourcecode:: http

        GET /batch/:batch_id/tasks

    ** Response **

    .. sourcecode:: http

        HTTP/1.1 200 OK

        {
            "segmentation": [
                ["tesseract", {}]
            ],
            "ocr": [
                ["kraken",
                    {
                        "model": "teubner",
                    }
                ]
            ]
        }

    To limit output to a specific group of tasks, e.g. segmentation or
    binarization append the group to the URL:

    ** Request **

    .. sourcecode:: http

        GET /batch/:batch_id/tasks/:group

    ** Response **

    .. sourcecode:: http

        HTTP/1.1 200 OK

        {
            'group': [
                ["tesseract", {}],
                ["kraken", {}]
            ]
        }

    :param batch_id: batch identifier
    :param group: optional task group to limit the output to
    :param task: optional task within *group* to limit the output to
    :status 200: success
    :status 404: batch, group, or task not found.
    """
    log.debug('Routing to task {}.{} of {} (GET)'.format(
        group, task, batch_id))
    try:
        batch = SimpleBatch(batch_id)
    except Exception:
        # Any failure to open the batch is reported as "not found", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        log.debug('Batch {} not found'.format(batch_id))
        return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404

    tasks = batch.get_tasks()
    if group and group not in tasks:
        log.debug('Unknown group {} ({})'.format(group, batch_id))
        return {'message': 'Unknown group {}'.format(group)}, 404
    # NOTE(review): a task is looked up inside its group, so this assumes
    # that a task is only ever supplied together with a group -- confirm
    # against the URL routes.
    if task and task not in tasks[group]:
        log.debug('Unknown task {}.{} ({})'.format(group, task, batch_id))
        return {'message': 'Unknown task {}'.format(task)}, 404

    # Narrow the result down to the most specific selection requested.
    if task:
        return {group: {task: tasks[group][task]}}, 200
    if group:
        return {group: tasks[group]}, 200
    return tasks, 200