def status(verbose, host, job_id):
    """
    Displays the status and results of jobs.
    """
    # Parameters:
    #   verbose -- verbosity level; > 0 adds the third element of a failed
    #              task's 'errors' entry (presumably the traceback) to the
    #              error report, > 1 additionally adds the task arguments.
    #   host    -- if truthy, the batch is queried through
    #              NetworkSimpleBatch(host, job_id); otherwise a local
    #              SimpleBatch(job_id) is used.
    #   job_id  -- identifier of the batch to inspect.
    # NOTE(review): the docstring is kept to one sentence because it
    # presumably doubles as click help text -- confirm against the
    # decorators applied to this function.
    if host:
        batch = NetworkSimpleBatch(host, job_id)
    else:
        batch = SimpleBatch(job_id)

    state = batch.get_extended_state()

    click.secho('Status:', underline=True, nl=False)
    if not state:
        click.echo(' UNKNOWN')
        return

    bs = 'success'
    done = 0
    running = 0
    failed = 0
    results = []
    errors = []
    expected = len(state)
    failed_children = set()
    # items() instead of iteritems() so the loop runs on Python 2 and 3;
    # only nested values are modified below, never the keys of ``state``.
    for task_id, subtask in state.items():
        task_state = subtask['state']
        if task_state == 'SUCCESS':
            done += 1
        elif task_state == 'RUNNING':
            running += 1
            if bs == 'success':
                bs = 'pending'
        elif task_state == 'PENDING':
            if bs == 'success':
                bs = 'pending'
        elif task_state == 'FAILURE':
            failed += 1
            # 'children' may be a single value instead of a list; normalize
            # it in place because the leaf check below calls len() on it.
            if not isinstance(subtask['children'], list):
                subtask['children'] = [subtask['children']]
            for child in subtask['children']:
                if not isinstance(state[child]['children'], list):
                    state[child]['children'] = [state[child]['children']]
                # direct children of a failed task count as finished when
                # deciding whether the reported state is final
                failed_children.add(child)
            errors.append(subtask)
            bs = 'failed'

        # leaf tasks that are not housekeeping and carry a result are the
        # output files of the batch
        if (len(subtask['children']) == 0 and not subtask['housekeeping']
                and subtask['result'] is not None):
            # try to find statistics results: look at the task itself and
            # then its ancestors (the list grows while it is iterated)
            # until one of them carries a 'misc' entry
            parents = [task_id] + subtask['parents']
            misc = None
            for parent in parents:
                parents.extend(state[parent]['parents'])
                if 'misc' in state[parent]:
                    misc = state[parent]['misc']
                    break
            results.append((subtask['result'], subtask['root_document'],
                            misc))

    final = ''
    if not expected - failed - done - len(failed_children):
        final = '(final)'
    click.echo(' {} {}\n'.format(bs, final))
    click.echo('{}/{} tasks completed. {} running.\n'.format(
        done, len(state), running))

    click.secho('Output files:\n', underline=True)
    results = sorted(results, key=lambda x: x[0][1])
    if results and not host:
        # only needed to resolve local storage tuples into file paths
        from nidaba import storage
    for result, root_document, misc in results:
        if host:
            # remote results are printed exactly as returned by the service
            source, target = root_document, result
        else:
            target = click.format_filename(storage.get_abs_path(*result))
            source = root_document[1]
        if misc is not None:
            click.echo(u'{} \u2192 {} ({:.1f}% / {})'.format(
                source, target, 100 * misc['edit_ratio'],
                misc['ground_truth'][1]))
        else:
            click.echo(u'{} \u2192 {}'.format(source, target))

    if errors:
        click.secho('\nErrors:\n', underline=True)
        for task in errors:
            tb = ''
            args = ''
            if verbose > 0:
                tb = task['errors'][2]
            if verbose > 1:
                # drop the 'method' entry before printing the arguments; the
                # default keeps a missing key from aborting the report
                task['errors'][0].pop('method', None)
                args = ', ' + str(task['errors'][0])
            click.echo('{0} ({1}{2}): {3}{4}'.format(
                task['task'][0], task['root_document'][1], args, tb,
                task['errors'][1]))
def batch(files, host, preprocessing, binarize, ocr, segmentation, stats,
          postprocessing, output, grayscale, help_tasks):
    """
    Add a new job to the pipeline.
    """
    # Parameters:
    #   files      -- iterable of paths of the documents to process.
    #   host       -- if truthy, documents are uploaded through
    #                 NetworkSimpleBatch(host); otherwise they are copied
    #                 into the local nidaba storage of a new SimpleBatch.
    #   preprocessing, binarize, segmentation, ocr, stats, postprocessing,
    #   output     -- per task group, an iterable of entries where entry[0]
    #                 is the method name and entry[1] an iterable of keyword
    #                 argument dicts; one task is added per dict. Falsy
    #                 values skip the group.
    #   grayscale  -- if falsy, an ('img', 'rgb_to_gray') task is added
    #                 before all other tasks.
    #   help_tasks -- accepted but not used inside this function.
    # NOTE(review): the docstring is kept to one sentence because it
    # presumably doubles as click help text -- confirm against the
    # decorators applied to this function.
    import sys

    if host:
        job = NetworkSimpleBatch(host)
        click.echo(u'Preparing filestore\t\t[', nl=False)
        try:
            job.create_batch()
        except Exception:
            # Exception (not a bare except) lets Ctrl-C propagate, and the
            # non-zero status tells callers that nothing was submitted.
            click.secho(u'\u2717', fg='red', nl=False)
            click.echo(']')
            sys.exit(1)
        click.secho(u'\u2713', fg='green', nl=False)
        click.echo(']')
        for doc in files:
            def callback(monitor):
                spin(u'Uploading {}'.format(doc))
            job.add_document(doc, callback)
            # overwrite the last spinner character with a check mark, then
            # emit the show-cursor escape sequence and end the line
            click.secho(u'\b\u2713', fg='green', nl=False)
            click.echo('\033[?25h\n', nl=False)
    else:
        from nidaba import storage
        click.echo(u'Preparing filestore\t\t[', nl=False)
        try:
            job = SimpleBatch()
        except Exception:
            click.secho(u'\u2717', fg='red', nl=False)
            click.echo(']')
            sys.exit(1)
        for doc in files:
            name = os.path.basename(doc)
            shutil.copy2(doc, storage.get_abs_path(job.id, name))
            job.add_document((job.id, name))
        click.secho(u'\u2713', fg='green', nl=False)
        click.echo(']')

    click.echo(u'Building batch\t\t\t[', nl=False)
    if not grayscale:
        job.add_task('img', 'rgb_to_gray')

    # Task groups in the order they are added to the batch; preprocessing
    # methods are registered under the 'img' group.
    task_groups = (
        ('img', preprocessing),
        ('binarize', binarize),
        ('segmentation', segmentation),
        ('ocr', ocr),
        ('stats', stats),
        ('postprocessing', postprocessing),
        ('output', output),
    )
    for group, algorithms in task_groups:
        for alg in algorithms or ():
            for kwargs in alg[1]:
                kwargs = move_to_storage(job, kwargs)
                job.add_task(group, alg[0], **kwargs)

    job.run()
    click.secho(u'\u2713', fg='green', nl=False)
    click.echo(']')
    click.echo(job.id)
def status(verbose, host, job_id):
    """
    Displays the status and results of jobs.
    """
    # Parameters:
    #   verbose -- verbosity level; > 0 adds the third element of a failed
    #              task's 'errors' entry (presumably the traceback) to the
    #              error report, > 1 additionally adds the task arguments.
    #   host    -- if truthy, the batch is queried through
    #              NetworkSimpleBatch(host, job_id); otherwise a local
    #              SimpleBatch(job_id) is used.
    #   job_id  -- identifier of the batch to inspect.
    # NOTE(review): the docstring is kept to one sentence because it
    # presumably doubles as click help text -- confirm against the
    # decorators applied to this function.
    if host:
        batch = NetworkSimpleBatch(host, job_id)
    else:
        batch = SimpleBatch(job_id)

    state = batch.get_extended_state()

    click.secho('Status:', underline=True, nl=False)
    if not state:
        click.echo(' UNKNOWN')
        return

    bs = 'success'
    done = 0
    running = 0
    failed = 0
    results = []
    errors = []
    expected = len(state)
    failed_children = set()
    # items() instead of iteritems() so the loop runs on Python 2 and 3;
    # only nested values are modified below, never the keys of ``state``.
    for task_id, subtask in state.items():
        task_state = subtask['state']
        if task_state == 'SUCCESS':
            done += 1
        elif task_state == 'RUNNING':
            running += 1
            if bs == 'success':
                bs = 'pending'
        elif task_state == 'PENDING':
            if bs == 'success':
                bs = 'pending'
        elif task_state == 'FAILURE':
            failed += 1
            # 'children' may be a single value instead of a list; normalize
            # it in place because the leaf check below calls len() on it.
            if not isinstance(subtask['children'], list):
                subtask['children'] = [subtask['children']]
            for child in subtask['children']:
                if not isinstance(state[child]['children'], list):
                    state[child]['children'] = [state[child]['children']]
                # direct children of a failed task count as finished when
                # deciding whether the reported state is final
                failed_children.add(child)
            errors.append(subtask)
            bs = 'failed'

        # leaf tasks that are not housekeeping and carry a result are the
        # output files of the batch
        if (len(subtask['children']) == 0 and not subtask['housekeeping']
                and subtask['result'] is not None):
            # try to find statistics results: look at the task itself and
            # then its ancestors (the list grows while it is iterated)
            # until one of them carries a 'misc' entry
            parents = [task_id] + subtask['parents']
            misc = None
            for parent in parents:
                parents.extend(state[parent]['parents'])
                if 'misc' in state[parent]:
                    misc = state[parent]['misc']
                    break
            results.append((subtask['result'], subtask['root_document'],
                            misc))

    final = ''
    if not expected - failed - done - len(failed_children):
        final = '(final)'
    click.echo(' {} {}\n'.format(bs, final))
    click.echo('{}/{} tasks completed. {} running.\n'.format(
        done, len(state), running))

    click.secho('Output files:\n', underline=True)
    results = sorted(results, key=lambda x: x[0][1])
    if results and not host:
        # only needed to resolve local storage tuples into file paths
        from nidaba import storage
    for result, root_document, misc in results:
        if host:
            # remote results are printed exactly as returned by the service
            source, target = root_document, result
        else:
            target = click.format_filename(storage.get_abs_path(*result))
            source = root_document[1]
        if misc is not None:
            click.echo(u'{} \u2192 {} ({:.1f}% / {})'.format(
                source, target, 100 * misc['edit_ratio'],
                misc['ground_truth'][1]))
        else:
            click.echo(u'{} \u2192 {}'.format(source, target))

    if errors:
        click.secho('\nErrors:\n', underline=True)
        for task in errors:
            tb = ''
            args = ''
            if verbose > 0:
                tb = task['errors'][2]
            if verbose > 1:
                # drop the 'method' entry before printing the arguments; the
                # default keeps a missing key from aborting the report
                task['errors'][0].pop('method', None)
                args = ', ' + str(task['errors'][0])
            click.echo('{0} ({1}{2}): {3}{4}'.format(
                task['task'][0], task['root_document'][1], args, tb,
                task['errors'][1]))
def status(verbose, host, job_id):
    """
    Displays the status and results of jobs.
    """
    # Parameters:
    #   verbose -- verbosity level; > 0 adds the third element of a failed
    #              task's 'errors' entry (presumably the traceback) to the
    #              error report, > 1 additionally adds the task arguments.
    #   host    -- if truthy, the batch is queried through
    #              NetworkSimpleBatch(host, job_id); otherwise a local
    #              Batch(job_id) is used.
    #   job_id  -- identifier of the batch to inspect.
    # NOTE(review): the docstring is kept to one sentence because it
    # presumably doubles as click help text -- confirm against the
    # decorators applied to this function.
    click.secho('Status:', underline=True, nl=False)
    if host:
        batch = NetworkSimpleBatch(host, job_id)
    else:
        try:
            batch = Batch(job_id)
        except NidabaInputException:
            click.echo(' UNKNOWN')
            return

    state = batch.get_extended_state()
    if not state:
        click.echo(' UNKNOWN')
        return

    bs = 'success'
    done = 0
    running = 0
    results = []
    errors = []
    expected = len(state)
    # items() instead of iteritems() so the loop runs on Python 2 and 3
    for task_id, subtask in state.items():
        task_state = subtask['state']
        if task_state == 'SUCCESS':
            done += 1
        elif task_state == 'RUNNING':
            running += 1
            if bs == 'success':
                bs = 'pending'
        elif task_state == 'PENDING':
            if bs == 'success':
                bs = 'pending'
        elif task_state == 'FAILURE':
            errors.append(subtask)
            bs = 'failed'

        # leaf nodes/result extraction
        if len(subtask['children']) == 0 and subtask['result'] is not None:
            # try to find statistics results: look at the task itself and
            # then its ancestors (the list grows while it is iterated)
            # until one of them carries a 'misc' entry
            parents = [task_id] + subtask['parents']
            misc = None
            for parent in parents:
                parents.extend(state[parent]['parents'])
                if 'misc' in state[parent]:
                    misc = state[parent]['misc']
                    break
            result = subtask['result']
            # archival tasks bunch everything together. do a sort-based
            # matching of input and output tasks. A remote result only
            # counts as bunched when it is a list: testing its length would
            # also match a single string result and split it into
            # characters.
            if isinstance(result[0], list) or (host and
                                               isinstance(result, list)):
                for res, rd in zip(sorted(result),
                                   sorted(subtask['root_documents'])):
                    if host:
                        res = [res]
                    results.append((res, [rd], misc))
            else:
                if host:
                    # wrap a single remote result so it is joined as one
                    # item when rendered below
                    result = [result]
                results.append((result, subtask['root_documents'], misc))

    final = '(final)' if expected - done == 0 else ''
    click.echo(' {} {}\n'.format(bs, final))
    click.echo('{}/{} tasks completed. {} running.\n'.format(
        done, expected, running))

    # render results
    click.secho('Output files:\n', underline=True)
    results = sorted(results, key=lambda x: x[0][0][1]
                     if isinstance(x[0], list) else x[0][1])
    if results and not host:
        # only needed to resolve local storage tuples into file paths
        from nidaba import storage
    for outputs, root_docs, misc in results:
        if host:
            target = ', '.join(outputs)
            source = ', '.join(root_docs)
        else:
            if isinstance(outputs[0], list):
                # several storage tuples: format each one and join the paths
                target = ', '.join(
                    click.format_filename(storage.get_abs_path(*d))
                    for d in outputs)
            else:
                target = click.format_filename(
                    storage.get_abs_path(*outputs))
            source = ', '.join(d[1] for d in root_docs)
        if misc is not None:
            click.echo(u'{} \u2192 {} ({:.1f}% / {})'.format(
                source, target, 100 * misc['edit_ratio'],
                misc['ground_truth'][1]))
        else:
            click.echo(u'{} \u2192 {}'.format(source, target))

    # render errors
    if errors:
        click.secho('\nErrors:\n', underline=True)
        for task in errors:
            tb = ''
            args = ''
            if verbose > 0:
                tb = task['errors'][2]
            if verbose > 1:
                # drop the 'method' entry before printing the arguments; the
                # default keeps a missing key from aborting the report
                task['errors'][0].pop('method', None)
                args = ', ' + str(task['errors'][0])
            if host:
                rd = ', '.join(
                    os.path.basename(x) for x in task['root_documents'])
            else:
                rd = ', '.join(
                    os.path.basename(x[1]) for x in task['root_documents'])
            click.echo('{}.{} ({}{}): {}{}'.format(
                task['task'][0], task['task'][1], rd, args, tb,
                task['errors'][1]))
def status(verbose, host, job_id):
    """
    Displays the status and results of jobs.
    """
    # Parameters:
    #   verbose -- verbosity level; > 0 adds the third element of a failed
    #              task's 'errors' entry (presumably the traceback) to the
    #              error report, > 1 additionally adds the task arguments.
    #   host    -- if truthy, the batch is queried through
    #              NetworkSimpleBatch(host, job_id); otherwise a local
    #              Batch(job_id) is used.
    #   job_id  -- identifier of the batch to inspect.
    # NOTE(review): the docstring is kept to one sentence because it
    # presumably doubles as click help text -- confirm against the
    # decorators applied to this function.
    click.secho('Status:', underline=True, nl=False)
    if host:
        batch = NetworkSimpleBatch(host, job_id)
    else:
        try:
            batch = Batch(job_id)
        except NidabaInputException:
            click.echo(' UNKNOWN')
            return

    state = batch.get_extended_state()
    if not state:
        click.echo(' UNKNOWN')
        return

    bs = 'success'
    done = 0
    running = 0
    results = []
    errors = []
    expected = len(state)
    # items() instead of iteritems() so the loop runs on Python 2 and 3
    for task_id, subtask in state.items():
        task_state = subtask['state']
        if task_state == 'SUCCESS':
            done += 1
        elif task_state == 'RUNNING':
            running += 1
            if bs == 'success':
                bs = 'pending'
        elif task_state == 'PENDING':
            if bs == 'success':
                bs = 'pending'
        elif task_state == 'FAILURE':
            errors.append(subtask)
            bs = 'failed'

        # leaf nodes/result extraction
        if len(subtask['children']) == 0 and subtask['result'] is not None:
            # try to find statistics results: look at the task itself and
            # then its ancestors (the list grows while it is iterated)
            # until one of them carries a 'misc' entry
            parents = [task_id] + subtask['parents']
            misc = None
            for parent in parents:
                parents.extend(state[parent]['parents'])
                if 'misc' in state[parent]:
                    misc = state[parent]['misc']
                    break
            result = subtask['result']
            # archival tasks bunch everything together. do a sort-based
            # matching of input and output tasks
            if isinstance(result[0], list) or (host and
                                               isinstance(result, list)):
                for res, rd in zip(sorted(result),
                                   sorted(subtask['root_documents'])):
                    if host:
                        res = [res]
                    results.append((res, [rd], misc))
            else:
                if host:
                    # wrap a single remote result so it is joined as one
                    # item when rendered below
                    result = [result]
                results.append((result, subtask['root_documents'], misc))

    final = '(final)' if expected - done == 0 else ''
    click.echo(' {} {}\n'.format(bs, final))
    click.echo('{}/{} tasks completed. {} running.\n'.format(
        done, expected, running))

    # render results
    click.secho('Output files:\n', underline=True)
    results = sorted(results, key=lambda x: x[0][0][1]
                     if isinstance(x[0], list) else x[0][1])
    if results and not host:
        # only needed to resolve local storage tuples into file paths
        from nidaba import storage
    for outputs, root_docs, misc in results:
        if host:
            target = ', '.join(outputs)
            source = ', '.join(root_docs)
        else:
            if isinstance(outputs[0], list):
                # several storage tuples: format each one and join the
                # paths, rather than joining the characters of one path
                target = ', '.join(
                    click.format_filename(storage.get_abs_path(*d))
                    for d in outputs)
            else:
                target = click.format_filename(
                    storage.get_abs_path(*outputs))
            source = ', '.join(d[1] for d in root_docs)
        if misc is not None:
            click.echo(u'{} \u2192 {} ({:.1f}% / {})'.format(
                source, target, 100 * misc['edit_ratio'],
                misc['ground_truth'][1]))
        else:
            click.echo(u'{} \u2192 {}'.format(source, target))

    # render errors
    if errors:
        click.secho('\nErrors:\n', underline=True)
        for task in errors:
            tb = ''
            args = ''
            if verbose > 0:
                tb = task['errors'][2]
            if verbose > 1:
                # drop the 'method' entry before printing the arguments; the
                # default keeps a missing key from aborting the report
                task['errors'][0].pop('method', None)
                args = ', ' + str(task['errors'][0])
            if host:
                rd = ', '.join(
                    os.path.basename(x) for x in task['root_documents'])
            else:
                rd = ', '.join(
                    os.path.basename(x[1]) for x in task['root_documents'])
            click.echo('{}.{} ({}{}): {}{}'.format(
                task['task'][0], task['task'][1], rd, args, tb,
                task['errors'][1]))