Ejemplo n.º 1
0
def status(verbose, host, job_id):
    """
    Displays the status and results of jobs.

    Prints the overall batch state, a completion counter, the output files
    of all leaf tasks that produced a result and, if any task failed, an
    error listing.

    Args:
        verbose: Verbosity level. Above 0 the third element of a failed
                 task's ``errors`` entry (presumably the traceback) is
                 included in the error listing; above 1 the first element
                 (presumably the task arguments) is included as well.
        host: If truthy the batch is queried through ``NetworkSimpleBatch``
              using this value, otherwise through a local ``SimpleBatch``.
        job_id: Identifier of the batch to inspect.
    """
    if host:
        batch = NetworkSimpleBatch(host, job_id)
    else:
        batch = SimpleBatch(job_id)

    state = batch.get_extended_state()

    click.secho('Status:', underline=True, nl=False)
    if not state:
        click.echo(' UNKNOWN')
        return

    bs = 'success'
    done = 0
    running = 0
    failed = 0
    results = []
    errors = []
    expected = len(state)
    failed_children = set()
    # items() instead of iteritems() so the loop runs on Python 2 and 3.
    for task_id, subtask in state.items():
        task_state = subtask['state']
        if task_state == 'SUCCESS':
            done += 1
        elif task_state in ('RUNNING', 'PENDING'):
            if task_state == 'RUNNING':
                running += 1
            # unfinished work only downgrades a so far successful batch; a
            # recorded failure takes precedence.
            if bs == 'success':
                bs = 'pending'
        elif task_state == 'FAILURE':
            failed += 1
            # Normalise scalar ``children`` fields to lists in place. The
            # leaf check below calls len() on this field.
            if not isinstance(subtask['children'], list):
                subtask['children'] = [subtask['children']]
            for child in subtask['children']:
                if not isinstance(state[child]['children'], list):
                    state[child]['children'] = [state[child]['children']]
                failed_children.add(child)
            errors.append(subtask)
            bs = 'failed'

        # leaf nodes that are not housekeeping tasks carry the output files
        if (len(subtask['children']) == 0 and not subtask['housekeeping'] and
                subtask['result'] is not None):
            # try to find statistics results
            misc = _find_misc(state, task_id)
            results.append((subtask['result'], subtask['root_document'], misc))

    # the batch is final once every task has succeeded, failed, or is the
    # direct child of a failed task
    remaining = expected - failed - done - len(failed_children)
    final = '' if remaining else '(final)'
    click.echo(' {} {}\n'.format(bs, final))
    click.echo('{}/{} tasks completed. {} running.\n'.format(
        done, expected, running))

    click.secho('Output files:\n', underline=True)
    results = sorted(results, key=lambda x: x[0][1])
    if results and not host:
        # only needed (and only imported) when rendering local paths
        from nidaba import storage
    for result, root_document, misc in results:
        if host:
            target = result
            source = root_document
        else:
            target = click.format_filename(storage.get_abs_path(*result))
            source = root_document[1]
        if misc is not None:
            click.echo(u'{} \u2192 {} ({:.1f}% / {})'.format(
                source, target, 100 * misc['edit_ratio'],
                misc['ground_truth'][1]))
        else:
            click.echo(u'{} \u2192 {}'.format(source, target))

    if errors:
        click.secho('\nErrors:\n', underline=True)
        for task in errors:
            tb = ''
            args = ''
            if verbose > 0:
                tb = task['errors'][2]
            if verbose > 1:
                # drop the 'method' entry before printing; the default
                # avoids a KeyError when it is not present.
                task['errors'][0].pop('method', None)
                args = ', ' + str(task['errors'][0])
            click.echo('{0} ({1}{2}): {3}{4}'.format(task['task'][0],
                                                     task['root_document'][1],
                                                     args, tb,
                                                     task['errors'][1]))


def _find_misc(state, task_id):
    """
    Returns the ``misc`` entry of the closest task in the ancestry of
    ``task_id`` (the task itself included), or None if no such entry exists.

    Tasks are visited breadth-first along their ``parents`` fields. Each task
    is visited once, so shared ancestors are not re-walked and a cyclic
    ``parents`` graph cannot loop forever.
    """
    from collections import deque

    queue = deque([task_id])
    seen = set(queue)
    while queue:
        current = queue.popleft()
        for parent in state[current]['parents']:
            if parent not in seen:
                seen.add(parent)
                queue.append(parent)
        if 'misc' in state[current]:
            return state[current]['misc']
    return None
Ejemplo n.º 2
0
def batch(files, host, preprocessing, binarize, ocr, segmentation, stats,
          postprocessing, output, grayscale, help_tasks):
    """
    Add a new job to the pipeline.

    Creates a batch (remotely if ``host`` is set, locally otherwise), adds
    all documents and the requested tasks to it, runs it and prints its
    identifier. If the filestore cannot be prepared the process exits with
    status 1.

    Args:
        files: Iterable of paths of the documents to add.
        host: If truthy the batch is created through ``NetworkSimpleBatch``
              and documents are uploaded; otherwise a ``SimpleBatch`` is
              created and the documents are copied into local storage.
        preprocessing, binarize, ocr, segmentation, stats, postprocessing,
        output: Per-stage task definitions. Each is an iterable of entries
              whose first element is the task name and whose second element
              is an iterable of keyword argument dicts; one task is added
              per dict. Falsy values skip the stage.
        grayscale: If falsy an ``rgb_to_gray`` task is added before all
                   other tasks.
        help_tasks: Not used by this function.
    """
    import sys

    def abort():
        # Close the progress indicator with a red cross and signal the
        # failure to the caller through a non-zero exit status.
        click.secho(u'\u2717', fg='red', nl=False)
        click.echo(']')
        sys.exit(1)

    if host:
        job = NetworkSimpleBatch(host)
        click.echo(u'Preparing filestore\t\t[', nl=False)
        try:
            job.create_batch()
        except Exception:
            abort()
        click.secho(u'\u2713', fg='green', nl=False)
        click.echo(']')
        for doc in files:

            def callback(monitor):
                spin(u'Uploading {}'.format(doc))

            job.add_document(doc, callback)
            click.secho(u'\b\u2713', fg='green', nl=False)
            # escape sequence makes the terminal cursor visible again
            click.echo('\033[?25h\n', nl=False)
    else:
        from nidaba import storage
        click.echo(u'Preparing filestore\t\t[', nl=False)
        try:
            job = SimpleBatch()
        except Exception:
            abort()
        for doc in files:
            name = os.path.basename(doc)
            shutil.copy2(doc, storage.get_abs_path(job.id, name))
            job.add_document((job.id, name))
        click.secho(u'\u2713', fg='green', nl=False)
        click.echo(']')

    click.echo(u'Building batch\t\t\t[', nl=False)
    if not grayscale:
        job.add_task('img', 'rgb_to_gray')
    # (task group, task definitions) in the order they are added to the batch
    stages = (
        ('img', preprocessing),
        ('binarize', binarize),
        ('segmentation', segmentation),
        ('ocr', ocr),
        ('stats', stats),
        ('postprocessing', postprocessing),
        ('output', output),
    )
    for group, algorithms in stages:
        for alg in algorithms or ():
            for kwargs in alg[1]:
                kwargs = move_to_storage(job, kwargs)
                job.add_task(group, alg[0], **kwargs)
    job.run()
    click.secho(u'\u2713', fg='green', nl=False)
    click.echo(']')
    click.echo(job.id)
Ejemplo n.º 3
0
Archivo: cli.py Proyecto: ryanfb/nidaba
def status(verbose, host, job_id):
    """
    Displays the status and results of jobs.

    Prints the overall batch state, a completion counter, the output files
    of all leaf tasks that produced a result and, if any task failed, an
    error listing.

    Args:
        verbose: Verbosity level. Above 0 the third element of a failed
                 task's ``errors`` entry (presumably the traceback) is
                 included in the error listing; above 1 the first element
                 (presumably the task arguments) is included as well.
        host: If truthy the batch is queried through ``NetworkSimpleBatch``
              using this value, otherwise through a local ``SimpleBatch``.
        job_id: Identifier of the batch to inspect.
    """
    if host:
        batch = NetworkSimpleBatch(host, job_id)
    else:
        batch = SimpleBatch(job_id)

    state = batch.get_extended_state()

    click.secho('Status:', underline=True, nl=False)
    if not state:
        click.echo(' UNKNOWN')
        return

    bs = 'success'
    done = 0
    running = 0
    failed = 0
    results = []
    errors = []
    expected = len(state)
    failed_children = set()
    # items() instead of iteritems() so the loop runs on Python 2 and 3.
    for task_id, subtask in state.items():
        task_state = subtask['state']
        if task_state == 'SUCCESS':
            done += 1
        elif task_state in ('RUNNING', 'PENDING'):
            if task_state == 'RUNNING':
                running += 1
            # unfinished work only downgrades a so far successful batch; a
            # recorded failure takes precedence.
            if bs == 'success':
                bs = 'pending'
        elif task_state == 'FAILURE':
            failed += 1
            # Normalise scalar ``children`` fields to lists in place. The
            # leaf check below calls len() on this field.
            if not isinstance(subtask['children'], list):
                subtask['children'] = [subtask['children']]
            for child in subtask['children']:
                if not isinstance(state[child]['children'], list):
                    state[child]['children'] = [state[child]['children']]
                failed_children.add(child)
            errors.append(subtask)
            bs = 'failed'

        # leaf nodes that are not housekeeping tasks carry the output files
        if (len(subtask['children']) == 0 and not subtask['housekeeping'] and
                subtask['result'] is not None):
            # try to find statistics results
            misc = _find_misc(state, task_id)
            results.append((subtask['result'], subtask['root_document'], misc))

    # the batch is final once every task has succeeded, failed, or is the
    # direct child of a failed task
    remaining = expected - failed - done - len(failed_children)
    final = '' if remaining else '(final)'
    click.echo(' {} {}\n'.format(bs, final))
    click.echo('{}/{} tasks completed. {} running.\n'.format(
        done, expected, running))

    click.secho('Output files:\n', underline=True)
    results = sorted(results, key=lambda x: x[0][1])
    if results and not host:
        # only needed (and only imported) when rendering local paths
        from nidaba import storage
    for result, root_document, misc in results:
        if host:
            target = result
            source = root_document
        else:
            target = click.format_filename(storage.get_abs_path(*result))
            source = root_document[1]
        if misc is not None:
            click.echo(u'{} \u2192 {} ({:.1f}% / {})'.format(
                source, target, 100 * misc['edit_ratio'],
                misc['ground_truth'][1]))
        else:
            click.echo(u'{} \u2192 {}'.format(source, target))

    if errors:
        click.secho('\nErrors:\n', underline=True)
        for task in errors:
            tb = ''
            args = ''
            if verbose > 0:
                tb = task['errors'][2]
            if verbose > 1:
                # drop the 'method' entry before printing; the default
                # avoids a KeyError when it is not present.
                task['errors'][0].pop('method', None)
                args = ', ' + str(task['errors'][0])
            click.echo('{0} ({1}{2}): {3}{4}'.format(task['task'][0],
                                                     task['root_document'][1],
                                                     args, tb,
                                                     task['errors'][1]))


def _find_misc(state, task_id):
    """
    Returns the ``misc`` entry of the closest task in the ancestry of
    ``task_id`` (the task itself included), or None if no such entry exists.

    Tasks are visited breadth-first along their ``parents`` fields. Each task
    is visited once, so shared ancestors are not re-walked and a cyclic
    ``parents`` graph cannot loop forever.
    """
    from collections import deque

    queue = deque([task_id])
    seen = set(queue)
    while queue:
        current = queue.popleft()
        for parent in state[current]['parents']:
            if parent not in seen:
                seen.add(parent)
                queue.append(parent)
        if 'misc' in state[current]:
            return state[current]['misc']
    return None
Ejemplo n.º 4
0
Archivo: cli.py Proyecto: ryanfb/nidaba
def batch(files, host, preprocessing, binarize, ocr, segmentation, stats,
          postprocessing, output, grayscale, help_tasks):
    """
    Add a new job to the pipeline.

    Creates a batch (remotely if ``host`` is set, locally otherwise), adds
    all documents and the requested tasks to it, runs it and prints its
    identifier. If the filestore cannot be prepared the process exits with
    status 1.

    Args:
        files: Iterable of paths of the documents to add.
        host: If truthy the batch is created through ``NetworkSimpleBatch``
              and documents are uploaded; otherwise a ``SimpleBatch`` is
              created and the documents are copied into local storage.
        preprocessing, binarize, ocr, segmentation, stats, postprocessing,
        output: Per-stage task definitions. Each is an iterable of entries
              whose first element is the task name and whose second element
              is an iterable of keyword argument dicts; one task is added
              per dict. Falsy values skip the stage.
        grayscale: If falsy an ``rgb_to_gray`` task is added before all
                   other tasks.
        help_tasks: Not used by this function.
    """
    import sys

    def abort():
        # Close the progress indicator with a red cross and signal the
        # failure to the caller through a non-zero exit status.
        click.secho(u'\u2717', fg='red', nl=False)
        click.echo(']')
        sys.exit(1)

    if host:
        job = NetworkSimpleBatch(host)
        click.echo(u'Preparing filestore\t\t[', nl=False)
        try:
            job.create_batch()
        except Exception:
            abort()
        click.secho(u'\u2713', fg='green', nl=False)
        click.echo(']')
        for doc in files:

            def callback(monitor):
                spin(u'Uploading {}'.format(doc))

            job.add_document(doc, callback)
            click.secho(u'\b\u2713', fg='green', nl=False)
            # escape sequence makes the terminal cursor visible again
            click.echo('\033[?25h\n', nl=False)
    else:
        from nidaba import storage
        click.echo(u'Preparing filestore\t\t[', nl=False)
        try:
            job = SimpleBatch()
        except Exception:
            abort()
        for doc in files:
            name = os.path.basename(doc)
            shutil.copy2(doc, storage.get_abs_path(job.id, name))
            job.add_document((job.id, name))
        click.secho(u'\u2713', fg='green', nl=False)
        click.echo(']')

    click.echo(u'Building batch\t\t\t[', nl=False)
    if not grayscale:
        job.add_task('img', 'rgb_to_gray')
    # (task group, task definitions) in the order they are added to the batch
    stages = (
        ('img', preprocessing),
        ('binarize', binarize),
        ('segmentation', segmentation),
        ('ocr', ocr),
        ('stats', stats),
        ('postprocessing', postprocessing),
        ('output', output),
    )
    for group, algorithms in stages:
        for alg in algorithms or ():
            for kwargs in alg[1]:
                kwargs = move_to_storage(job, kwargs)
                job.add_task(group, alg[0], **kwargs)
    job.run()
    click.secho(u'\u2713', fg='green', nl=False)
    click.echo(']')
    click.echo(job.id)
Ejemplo n.º 5
0
def status(verbose, host, job_id):
    """
    Displays the status and results of jobs.

    Prints the overall batch state, a completion counter, the output files
    of all leaf tasks that produced a result and, if any task failed, an
    error listing. ``UNKNOWN`` is printed if a local batch cannot be loaded
    (``NidabaInputException``) or the batch reports no state.

    Args:
        verbose: Verbosity level. Above 0 the third element of a failed
                 task's ``errors`` entry (presumably the traceback) is
                 included in the error listing; above 1 the first element
                 (presumably the task arguments) is included as well.
        host: If truthy the batch is queried through ``NetworkSimpleBatch``
              using this value, otherwise through a local ``Batch``.
        job_id: Identifier of the batch to inspect.
    """
    click.secho('Status:', underline=True, nl=False)
    if host:
        batch = NetworkSimpleBatch(host, job_id)
    else:
        try:
            batch = Batch(job_id)
        except NidabaInputException:
            click.echo(' UNKNOWN')
            return

    state = batch.get_extended_state()
    if not state:
        click.echo(' UNKNOWN')
        return

    bs = 'success'
    done = 0
    running = 0
    results = []
    errors = []
    expected = len(state)
    # items() instead of iteritems() so the loop runs on Python 2 and 3.
    for task_id, subtask in state.items():
        task_state = subtask['state']
        if task_state == 'SUCCESS':
            done += 1
        elif task_state in ('RUNNING', 'PENDING'):
            if task_state == 'RUNNING':
                running += 1
            # unfinished work only downgrades a so far successful batch; a
            # recorded failure takes precedence.
            if bs == 'success':
                bs = 'pending'
        elif task_state == 'FAILURE':
            errors.append(subtask)
            bs = 'failed'

        # leaf nodes/result extraction
        if len(subtask['children']) == 0 and subtask['result'] is not None:
            # try to find statistics results
            misc = _find_misc(state, task_id)
            # archival tasks bunch everything together. do a sort-based
            # matching of input and output tasks
            # NOTE(review): with a host, len() > 1 is also true for a plain
            # string result longer than one character - confirm the result
            # is always a list in that case.
            if (isinstance(subtask['result'][0], list) or
                    (host and len(subtask['result']) > 1)):
                for res, rd in zip(sorted(subtask['result']),
                                   sorted(subtask['root_documents'])):
                    if host:
                        res = [res]
                    results.append((res, [rd], misc))
            else:
                results.append(
                    (subtask['result'], subtask['root_documents'], misc))

    final = '(final)' if expected - done == 0 else ''
    click.echo(' {} {}\n'.format(bs, final))
    click.echo('{}/{} tasks completed. {} running.\n'.format(
        done, expected, running))

    # render results
    click.secho('Output files:\n', underline=True)
    results = sorted(results,
                     key=lambda x: x[0][0][1]
                     if isinstance(x[0], list) else x[0][1])
    if results and not host:
        # only needed (and only imported) when rendering local paths
        from nidaba import storage
    for result, root_documents, misc in results:
        if host:
            output = ', '.join(result)
            source = ', '.join(root_documents)
        else:
            if isinstance(result[0], list):
                # several output files: resolve each one and list them all
                output = ', '.join(
                    click.format_filename(storage.get_abs_path(*d))
                    for d in result)
            else:
                output = click.format_filename(storage.get_abs_path(*result))
            source = ', '.join(d[1] for d in root_documents)
        if misc is not None:
            click.echo(u'{} \u2192 {} ({:.1f}% / {})'.format(
                source, output, 100 * misc['edit_ratio'],
                misc['ground_truth'][1]))
        else:
            click.echo(u'{} \u2192 {}'.format(source, output))

    # render errors
    if errors:
        click.secho('\nErrors:\n', underline=True)
        for task in errors:
            tb = ''
            args = ''
            if verbose > 0:
                tb = task['errors'][2]
            if verbose > 1:
                # drop the 'method' entry before printing; the default
                # avoids a KeyError when it is not present.
                task['errors'][0].pop('method', None)
                args = ', ' + str(task['errors'][0])
            if host:
                rd = ', '.join(
                    os.path.basename(x) for x in task['root_documents'])
            else:
                rd = ', '.join(
                    os.path.basename(x[1]) for x in task['root_documents'])
            click.echo('{}.{} ({}{}): {}{}'.format(task['task'][0],
                                                   task['task'][1], rd, args,
                                                   tb, task['errors'][1]))


def _find_misc(state, task_id):
    """
    Returns the ``misc`` entry of the closest task in the ancestry of
    ``task_id`` (the task itself included), or None if no such entry exists.

    Tasks are visited breadth-first along their ``parents`` fields. Each task
    is visited once, so shared ancestors are not re-walked and a cyclic
    ``parents`` graph cannot loop forever.
    """
    from collections import deque

    queue = deque([task_id])
    seen = set(queue)
    while queue:
        current = queue.popleft()
        for parent in state[current]['parents']:
            if parent not in seen:
                seen.add(parent)
                queue.append(parent)
        if 'misc' in state[current]:
            return state[current]['misc']
    return None
Ejemplo n.º 6
0
def status(verbose, host, job_id):
    """
    Displays the status and results of jobs.

    Prints the overall batch state, a completion counter, the output files
    of all leaf tasks that produced a result and, if any task failed, an
    error listing. ``UNKNOWN`` is printed if a local batch cannot be loaded
    (``NidabaInputException``) or the batch reports no state.

    Args:
        verbose: Verbosity level. Above 0 the third element of a failed
                 task's ``errors`` entry (presumably the traceback) is
                 included in the error listing; above 1 the first element
                 (presumably the task arguments) is included as well.
        host: If truthy the batch is queried through ``NetworkSimpleBatch``
              using this value, otherwise through a local ``Batch``.
        job_id: Identifier of the batch to inspect.
    """
    click.secho('Status:', underline=True, nl=False)
    if host:
        batch = NetworkSimpleBatch(host, job_id)
    else:
        try:
            batch = Batch(job_id)
        except NidabaInputException:
            click.echo(' UNKNOWN')
            return

    state = batch.get_extended_state()
    if not state:
        click.echo(' UNKNOWN')
        return

    bs = 'success'
    done = 0
    running = 0
    results = []
    errors = []
    expected = len(state)
    # items() instead of iteritems() so the loop runs on Python 2 and 3.
    for task_id, subtask in state.items():
        task_state = subtask['state']
        if task_state == 'SUCCESS':
            done += 1
        elif task_state in ('RUNNING', 'PENDING'):
            if task_state == 'RUNNING':
                running += 1
            # unfinished work only downgrades a so far successful batch; a
            # recorded failure takes precedence.
            if bs == 'success':
                bs = 'pending'
        elif task_state == 'FAILURE':
            errors.append(subtask)
            bs = 'failed'

        # leaf nodes/result extraction
        if len(subtask['children']) == 0 and subtask['result'] is not None:
            # try to find statistics results
            misc = _find_misc(state, task_id)
            result = subtask['result']
            # archival tasks bunch everything together. do a sort-based
            # matching of input and output tasks
            if (isinstance(result[0], list) or
                    (host and isinstance(result, list))):
                for res, rd in zip(sorted(result),
                                   sorted(subtask['root_documents'])):
                    if host:
                        res = [res]
                    results.append((res, [rd], misc))
            else:
                if host:
                    # a single remote result is wrapped so that rendering
                    # can always join a list
                    result = [result]
                results.append((result, subtask['root_documents'], misc))

    final = '(final)' if expected - done == 0 else ''
    click.echo(' {} {}\n'.format(bs, final))
    click.echo('{}/{} tasks completed. {} running.\n'.format(
        done, expected, running))

    # render results
    click.secho('Output files:\n', underline=True)
    results = sorted(results,
                     key=lambda x: x[0][0][1]
                     if isinstance(x[0], list) else x[0][1])
    if results and not host:
        # only needed (and only imported) when rendering local paths
        from nidaba import storage
    for result, root_documents, misc in results:
        if host:
            output = ', '.join(result)
            source = ', '.join(root_documents)
        else:
            if isinstance(result[0], list):
                # several output files: resolve each one and list them all
                output = ', '.join(
                    click.format_filename(storage.get_abs_path(*d))
                    for d in result)
            else:
                output = click.format_filename(storage.get_abs_path(*result))
            source = ', '.join(d[1] for d in root_documents)
        if misc is not None:
            click.echo(u'{} \u2192 {} ({:.1f}% / {})'.format(
                source, output, 100 * misc['edit_ratio'],
                misc['ground_truth'][1]))
        else:
            click.echo(u'{} \u2192 {}'.format(source, output))

    # render errors
    if errors:
        click.secho('\nErrors:\n', underline=True)
        for task in errors:
            tb = ''
            args = ''
            if verbose > 0:
                tb = task['errors'][2]
            if verbose > 1:
                # drop the 'method' entry before printing; the default
                # avoids a KeyError when it is not present.
                task['errors'][0].pop('method', None)
                args = ', ' + str(task['errors'][0])
            if host:
                rd = ', '.join(
                    os.path.basename(x) for x in task['root_documents'])
            else:
                rd = ', '.join(
                    os.path.basename(x[1]) for x in task['root_documents'])
            click.echo('{}.{} ({}{}): {}{}'.format(task['task'][0],
                                                   task['task'][1], rd, args,
                                                   tb, task['errors'][1]))


def _find_misc(state, task_id):
    """
    Returns the ``misc`` entry of the closest task in the ancestry of
    ``task_id`` (the task itself included), or None if no such entry exists.

    Tasks are visited breadth-first along their ``parents`` fields. Each task
    is visited once, so shared ancestors are not re-walked and a cyclic
    ``parents`` graph cannot loop forever.
    """
    from collections import deque

    queue = deque([task_id])
    seen = set(queue)
    while queue:
        current = queue.popleft()
        for parent in state[current]['parents']:
            if parent not in seen:
                seen.add(parent)
                queue.append(parent)
        if 'misc' in state[current]:
            return state[current]['misc']
    return None