예제 #1
0
파일: api.py 프로젝트: mirskiy/nidaba
    def post(self):
        """
        Create a new, empty batch and return its identifier.

        ** Request **

        .. sourcecode:: http

            POST /batch

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 201 CREATED

            {
                "id": "78a1f1e4-cc76-40ce-8a98-77b54362a00e",
                "url": "/batch/78a1f1e4-cc76-40ce-8a98-77b54362a00e"
            }

        :status 201: Successfully created
        """
        log.debug('Routing to batch with POST')
        new_batch = SimpleBatch()
        # The response carries both the raw id and the URL of the batch
        # resource so that clients do not have to build the URL themselves.
        payload = {
            'id': new_batch.id,
            'url': url_for('api.batch', batch_id=new_batch.id),
        }
        log.debug('Created batch {}'.format(new_batch.id))
        return payload, 201
예제 #2
0
파일: api.py 프로젝트: mirskiy/nidaba
    def post(self, batch_id):
        """
        Adds a page (really any type of file) to the batch identified by
        *batch_id*.

        Every uploaded file is written to the batch's storage. It is only
        registered as a document of the batch when the parsed ``auxiliary``
        argument is ``False``.

        ** Request **

            POST /batch/:batch/pages

        ** Response **

            HTTP/1.1 201 CREATED

            [
                {
                    "name": "0033.tif",
                    "url": "/pages/63ca3ec7-2592-4c7d-9009-913aac42535d/0033.tif"
                }
            ]

        :form scans: file(s) to add to the batch

        :status 201: task created
        :status 403: file couldn't be created
        :status 404: batch not found
        """
        args = self.parser.parse_args()
        log.debug('Routing to pages {} of {} (POST)'.format(
            [x.filename for x in args['scans']], batch_id))
        try:
            batch = SimpleBatch(batch_id)
        except Exception:
            # Deliberately not a bare ``except:`` so that SystemExit and
            # KeyboardInterrupt are not reported as a missing batch.
            return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404
        data = []
        for upload in args['scans']:
            try:
                fp = storage.StorageFile(batch_id, upload.filename, 'wb')
            except NidabaStorageViolationException as e:
                log.debug('Failed to write file {}'.format(upload.filename),
                          exc_info=True)
                # Abort the whole request on the first rejected file; files
                # stored by earlier iterations are left in place.
                return {'message': str(e)}, 403
            with fp:
                upload.save(fp)
                upload.close()
                if args['auxiliary'] is False:
                    log.debug('Adding {}/{} to {}'.format(
                        fp.storage_path[0], fp.storage_path[1], batch_id))
                    batch.add_document(fp.storage_path)
            data.append({
                'name': upload.filename,
                'url': url_for('api.page',
                               batch=batch_id,
                               file=upload.filename),
            })
        return data, 201
예제 #3
0
파일: api.py 프로젝트: mirskiy/nidaba
    def get(self, batch_id):
        """
        Retrieves the state of batch *batch_id*.

        The response always contains the URLs of the batch's page and task
        listings under ``pages`` and ``tasks``. While the batch is running it
        additionally contains the extended state under ``chains``, with all
        document references replaced by URLs of the page resource.

        ** Request **

        .. sourcecode:: http

            GET /batch/:batch_id

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 200 OK

        :param batch_id: batch identifier
        :type batch_id: string
        :status 200: No error
        :status 404: No such batch
        """
        log.debug('Routing to batch {} (GET)'.format(batch_id))
        res = {}
        try:
            batch = SimpleBatch(batch_id)
        except Exception:
            # Deliberately not a bare ``except:`` so that SystemExit and
            # KeyboardInterrupt are not reported as a missing batch.
            return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404
        res['pages'] = url_for('api.batchpages', batch_id=batch_id)
        res['tasks'] = url_for('api.batchtasks', batch_id=batch_id)
        if batch.is_running():
            res['chains'] = batch.get_extended_state()

            def page_url(doc):
                # A document is addressed as (batch, file).
                return url_for('api.page', batch=doc[0], file=doc[1])

            # replace all document tuples with URLs to the page resource
            def replace_docs(state):
                # Iterate over a snapshot of the keys because values are
                # reassigned while walking the mapping.
                for k in list(state):
                    value = state[k]
                    # None and empty values carry no document to rewrite;
                    # indexing them would raise instead.
                    if k in ('root_document', 'result', 'doc') and value:
                        if isinstance(value[0], list):
                            # a list of documents
                            state[k] = [page_url(doc) for doc in value]
                        else:
                            # a single document
                            state[k] = page_url(value)
                    if isinstance(state[k], dict):
                        replace_docs(state[k])

            replace_docs(res['chains'])
        return res, 200
예제 #4
0
파일: api.py 프로젝트: mirskiy/nidaba
    def get(self, batch_id):
        """
        Returns the list of pages associated with the batch with *batch_id*.

        Each entry holds the file name of a document and the URL of its page
        resource.

        ** Request **

        .. sourcecode:: http

            GET /batch/:batch/pages

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 200 OK

            [
                {
                    "name": "0033.tif",
                    "url": "/pages/63ca3ec7-2592-4c7d-9009-913aac42535d/0033.tif"
                },
                {
                    "name": "0072.tif",
                    "url": "/pages/63ca3ec7-2592-4c7d-9009-913aac42535d/0072.tif"
                },
                {
                    "name": "0014.tif",
                    "url": "/pages/63ca3ec7-2592-4c7d-9009-913aac42535d/0014.tif"
                }
            ]

        :status 200: success
        :status 404: batch not found
        """
        log.debug('Routing to pages of {} (GET)'.format(batch_id))
        try:
            batch = SimpleBatch(batch_id)
        except Exception:
            # Deliberately not a bare ``except:`` so that SystemExit and
            # KeyboardInterrupt are not reported as a missing batch.
            return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404
        # Documents are addressed as (batch, file).
        data = [{
            'name': doc[1],
            'url': url_for('api.page', batch=doc[0], file=doc[1])
        } for doc in batch.get_documents()]
        return data, 200
예제 #5
0
파일: api.py 프로젝트: mirskiy/nidaba
  def post(self, batch_id):
      """
      Executes batch with identifier *batch_id*

      A batch is only started while its state is ``NONE``; any other state
      is treated as "already executed".

      ** Request **

      .. sourcecode:: http

          POST /batch/:batch_id

      ** Response **

      .. sourcecode:: http

          HTTP/1.1 202 ACCEPTED

      :param batch_id: batch's unique id
      :type batch_id: string
      :status 202: Successfully executed
      :status 400: Batch could not be executed
      :status 404: No such batch
      :status 409: Trying to reexecute an already executed batch
      """
      log.debug('Routing to batch {} (POST)'.format(batch_id))
      try:
          batch = SimpleBatch(batch_id)
      except Exception:
          # Deliberately not a bare ``except:`` so that SystemExit and
          # KeyboardInterrupt are not reported as a missing batch.
          log.debug('Batch {} not found'.format(batch_id))
          return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404
      if batch.get_state() != 'NONE':
          log.debug('Batch {} already executed'.format(batch_id))
          return {'message': 'Batch already executed'}, 409
      try:
          batch.run()
      except Exception:
          log.debug('Batch {} could not be executed'.format(batch_id),
                    exc_info=True)
          return {'message': 'Batch could not be executed'}, 400
      # The response is built outside the try block: once run() has
      # succeeded, a failure here must not be reported as "could not be
      # executed".
      return {
          'id': batch_id,
          'url': url_for('api.batch', batch_id=batch_id)
      }, 202
예제 #6
0
def status(verbose, host, job_id):
    """
    Displays the status and results of jobs.
    """
    # The docstring is kept to a single sentence on purpose: this looks like
    # a click command (decorators are not visible here -- TODO confirm), in
    # which case the docstring doubles as the --help text.
    #
    # verbose: integer verbosity. Above 0 the third element of a failed
    #          task's 'errors' entry is printed as well; above 1 its first
    #          element (minus the 'method' key) is printed too.
    # host:    when truthy, the state is fetched through NetworkSimpleBatch,
    #          otherwise through a local SimpleBatch.
    # job_id:  identifier of the batch to inspect.
    if host:
        batch = NetworkSimpleBatch(host, job_id)
    else:
        batch = SimpleBatch(job_id)

    state = batch.get_extended_state()

    click.secho('Status:', underline=True, nl=False)
    if not state:
        click.echo(' UNKNOWN')
        return

    bs = 'success'
    done = 0
    running = 0
    failed = 0
    results = []
    errors = []
    expected = len(state)
    failed_children = set()
    for task_id, subtask in state.iteritems():
        task_state = subtask['state']
        if task_state == 'SUCCESS':
            done += 1
        elif task_state == 'RUNNING':
            running += 1
            if bs == 'success':
                bs = 'pending'
        elif task_state == 'PENDING':
            if bs == 'success':
                bs = 'pending'
        elif task_state == 'FAILURE':
            failed += 1
            # 'children' may hold a single value instead of a list;
            # normalise it in place, here and on every direct child, because
            # the leaf check below calls len() on it.
            if not isinstance(subtask['children'], list):
                subtask['children'] = [subtask['children']]
            for child in subtask['children']:
                if not isinstance(state[child]['children'], list):
                    state[child]['children'] = [state[child]['children']]
                failed_children.add(child)
            errors.append(subtask)
            bs = 'failed'

        is_leaf = len(subtask['children']) == 0
        if is_leaf and not subtask['housekeeping'] and \
                subtask['result'] is not None:
            # try to find statistics results: walk from the task itself up
            # through its ancestors (the list grows while it is iterated)
            # and take the first 'misc' entry encountered
            parents = [task_id] + subtask['parents']
            misc = None
            for parent in parents:
                parents.extend(state[parent]['parents'])
                if 'misc' in state[parent]:
                    misc = state[parent]['misc']
                    break
            results.append((subtask['result'], subtask['root_document'], misc))

    # The batch is final once every task is done, failed, or the direct
    # child of a failed task.
    remaining = expected - failed - done - len(failed_children)
    final = '' if remaining else '(final)'
    click.echo(' {} {}\n'.format(bs, final))
    click.echo('{}/{} tasks completed. {} running.\n'.format(
        done, len(state), running))
    click.secho('Output files:\n', underline=True)
    # each entry is (result, root_document, misc); order by result[1]
    results = sorted(results, key=lambda x: x[0][1])
    if results and host:
        # remote batch: result and root document are printed as received
        for doc in results:
            if doc[2] is not None:
                click.echo(u'{} \u2192 {} ({:.1f}% / {})'.format(
                    doc[1], doc[0], 100 * doc[2]['edit_ratio'],
                    doc[2]['ground_truth'][1]))
            else:
                click.echo(u'{} \u2192 {}'.format(doc[1], doc[0]))
    elif results:
        # local batch: resolve the result to an absolute path in the storage
        from nidaba import storage
        for doc in results:
            output = click.format_filename(storage.get_abs_path(*doc[0]))
            if doc[2] is not None:
                click.echo(u'{} \u2192 {} ({:.1f}% / {})'.format(
                    doc[1][1], output, 100 * doc[2]['edit_ratio'],
                    doc[2]['ground_truth'][1]))
            else:
                click.echo(u'{} \u2192 {}'.format(doc[1][1], output))
    if errors:
        click.secho('\nErrors:\n', underline=True)
        for task in errors:
            tb = ''
            args = ''
            if verbose > 0:
                tb = task['errors'][2]
            if verbose > 1:
                # Drop 'method' from the printed arguments (this mutates the
                # state entry in place); tolerate the key being absent.
                task['errors'][0].pop('method', None)
                args = ', ' + str(task['errors'][0])
            click.echo('{0} ({1}{2}): {3}{4}'.format(task['task'][0],
                                                     task['root_document'][1],
                                                     args, tb,
                                                     task['errors'][1]))
예제 #7
0
def batch(files, host, preprocessing, binarize, ocr, segmentation, stats,
          postprocessing, output, grayscale, help_tasks):
    """
    Add a new job to the pipeline.
    """
    # The docstring is kept to a single sentence on purpose: this looks like
    # a click command (decorators are not visible here -- TODO confirm), in
    # which case the docstring doubles as the --help text.
    #
    # files:      paths of the input documents.
    # host:       when truthy, the batch is created on and the files are
    #             uploaded to that host; otherwise the files are copied into
    #             the local storage.
    # preprocessing, binarize, segmentation, ocr, stats, postprocessing,
    # output:     per task group, an iterable of (task name, iterable of
    #             kwargs dicts) pairs; falsy values are skipped.
    # grayscale:  when falsy, an 'rgb_to_gray' image task is added first.
    # help_tasks: not used in this function.

    if host:
        batch = NetworkSimpleBatch(host)
        click.echo(u'Preparing filestore\t\t[', nl=False)
        try:
            batch.create_batch()
        except Exception:
            # Not a bare ``except:`` so that Ctrl-C is not reported as a
            # filestore failure.
            click.secho(u'\u2717', fg='red', nl=False)
            click.echo(']')
            # NOTE(review): exit() without an argument ends the process with
            # status 0 although the setup failed -- confirm this is intended.
            exit()
        click.secho(u'\u2713', fg='green', nl=False)
        click.echo(']')
        for doc in files:

            def callback(monitor):
                spin(u'Uploading {}'.format(doc))

            batch.add_document(doc, callback)
            click.secho(u'\b\u2713', fg='green', nl=False)
            click.echo('\033[?25h\n', nl=False)
    else:
        from nidaba import storage
        click.echo(u'Preparing filestore\t\t[', nl=False)
        try:
            batch = SimpleBatch()
        except Exception:
            click.secho(u'\u2717', fg='red', nl=False)
            click.echo(']')
            # NOTE(review): see above, this also exits with status 0.
            exit()
        for doc in files:
            shutil.copy2(doc,
                         storage.get_abs_path(batch.id, os.path.basename(doc)))
            batch.add_document((batch.id, os.path.basename(doc)))
        click.secho(u'\u2713', fg='green', nl=False)
        click.echo(']')
    click.echo(u'Building batch\t\t\t[', nl=False)
    if not grayscale:
        batch.add_task('img', 'rgb_to_gray')
    # Task groups in the order their tasks are added to the batch.
    stages = (
        ('img', preprocessing),
        ('binarize', binarize),
        ('segmentation', segmentation),
        ('ocr', ocr),
        ('stats', stats),
        ('postprocessing', postprocessing),
        ('output', output),
    )
    for group, algs in stages:
        for alg in algs or ():
            # alg is (task name, iterable of kwargs dicts); every kwargs dict
            # becomes one task configuration.
            for kwargs in alg[1]:
                kwargs = move_to_storage(batch, kwargs)
                batch.add_task(group, alg[0], **kwargs)
    batch.run()
    click.secho(u'\u2713', fg='green', nl=False)
    click.echo(']')
    click.echo(batch.id)
예제 #8
0
파일: api.py 프로젝트: mirskiy/nidaba
    def post(self, batch_id, group=None, task=None):
        """
        Adds a particular configuration of a task to the batch identified by
        *batch_id*.

        ** Request **

            POST /batch/:batch_id/:group/:task

            {
                kwarg_1: "value",
                kwarg_2: 10,
                kwarg_3: 'true',
                kwarg_4: ["a", "b"],
                kwarg_5: '/pages/:batch_id/path'
            }

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 201 CREATED

        To post files as arguments use their URL returned by the call that
        created them on the batch. Booleans are strings containing either the
        values 'True'/'true' or 'False'/'false'.

        :status 201: task created
        :status 404: batch not found
        :status 422: the arguments could not be converted or the task could
                     not be added to the batch
        """
        log.debug('Routing to task {}.{} of {} (POST)'.format(
            group, task, batch_id))
        try:
            batch = SimpleBatch(batch_id)
        except Exception:
            # Deliberately not a bare ``except:`` so that SystemExit and
            # KeyboardInterrupt are not reported as a missing batch.
            return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404
        try:
            # URL prefix of this batch's page resources; identical for every
            # argument, so it is built once.
            # XXX: find a nicer way to rewrite page URLs
            base_url = url_for('api.page', batch=batch_id, file='')

            def arg_conversion(s):
                # booleans are accepted in their string spelling
                if s in ['True', 'true']:
                    return True
                elif s in ['False', 'false']:
                    return False
                # page URLs of this batch become (batch_id, path) tuples
                if isinstance(s, basestring) and s.startswith(base_url):
                    return (batch_id, s.replace(base_url, '', 1))
                return s

            kwargs = {
                k: arg_conversion(v)
                for k, v in request.get_json().iteritems()
            }
            batch.add_task(group, task, **kwargs)
        except Exception as e:
            log.debug('Adding task {} to {} failed: {}'.format(
                task, batch_id, str(e)))
            return {'message': str(e)}, 422
        return {}, 201
예제 #9
0
파일: api.py 프로젝트: mirskiy/nidaba
    def get(self, batch_id, group=None, task=None):
        """
        Retrieves the list of tasks and their argument values associated with a
        batch, optionally limited to a specific group.

        ** Request **

        .. sourcecode:: http

            GET /batch/:batch_id/tasks

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 200 OK

            {
                "segmentation": [
                    ["tesseract", {}]
                ],
                "ocr": [
                    ["kraken",
                        {
                            "model": "teubner"
                        }
                    ]
                ]
            }


        To limit output to a specific group of tasks, e.g. segmentation or
        binarization append the group to the URL:

        ** Request **

        .. sourcecode:: http

            GET /batch/:batch_id/tasks/:group

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 200 OK

            {
                "group": [
                    ["tesseract", {}],
                    ["kraken", {}]
                ]
            }

        When *task* is given as well, only that task's entry of the group is
        returned, nested as ``{group: {task: ...}}``.

        :status 200: success
        :status 404: batch, group, or task not found.
        """
        log.debug('Routing to task {}.{} of {} (GET)'.format(
            group, task, batch_id))
        try:
            batch = SimpleBatch(batch_id)
        except Exception:
            # Deliberately not a bare ``except:`` so that SystemExit and
            # KeyboardInterrupt are not reported as a missing batch.
            log.debug('Batch {} not found'.format(batch_id))
            return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404
        tasks = batch.get_tasks()
        if group and group not in tasks:
            log.debug('Unknown group {} ({})'.format(group, batch_id))
            return {'message': 'Unknown group {}'.format(group)}, 404
        # NOTE(review): the examples in the docstring show each group as a
        # list of [name, kwargs] pairs, whereas the membership test and the
        # lookup below treat it as a mapping keyed by task name -- verify
        # against the actual return value of get_tasks().
        if task and task not in tasks[group]:
            log.debug('Unknown task {}.{} ({})'.format(group, task, batch_id))
            return {'message': 'Unknown task {}'.format(task)}, 404
        if task:
            return {group: {task: tasks[group][task]}}, 200
        if group:
            return {group: tasks[group]}, 200
        return tasks, 200