Exemple #1
0
    def post(self):
        """
        Creates a new batch and returns its identifier together with the URL
        of the batch resource.

        ** Request **

        .. sourcecode:: http

            POST /batch

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 201 CREATED

            {
                "id": "78a1f1e4-cc76-40ce-8a98-77b54362a00e",
                "url": "/batch/78a1f1e4-cc76-40ce-8a98-77b54362a00e"
            }

        :status 201: Successfully created
        """
        log.debug('Routing to batch with POST')
        new_batch = nBatch()
        # the response carries both the raw identifier and a ready-made link
        payload = dict(
            id=new_batch.id,
            url=url_for('api.batch', batch_id=new_batch.id),
        )
        log.debug('Created batch {}'.format(new_batch.id))
        return payload, 201
Exemple #2
0
    def delete(self, batch_id):
        """
        Removes one or more pages (really any type of file) from the batch
        identified by *batch_id*.

        Files are removed in the order given. If one of them is rejected the
        request is aborted with 403; files removed before it stay removed.

        ** Request **

        .. sourcecode:: http

            DELETE /batch/:batch/pages

            {
                "scans": ["0033.tif", "0034.tif"]
            }

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 204 No Content

        :param batch_id: batch identifier
        :status 204: file(s) deleted
        :status 400: request body is not a JSON object containing *scans*
        :status 403: a file could not be removed from the batch
        :status 404: batch not found
        """
        args = request.get_json()
        # get_json() may return None (or a non-object value) when the body is
        # not a JSON object; answer with a client error instead of crashing.
        if not isinstance(args, dict) or 'scans' not in args:
            log.debug('Malformed page removal request for {}'.format(batch_id))
            return {
                'message': 'Request body must be a JSON object with "scans"'
            }, 400
        scans = args['scans']
        log.debug('Routing to pages {} of {} (DELETE)'.format(
            scans, batch_id))
        try:
            batch = nBatch(batch_id)
        except Exception:
            return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404
        for scan in scans:
            try:
                # documents are addressed as (batch_id, filename) tuples
                batch.rm_document((batch_id, scan))
            except NidabaInputException as e:
                return {'message': str(e)}, 403
        return {}, 204
Exemple #3
0
    def delete(self, batch_id, group=None, task=None):
        """
        Removes a particular configuration of a task from the batch identified by
        *batch_id*.

        The JSON body holds the keyword arguments identifying the task
        configuration. The strings 'True'/'true' and 'False'/'false' are
        converted to booleans and page URLs of this batch are converted to
        ``(batch_id, path)`` tuples before being handed to the batch. A
        missing body is treated as an empty set of arguments.

        ** Request **

        .. sourcecode:: http

            DELETE /batch/:batch_id/:group/:task

            {
                "kwarg_1": "value",
                "kwarg_2": 10,
                "kwarg_3": "true",
                "kwarg_4": ["a", "b"],
                "kwarg_5": "/pages/:batch_id/path"
            }

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 204 No Content

        :param batch_id: batch identifier
        :param group: task group
        :param task: task name
        :status 204: task deleted
        :status 404: batch, group, or task not found.
        """
        log.debug('Routing to task {}.{} of {} (DELETE)'.format(
            group, task, batch_id))
        try:
            batch = nBatch(batch_id)
        except Exception:
            return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404
        try:
            # XXX: find a nicer way to rewrite page URLs
            # The prefix is identical for every argument, so build it once.
            base_url = url_for('api.page', batch=batch_id, file='')

            def arg_conversion(s):
                # booleans may be sent as their string representation
                if s in ('True', 'true'):
                    return True
                if s in ('False', 'false'):
                    return False
                # page URLs of this batch become (batch_id, path) tuples
                if isinstance(s, basestring) and s.startswith(base_url):
                    return (batch_id, s[len(base_url):])
                return s

            # get_json() may return None when no JSON body was sent
            payload = request.get_json() or {}
            kwargs = {k: arg_conversion(v) for k, v in payload.iteritems()}
            batch.rm_task(group, task, **kwargs)
        except Exception as e:
            # any failure while resolving/removing the task is reported as 404
            log.debug('Removing task {} from {} failed: {}'.format(
                task, batch_id, str(e)))
            return {'message': str(e)}, 404
        return {}, 204
Exemple #4
0
    def post(self, batch_id):
        """
        Adds one or more pages (really any type of file) to the batch
        identified by *batch_id*.

        Files are stored in the order given. If one of them is rejected the
        request is aborted with 403; files stored before it stay stored.

        ** Request **

        .. sourcecode:: http

            POST /batch/:batch/pages

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 201 CREATED

            [
                {
                    "name": "0033.tif",
                    "url": "/pages/63ca3ec7-2592-4c7d-9009-913aac42535d/0033.tif"
                }
            ]

        :param batch_id: batch identifier
        :form scans: file(s) to add to the batch
        :form auxiliary: when false the stored file is also registered as a
                         document of the batch

        :status 201: file created
        :status 403: file couldn't be created
        :status 404: batch not found
        """
        args = self.parser.parse_args()
        log.debug('Routing to pages {} of {} (POST)'.format(
            [x.filename for x in args['scans']], batch_id))
        try:
            batch = nBatch(batch_id)
        except Exception:
            return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404
        data = []
        for upload in args['scans']:
            try:
                fp = storage.StorageFile(batch_id, upload.filename, 'wb')
            except NidabaStorageViolationException as e:
                log.debug('Failed to write file {}'.format(upload.filename),
                          exc_info=True)
                return {'message': str(e)}, 403
            with fp:
                upload.save(fp)
                upload.close()
                # auxiliary files are only stored, not added as documents
                if args['auxiliary'] is False:
                    log.debug('Adding {}/{} to {}'.format(
                        fp.storage_path[0], fp.storage_path[1], batch_id))
                    batch.add_document(fp.storage_path)
            data.append({
                'name': upload.filename,
                'url': url_for('api.page', batch=batch_id,
                               file=upload.filename),
            })
        return data, 201
Exemple #5
0
    def get(self, batch_id):
        """
        Retrieves the state of batch *batch_id*.

        The response always contains links to the page and task collections
        of the batch. While the batch is running it additionally contains its
        extended state under ``chains``, with all document references
        replaced by URLs of the corresponding page resources.

        ** Request **

        .. sourcecode:: http

            GET /batch/:batch_id

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 200 OK

        :param batch_id: batch identifier
        :type batch_id: string
        :status 200: No error
        :status 404: No such batch
        """
        log.debug('Routing to batch {} (GET)'.format(batch_id))
        try:
            batch = nBatch(batch_id)
        except Exception:
            # any failure to load the batch is reported as "not found"
            return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404
        res = {
            'pages': url_for('api.batchpages', batch_id=batch_id),
            'tasks': url_for('api.batchtasks', batch_id=batch_id),
        }
        if batch.is_running():
            res['chains'] = batch.get_extended_state()

            def page_url(doc):
                # a document reference is a (batch, file) pair
                return url_for('api.page', batch=doc[0], file=doc[1])

            # replace all document tuples with URLs to the page resource
            def replace_docs(state):
                # only values are reassigned, the key set stays unchanged
                for key in list(state):
                    value = state[key]
                    # None, empty sequences and non-sequence values are left
                    # untouched instead of being indexed blindly
                    if (key in ('root_documents', 'result', 'doc') and
                            isinstance(value, (list, tuple)) and value):
                        if isinstance(value[0], (list, tuple)):
                            # a sequence of document references
                            state[key] = [page_url(doc) for doc in value]
                        else:
                            # a single document reference
                            state[key] = page_url(value)
                    if isinstance(state[key], dict):
                        replace_docs(state[key])

            replace_docs(res['chains'])
        return res, 200
Exemple #6
0
    def post(self, batch_id, group=None, task=None):
        """
        Adds a particular configuration of a task to the batch identified by
        *batch_id*.

        ** Request **

        .. sourcecode:: http

            POST /batch/:batch_id/:group/:task

            {
                "kwarg_1": "value",
                "kwarg_2": 10,
                "kwarg_3": "true",
                "kwarg_4": ["a", "b"],
                "kwarg_5": "/pages/:batch_id/path"
            }

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 201 CREATED

        To post files as arguments use their URL returned by the call that
        created them on the batch. Booleans are strings containing either the
        values 'True'/'true' or 'False'/'false'. A missing body is treated as
        an empty set of arguments.

        :param batch_id: batch identifier
        :param group: task group
        :param task: task name
        :status 201: task created
        :status 404: batch, group, or task not found.
        :status 422: invalid task configuration
        """
        log.debug('Routing to task {}.{} of {} (POST)'.format(
            group, task, batch_id))
        try:
            batch = nBatch(batch_id)
        except Exception:
            return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404
        try:
            # XXX: find a nicer way to rewrite page URLs
            # The prefix is identical for every argument, so build it once.
            base_url = url_for('api.page', batch=batch_id, file='')

            def arg_conversion(s):
                # booleans may be sent as their string representation
                if s in ('True', 'true'):
                    return True
                if s in ('False', 'false'):
                    return False
                # page URLs of this batch become (batch_id, path) tuples
                if isinstance(s, basestring) and s.startswith(base_url):
                    return (batch_id, s[len(base_url):])
                return s

            # get_json() may return None when no JSON body was sent
            payload = request.get_json() or {}
            kwargs = {k: arg_conversion(v) for k, v in payload.iteritems()}
            batch.add_task(group, task, **kwargs)
        except Exception as e:
            # any failure while converting/adding the task is reported as 422
            log.debug('Adding task {} to {} failed: {}'.format(
                task, batch_id, str(e)))
            return {'message': str(e)}, 422
        return {}, 201
Exemple #7
0
    def get(self, batch_id):
        """
        Returns the list of pages associated with the batch with *batch_id*.

        ** Request **

        .. sourcecode:: http

            GET /batch/:batch/pages

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 200 OK

            [
                {
                    "name": "0033.tif",
                    "url": "/pages/63ca3ec7-2592-4c7d-9009-913aac42535d/0033.tif"
                },
                {
                    "name": "0072.tif",
                    "url": "/pages/63ca3ec7-2592-4c7d-9009-913aac42535d/0072.tif"
                },
                {
                    "name": "0014.tif",
                    "url": "/pages/63ca3ec7-2592-4c7d-9009-913aac42535d/0014.tif"
                }
            ]

        :param batch_id: batch identifier
        :status 200: success
        :status 404: batch not found
        """
        log.debug('Routing to pages of {} (GET)'.format(batch_id))
        try:
            batch = nBatch(batch_id)
        except Exception:
            return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404
        # each document is a (batch, file) pair; expose name and page URL
        data = [
            {
                'name': doc[1],
                'url': url_for('api.page', batch=doc[0], file=doc[1]),
            }
            for doc in batch.get_documents()
        ]
        return data, 200
Exemple #8
0
    def post(self, batch_id):
        """
        Executes batch with identifier *batch_id*

        A batch can only be executed while its state is ``NONE`` and it
        contains at least one document.

        ** Request **

        .. sourcecode:: http

            POST /batch/:batch_id

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 202 ACCEPTED

        :param batch_id: batch's unique id
        :type batch_id: string
        :status 202: Successfully executed
        :status 400: Batch could not be executed
        :status 404: No such batch
        :status 409: Trying to reexecute an already executed batch
        """
        log.debug('Routing to batch {} (POST)'.format(batch_id))
        try:
            batch = nBatch(batch_id)
        except Exception:
            log.debug('Batch {} not found'.format(batch_id))
            return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404
        # any state other than 'NONE' means the batch was started before
        if batch.get_state() != 'NONE':
            log.debug('Batch {} already executed'.format(batch_id))
            return {'message': 'Batch already executed'}, 409
        if not batch.docs:
            log.debug('Batch {} not executable (no docs)'.format(batch_id))
            return {'message': 'Batch does not contain docs'}, 400
        try:
            batch.run()
        except Exception:
            log.debug('Batch {} could not be executed'.format(batch_id),
                      exc_info=True)
            return {'message': 'Batch could not be executed'}, 400
        return {
            'id': batch_id,
            'url': url_for('api.batch', batch_id=batch_id)
        }, 202
Exemple #9
0
    def get(self, group=None, task=None):
        """
        Retrieves the list of available tasks, their arguments and valid values
        for those arguments.

        ** Request **

        .. sourcecode:: http

            GET /tasks

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 200 OK

            {
                "img": {
                    "deskew": {},
                    "dewarp": {},
                    "rgb_to_gray": {}
                },
                "binarize": {
                    "nlbin": {
                        "border": "float",
                        "escale": "float",
                        "high": [
                            0,
                            100
                        ],
                        "low": [
                            0,
                            100
                        ]
                    },
                    "otsu": {},
                    "sauvola": {
                        "factor": [
                            0.0,
                            1.0
                        ],
                        "whsize": "int"
                    }
                },
                "segmentation": {
                    "kraken": {},
                    "tesseract": {}
                },
                "ocr": {
                    "kraken": {
                        "model": [
                            "fraktur.pyrnn.gz",
                            "default",
                            "teubner"
                        ]
                    },
                    "tesseract": {
                        "extended": [
                            false,
                            true
                        ],
                        "languages": [
                            "chr",
                            "chi_tra",
                            "ita_old",
                            "ceb"
                        ]
                    }
                },
                "postprocessing": {
                    "spell_check": {
                        "filter_punctuation": [
                            true,
                            false
                        ],
                        "language": [
                            "latin",
                            "polytonic_greek"
                        ]
                    }
                },
                "output": {
                    "metadata": {
                        "metadata": "file",
                        "validate": [
                            true,
                            false
                        ]
                    },
                    "tei2hocr": {},
                    "tei2simplexml": {},
                    "tei2txt": {}
                }
            }

        It is also possible to retrieve only a subset of task definitions by
        adding to the request a task group and/or the task name:

        ** Request **

        .. sourcecode:: http

            GET /tasks/segmentation

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 200 OK

            {
                "segmentation": {
                    "kraken": {},
                    "tesseract": {}
                }
            }

        Currently there are 4 different argument types:

            * "int": An integer
            * "float": A float (floats serialized to integers, i.e. 1.0 to 1
                       are also accepted)
            * "str": An UTF-8 encoded string
            * "file": A file on the storage medium, referenced by its URL

        Finally there are lists of valid argument values where one or more
        values out of the list may be picked and value ranges

        :param group: optional task group to limit the output to
        :param task: optional task name inside *group* to limit the output to
        :status 200: success
        :status 404: unknown group or task
        """
        log.debug('Routing to tasks with group {}, method {}'.format(
            group, task))
        tasks = nBatch().get_available_tasks()
        if not group:
            # a task can only be looked up inside a group; without one there
            # is nothing to resolve it against
            if task:
                return {'message': 'Unknown task {}'.format(task)}, 404
            return tasks, 200
        if group not in tasks:
            return {'message': 'Unknown group {}'.format(group)}, 404
        if not task:
            return {group: tasks[group]}, 200
        if task not in tasks[group]:
            return {'message': 'Unknown task {}'.format(task)}, 404
        return {group: {task: tasks[group][task]}}, 200
Exemple #10
0
    def get(self, batch_id, group=None, task=None):
        """
        Retrieves the list of tasks and their argument values associated with a
        batch, optionally limited to a specific group.

        ** Request **

        .. sourcecode:: http

            GET /batch/:batch_id/tasks

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 200 OK

            {
                "segmentation": [
                    ["tesseract", {}]
                ],
                "ocr": [
                    ["kraken",
                        {
                            "model": "teubner"
                        }
                    ]
                ]
            }


        To limit output to a specific group of tasks, e.g. segmentation or
        binarization append the group to the URL:

        ** Request **

        .. sourcecode:: http

            GET /batch/:batch_id/tasks/:group

        ** Response **

        .. sourcecode:: http

            HTTP/1.1 200 OK

            {
                "group": [
                    ["tesseract", {}],
                    ["kraken", {}]
                ]
            }

        :param batch_id: batch identifier
        :param group: optional task group to limit the output to
        :param task: optional task name inside *group* to limit the output to
        :status 200: success
        :status 404: batch, group, or task not found.
        """
        log.debug('Routing to task {}.{} of {} (GET)'.format(
            group, task, batch_id))
        try:
            batch = nBatch(batch_id)
        except Exception:
            log.debug('Batch {} not found'.format(batch_id))
            return {'message': 'Batch Not Found: {}'.format(batch_id)}, 404
        tasks = batch.get_tasks()
        if not group:
            # a task can only be looked up inside a group; without one there
            # is nothing to resolve it against
            if task:
                log.debug('Unknown task {}.{} ({})'.format(
                    group, task, batch_id))
                return {'message': 'Unknown task {}'.format(task)}, 404
            return tasks, 200
        if group not in tasks:
            log.debug('Unknown group {} ({})'.format(group, batch_id))
            return {'message': 'Unknown group {}'.format(group)}, 404
        if not task:
            return {group: tasks[group]}, 200
        # NOTE(review): the response example above shows each group as a list
        # of [name, kwargs] pairs. If get_tasks() really returns that shape,
        # this membership test never matches a task name and every task
        # lookup ends in 404 -- confirm the structure returned by get_tasks().
        if task not in tasks[group]:
            log.debug('Unknown task {}.{} ({})'.format(group, task, batch_id))
            return {'message': 'Unknown task {}'.format(task)}, 404
        return {group: {task: tasks[group][task]}}, 200