Ejemplo n.º 1
0
    def delete(self, corpusId, documentId):
        """Delete a single document and, optionally, its annotations.

        The ``deleteAnnotations`` query argument is mandatory; annotations are
        deleted only when its value is exactly ``"true"``.

        Writes the value returned by ``corpus.delete_document`` with status
        200, or a 404 payload when the corpus or the document is not found.
        """
        try:
            raw_flag = self.get_query_argument("deleteAnnotations", None)
            if not raw_flag:
                self.missing_required_field("deleteAnnotations")
                return

            env_id = get_env_id()
            auth = get_autorisation(env_id, None, None)
            corpus_list = get_master_document_corpus_list(env_id, auth)
            target_corpus = corpus_list.get_corpus(corpusId)
            removed = target_corpus.delete_document(documentId,
                                                    raw_flag == 'true')
            self.write_and_set_status(removed, HTTPStatus.OK)
        except CorpusNotFoundException:
            payload = {MESSAGE: "Specified corpus not found"}
            self.write_and_set_status(payload, HTTPStatus.NOT_FOUND)
        except DocumentNotFoundException:
            payload = {MESSAGE: "Specified document not found"}
            self.write_and_set_status(payload, HTTPStatus.NOT_FOUND)
        except Exception:
            # Anything unexpected is reported together with its stack trace.
            self.write_and_set_status(
                {MESSAGE: "Internal server error",
                 TRACE: traceback.format_exc().splitlines()},
                HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 2
0
    def get(self, corpusId, bucketId, annotationId):
        """Return one annotation of a bucket.

        Requires the ``schemaType`` argument (422 when absent). In the
        response the annotation's ``id`` key is exposed as ``annotationId``.
        """
        try:
            schema_type = self.get_argument("schemaType", None)
            if not schema_type:
                payload = {MESSAGE: "Missing schemaType."}
                self.write_and_set_status(payload,
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            env_id = get_env_id()
            auth = get_autorisation(env_id, None, None)
            bucket = get_master_bucket_list(env_id, auth).get_bucket(
                corpusId, bucketId)
            annotation = bucket.get_annotation(annotationId, schema_type)

            # Move the value stored under "id" to the "annotationId" key.
            annotation["annotationId"] = annotation["id"]
            del annotation["id"]
            self.write_and_set_status(annotation, HTTPStatus.OK)
        except BucketNotFoundException:
            payload = {MESSAGE: "Specified bucket not found"}
            self.write_and_set_status(payload, HTTPStatus.NOT_FOUND)
        except DocumentNotFoundException:
            payload = {MESSAGE: "Annotation with provided id and schemaType does not exist"}
            self.write_and_set_status(payload, HTTPStatus.NOT_FOUND)
        except Exception:
            self.write_and_set_status(
                {MESSAGE: "Internal server error",
                 TRACE: traceback.format_exc().splitlines()},
                HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 3
0
    def get(self, corpusId):
        """Send a zip archive of a corpus' annotations to the client.

        Optional query arguments ``schemaTypes`` and ``bucketIds`` are
        comma-separated lists passed to ``create_tmp_annotations_zip``; an
        absent or empty argument is passed as an empty list.

        Responds with 404 when the corpus is not found and with 500 (plus a
        stack trace) on any other error.
        """
        try:
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)
            schemaTypesStr = self.get_query_argument("schemaTypes", None)
            bucketIdsStr = self.get_query_argument("bucketIds", None)
            schemaTypes = schemaTypesStr.split(",") if schemaTypesStr else []
            bucketIds = bucketIdsStr.split(",") if bucketIdsStr else []

            batchCorpus = Corpus(envId, authorization, corpusId)
            zipPath = batchCorpus.create_tmp_annotations_zip(
                bucketIds, schemaTypes)
            try:
                self.send_zip_file_with_get(zipPath, os.path.basename(zipPath))
            finally:
                # Clear the temporary files even when sending the archive
                # fails, so a failed download does not leave them behind.
                batchCorpus.clear_temporary_files()
        except CorpusNotFoundException:
            self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                      HTTPStatus.NOT_FOUND)
        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status(
                {
                    MESSAGE: "Internal server error",
                    TRACE: trace
                }, HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 4
0
    def delete(self, corpusId, bucketId, annotationId):
        """Delete one annotation, identified by id and ``schemaType``, from a bucket.

        Answers 204 on success and 404 when the bucket or the annotation is
        not found.
        """
        try:
            env_id = get_env_id()
            auth = get_autorisation(env_id, None, None)
            schema_type = self.get_argument("schemaType", None)
            if not schema_type:
                # NOTE(review): a missing schemaType is reported as 404 here,
                # whereas sibling annotation handlers answer 422 for the same
                # condition. Kept as-is to preserve the existing contract.
                payload = {MESSAGE: "Missing schemaType field, which links the annotation to its schema."}
                self.write_and_set_status(payload, HTTPStatus.NOT_FOUND)
                return

            bucket = get_master_bucket_list(env_id, auth).get_bucket(
                corpusId, bucketId)
            bucket.delete_annotation(annotationId, schema_type)
            self.write_and_set_status(None, HTTPStatus.NO_CONTENT)
        except BucketNotFoundException:
            payload = {MESSAGE: "Specified bucket not found"}
            self.write_and_set_status(payload, HTTPStatus.NOT_FOUND)
        except DocumentNotFoundException:
            payload = {MESSAGE: "Annotation with provided id does not exist"}
            self.write_and_set_status(payload, HTTPStatus.NOT_FOUND)
        except Exception:
            self.write_and_set_status(
                {MESSAGE: "Internal server error",
                 TRACE: traceback.format_exc().splitlines()},
                HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 5
0
 def delete(self, corpusId, bucketId):
     """Delete a bucket from a corpus.

     Answers 204 on success, 404 (with the exception text appended to the
     message) when the bucket or the corpus does not exist.
     """
     try:
         env_id = get_env_id()
         auth = get_autorisation(env_id, None, None)
         corpus_list = get_master_document_corpus_list(env_id, auth)
         corpus_list.get_corpus(corpusId).delete_bucket(bucketId)
         self.write_and_set_status(None, HTTPStatus.NO_CONTENT)
     except BucketNotFoundException as err:
         text = "Bucket does not exist.Extra info: '{0}'".format(err)
         self.write_and_set_status({MESSAGE: text}, HTTPStatus.NOT_FOUND)
     except CorpusNotFoundException as err:
         text = "Corpus does not exist.Extra info: '{0}'".format(err)
         self.write_and_set_status({MESSAGE: text}, HTTPStatus.NOT_FOUND)
     except Exception:
         self.write_and_set_status(
             {MESSAGE: "Internal server error",
              TRACE: traceback.format_exc().splitlines()},
             HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 6
0
    def get(self, corpusId, documentId):
        """Return a single text document of a corpus.

        A ``None`` result from ``get_text_document`` is treated the same way
        as a ``DocumentNotFoundException``: both answer 404.
        """
        try:
            env_id = get_env_id()
            auth = get_autorisation(env_id, None, None)
            corpus_list = get_master_document_corpus_list(env_id, auth)
            found = corpus_list.get_corpus(corpusId).get_text_document(
                documentId)

            if found is None:
                raise DocumentNotFoundException(documentId)

            self.write_and_set_status(found, HTTPStatus.OK)
        except CorpusNotFoundException:
            payload = {MESSAGE: "Specified corpus not found"}
            self.write_and_set_status(payload, HTTPStatus.NOT_FOUND)
        except DocumentNotFoundException:
            payload = {MESSAGE: "Specified document not found"}
            self.write_and_set_status(payload, HTTPStatus.NOT_FOUND)
        except Exception:
            self.write_and_set_status(
                {MESSAGE: "Internal server error",
                 TRACE: traceback.format_exc().splitlines()},
                HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 7
0
    def get(self, corpusId, bucketId):
        """Return annotation counts of a bucket for the requested schema types.

        ``schemaTypes`` is a mandatory, comma-separated query argument; a 422
        is answered when it is absent or empty.
        """
        try:
            raw_types = self.get_query_argument("schemaTypes", default=None)
            if not raw_types:
                payload = {MESSAGE: "Missing schemaTypes parameter"}
                self.write_and_set_status(payload,
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            wanted_types = raw_types.split(",")

            env_id = get_env_id()
            auth = get_autorisation(env_id, None, None)
            search = DocumentSearch(env_id, auth, [], corpusId)
            totals = search.count_annotations_for_types(bucketId,
                                                        wanted_types)

            self.write_and_set_status(totals, HTTPStatus.OK)
        except BucketNotFoundException:
            payload = {MESSAGE: "Specified bucket not found"}
            self.write_and_set_status(payload, HTTPStatus.NOT_FOUND)
        except Exception:
            self.write_and_set_status(
                {MESSAGE: "Internal server error",
                 TRACE: traceback.format_exc().splitlines()},
                HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 8
0
    def get(self, corpusId):
        """Get documents from corpus according to pagination.

        Query arguments:
            from: required integer, must not be negative.
            size: required integer, must be at least 1; values above
                ``MAX_DOCUMENT_SIZE`` are capped to it.
            filterTitle, filterSource, filterJoin, sortBy, sortOrder:
                optional, forwarded to ``corpus.get_text_documents``.

        Responds with 200 and ``{"documents": ...}``, 422 when ``from`` or
        ``size`` is missing, not an integer or out of range, 404 when the
        corpus is not found, the transport error's own status code for a
        ``TransportError``, and 500 otherwise.
        """
        try:
            invalidPaging = {MESSAGE: "Invalid 'from' or 'size' parameter"}

            # default=None keeps a missing argument from raising inside this
            # handler and being reported as an internal server error;
            # int(None) raises TypeError, which is handled like a bad number.
            try:
                fromIndex = int(self.get_query_argument("from", default=None))
            except (TypeError, ValueError):
                self.write_and_set_status(invalidPaging,
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            if fromIndex < 0:
                self.write_and_set_status(
                    {MESSAGE: "'from' cannot be less than zero"},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            try:
                size = int(self.get_query_argument("size", default=None))
            except (TypeError, ValueError):
                self.write_and_set_status(invalidPaging,
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            if size < 1:
                self.write_and_set_status(
                    {MESSAGE: "'size' cannot be less than 1"},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            size = min(size, MAX_DOCUMENT_SIZE)

            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)

            corpus = get_master_document_corpus_list(
                envId, authorization).get_corpus(corpusId)
            filterTitle = self.get_query_argument("filterTitle", default=None)
            filterSource = self.get_query_argument("filterSource",
                                                   default=None)
            filterJoin = self.get_query_argument("filterJoin", default=None)
            sortBy = self.get_query_argument("sortBy", default=None)
            sortOrder = self.get_query_argument("sortOrder", default=None)
            documents = corpus.get_text_documents(fromIndex, size, sortBy,
                                                  sortOrder, filterTitle,
                                                  filterSource, filterJoin)

            self.write_and_set_status({"documents": documents}, HTTPStatus.OK)
        except CorpusNotFoundException:
            self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                      HTTPStatus.NOT_FOUND)
        except ValueError:
            # Kept for ValueErrors raised further down the call chain, which
            # were already reported this way.
            self.write_and_set_status(
                {MESSAGE: "Invalid 'from' or 'size' parameter"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
        except TransportError as te:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status(
                {
                    MESSAGE: "ES TransportError",
                    TRACE: trace
                }, te.status_code)
        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status(
                {
                    MESSAGE: "Internal server error",
                    TRACE: trace
                }, HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 9
0
    def put(self, corpusId, bucketId):
        """Update an existing annotation of a bucket from the JSON request body.

        The body must contain ``annotationId`` and ``schemaType``. When it
        also contains ``bucketId``, that value must equal the one in the
        path. The stored annotation's ``schemaType`` must equal the one in
        the body.

        Responds with 204 on success, 400 for a body that is not valid JSON,
        422 for the validation failures above, 404 when the bucket or the
        annotation is not found, and 500 otherwise.
        """
        try:
            body = json.loads(self.request.body.decode("utf-8"))
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)

            sett = get_settings()
            shouldValidate = sett['USE_ANNOTATION_AND_SCHEMA_VALIDATOR']

            if "annotationId" not in body:
                self.write_and_set_status(
                    {MESSAGE: "Missing annotationId field required to find an annotation to update."},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            # The id identifies the annotation and is removed from the body
            # before the body is handed to update_annotation.
            annotationId = body["annotationId"]
            del body["annotationId"]

            if "schemaType" not in body:
                self.write_and_set_status(
                    {MESSAGE: "Missing schemaType field, which links the annotation to its schema."},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            docType = body["schemaType"]

            if "bucketId" in body and body["bucketId"] != bucketId:
                self.write_and_set_status(
                    {MESSAGE: "bucketId from the path is different than bucketId in the body."},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            bucket = get_master_bucket_list(envId, authorization).get_bucket(corpusId, bucketId)
            storedAnnotation = bucket.get_annotation(id=annotationId, docType=docType)
            if storedAnnotation["schemaType"] != docType:
                self.write_and_set_status(
                    {MESSAGE: "You cannot change the schemaType of an annotation."},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            bucket.update_annotation(body, docType, annotationId, shouldValidate)

            self.write_and_set_status(None,
                                      HTTPStatus.NO_CONTENT)
        except BucketNotFoundException:
            self.write_and_set_status({MESSAGE: "Specified bucket not found"},
                                      HTTPStatus.NOT_FOUND)
        except DocumentNotFoundException:
            self.write_and_set_status({MESSAGE: "Annotation with provided id does not exist"},
                                      HTTPStatus.NOT_FOUND)
        except json.JSONDecodeError:
            # A malformed body is a client error, not an internal one.
            self.write_and_set_status({MESSAGE: "Invalid JSON format for annotation"},
                                      HTTPStatus.BAD_REQUEST)
        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status({MESSAGE: "Internal server error", TRACE: trace},
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 10
0
Archivo: app.py Proyecto: crim-ca/RACS
def set_up_environment():
    """Look up the current environment and create it when it is not found.

    ``es_wait_ready()`` is called before the lookup and again before the
    environment is created.
    """
    try:
        es_wait_ready()
        env_id = get_env_id()
        env_list = get_env_list(get_autorisation(env_id, None, None))
        env_list.get_env(env_id)
    except EnvNotFoundException:
        es_wait_ready()
        env_list.create_env(env_id)
Ejemplo n.º 11
0
    def post(self, corpusId):
        """Upload a corpus' annotations to a destination URL.

        JSON body fields: ``zipFileName`` and ``destUrl`` (both required),
        ``isSendPut`` (default ``True``), ``schemaTypes`` and ``bucketIds``
        (comma-separated strings, optional), ``isMultipart`` (default
        ``False``) and ``multipartFieldName`` (default ``""``).

        Answers 200 with an empty object on success, 422 for a missing
        required field or a failed upload, 404 when the corpus is not found.
        """
        try:
            env_id = get_env_id()
            auth = get_autorisation(env_id, None, None)

            payload = json.loads(self.request.body.decode("utf-8"))

            zip_name = payload.get("zipFileName")
            target_url = payload.get("destUrl")
            use_put = payload.get("isSendPut", True)
            raw_types = payload.get("schemaTypes", None)
            raw_buckets = payload.get("bucketIds", None)
            multipart = payload.get("isMultipart", False)
            multipart_field = payload.get("multipartFieldName", "")

            # Both the archive name and the destination are mandatory.
            if not zip_name:
                self.write_and_set_status(
                    {MESSAGE: "Missing 'zipFileName' parameter"},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            if not target_url:
                self.write_and_set_status(
                    {MESSAGE: "Missing 'destUrl' parameter"},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            wanted_types = raw_types.split(",") if raw_types else []
            wanted_buckets = raw_buckets.split(",") if raw_buckets else []

            Corpus(env_id, auth, corpusId).upload_annotations(
                wanted_buckets, wanted_types, target_url, zip_name, use_put,
                multipart, multipart_field)

            self.write_and_set_status({}, HTTPStatus.OK)
        except CorpusNotFoundException:
            payload = {MESSAGE: "Specified corpus not found"}
            self.write_and_set_status(payload, HTTPStatus.NOT_FOUND)
        except UploadUrlFailException as upload_error:
            text = "Upload failed due: {0}".format(str(upload_error))
            self.write_and_set_status({MESSAGE: text},
                                      HTTPStatus.UNPROCESSABLE_ENTITY)
        except Exception:
            self.write_and_set_status(
                {MESSAGE: "Internal server error",
                 TRACE: traceback.format_exc().splitlines()},
                HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 12
0
 def get(self):
     """Return the list of corpora under the ``data`` key."""
     try:
         env_id = get_env_id()
         auth = get_autorisation(env_id, None, None)
         corpus_list = get_master_document_corpus_list(env_id, auth)
         payload = {"data": corpus_list.get_corpuses_list()}
         self.write_and_set_status(payload, HTTPStatus.OK)
     except Exception:
         self.write_and_set_status(
             {MESSAGE: "Internal server error",
              TRACE: traceback.format_exc().splitlines()},
             HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 13
0
    def post(self):
        """Create a corpus from the JSON request body.

        The body must contain the ``CORPUS_LANGUAGES`` field; every language
        in it must be accepted by the language manager's
        ``has_es_analyser``. An optional ``CORPUS_ID`` must pass
        ``valid_es_id``.

        Answers 200 with the new corpus id, 422 for validation failures and
        409 when a corpus with the same id already exists.
        """
        raw_body = self.request.body.decode("utf-8")
        try:
            env_id = get_env_id()
            auth = get_autorisation(env_id, None, None)
            payload = json.loads(raw_body)

            # CORPUS_LANGUAGES is the only mandatory field.
            if CORPUS_LANGUAGES not in payload:
                self.write_and_set_status(
                    {MESSAGE: "Missing required parameters. {0}".format(CORPUS_LANGUAGES)},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            languages = payload.get(CORPUS_LANGUAGES, None)
            try:
                manager = get_language_manager()
                for candidate in languages:
                    if manager.has_es_analyser(candidate):
                        continue
                    self.write_and_set_status(
                        {MESSAGE: "Invalid language: " + candidate},
                        HTTPStatus.UNPROCESSABLE_ENTITY)
                    return
            except Exception:
                # Any failure while checking the languages is reported as an
                # invalid field value.
                self.write_and_set_status(
                    {MESSAGE: "Invalid languages field: " + str(languages)},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            requested_id = payload.get(CORPUS_ID, None)

            if requested_id and not valid_es_id(requested_id):
                text = "Corpus id invalid '{0}' . CorpusId can only be lowercase,alphanumeric with -_".format(
                    requested_id)
                self.write_and_set_status({MESSAGE: text},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            corpus_list = get_master_document_corpus_list(env_id, auth)
            created = corpus_list.create_corpus(requested_id, languages)
            self.write_and_set_status({"id": created.id}, HTTPStatus.OK)
        except CorpusNotFoundException:
            self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                      HTTPStatus.NOT_FOUND)
        except CorpusInvalidFieldException as invalid_field:
            self.write_and_set_status(
                {MESSAGE: "Invalid field: {0}".format(invalid_field)},
                HTTPStatus.UNPROCESSABLE_ENTITY)
        except CorpusAlreadyExistsException:
            self.write_and_set_status(
                {MESSAGE: "Corpus with the same id already exists"},
                HTTPStatus.CONFLICT)
        except Exception:
            self.write_and_set_status(
                {MESSAGE: "Internal server error",
                 TRACE: traceback.format_exc().splitlines()},
                HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 14
0
 def delete(self, corpusId):
     """Delete a corpus; answers 204 on success and 404 when it is not found."""
     try:
         env_id = get_env_id()
         auth = get_autorisation(env_id, None, None)
         get_master_document_corpus_list(env_id, auth).delete_corpus(corpusId)
         self.write_and_set_status(None, HTTPStatus.NO_CONTENT)
     except CorpusNotFoundException:
         payload = {MESSAGE: "Specified corpus not found"}
         self.write_and_set_status(payload, HTTPStatus.NOT_FOUND)
     except Exception:
         self.write_and_set_status(
             {MESSAGE: "Internal server error",
              TRACE: traceback.format_exc().splitlines()},
             HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 15
0
    def get(self):
        """Return a page of ``SCHEMA_TYPE_DOCUMENT_METADATA`` annotations across corpora.

        Query arguments:
            from: required integer, must not be negative.
            size: required integer, must be at least 1.
            corpusIds: required, comma-separated list of corpus ids.
            filters, filterJoin, sortBy, sortOrder: optional, forwarded to
                ``MultiCorpus.get_annotations_of_type`` (``filters`` after
                ``parse_filters_argument``).

        Responds with 200 and ``{"count": ..., "annotations": ...}``, 422 for
        a missing or invalid ``from``, ``size`` or ``corpusIds``, and 500
        otherwise.
        """
        try:
            # default=None keeps a missing argument from raising inside this
            # handler; int(None) raises TypeError, so a missing and a
            # non-numeric value are both answered with 422 instead of 500.
            fromIndexArgument = self.get_query_argument("from", default=None)
            sizeArgument = self.get_query_argument("size", default=None)
            try:
                fromIndex = int(fromIndexArgument)
                size = int(sizeArgument)
            except (TypeError, ValueError):
                self.write_and_set_status({MESSAGE: "Invalid 'from' or 'size' parameter"},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            if fromIndex < 0:
                self.write_and_set_status({MESSAGE: "'from' cannot be less than zero"},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            if size < 1:
                self.write_and_set_status({MESSAGE: "'size' cannot be less than 1"},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            try:
                corpusIdsArgument = self.get_query_argument("corpusIds", default=None)
                if not corpusIdsArgument:
                    self.write_and_set_status({MESSAGE: "Missing corpusIds parameter"},
                                              HTTPStatus.UNPROCESSABLE_ENTITY)
                    return
                corpusIds = corpusIdsArgument.split(",")
            except Exception:
                self.write_and_set_status({MESSAGE: "Invalid data passed in corpusIds parameter"},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            filters = parse_filters_argument(self.get_query_argument("filters", default=None))
            filterJoin = self.get_query_argument("filterJoin", default=None)
            sortBy = self.get_query_argument("sortBy", default=None)
            sortOrder = self.get_query_argument("sortOrder", default=None)

            env_id = get_env_id()
            authorization = get_autorisation(env_id, None, None)
            mc = MultiCorpus(env_id, authorization)
            count, annotations = mc.get_annotations_of_type(
                corpusIds, SCHEMA_TYPE_DOCUMENT_METADATA,
                fromIndex, size, sortBy, sortOrder, filters, filterJoin)

            self.write_and_set_status({
                "count": count,
                "annotations": annotations},
                HTTPStatus.OK)
        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status({MESSAGE: "Internal server error", TRACE: trace},
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 16
0
def partial_corpora_indices(corpus_ids: List[str]) -> str:
    """Build a comma-separated string of index patterns, one per corpus id.

    Each pattern has the form
    ``<env id><CLASS_PREFIX><corpus id>*<INDEX_DATA_SUFFIX>_*``, where the
    prefix and suffix come from the ``DOCUMENT_DIRECTORY`` class settings.
    """
    # The idiomatic way would be to instantiate a corpus for each corpus id
    # and then search each corpus for the bucket with the right schema type.
    # As that represents a 2n operation before the main search, and fearing
    # latency, the original author (Jean-François Héon) chose this (possibly
    # premature) optimization instead.
    directory_settings = get_settings()['CLASSES']['DOCUMENT_DIRECTORY']
    class_prefix = directory_settings['CLASS_PREFIX']
    pattern_tail = '*' + directory_settings['INDEX_DATA_SUFFIX'] + '_*'
    pattern_head = get_env_id() + class_prefix
    return ','.join(pattern_head + corpus_id + pattern_tail
                    for corpus_id in corpus_ids)
Ejemplo n.º 17
0
    def post(self, corpusId):
        """Add a text document to a corpus from the JSON request body.

        Body fields: ``language`` (required, must be one of the corpus'
        languages), ``id`` (optional), ``text``, ``title`` and ``source``
        (optional, default ``""``).

        Responds with 200 and the document id, 422 when ``language`` is
        missing or is not a corpus language, 409 when a document with the
        same id already exists, and 500 otherwise.
        """
        try:
            body = json.loads(self.request.body.decode("utf-8"))

            language = body.get("language")
            if not language:
                # The status is passed to write_and_set_status directly, as
                # everywhere else, instead of through a separate set_status.
                self.write_and_set_status(
                    {MESSAGE: "Missing required parameters"},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)

            docId = body.get(
                "id")  # Note: 'get' defaults to None when key does not exist
            text = body.get("text", "")
            title = body.get("title", "")
            source = body.get("source", "")

            # NOTE(review): no handler here catches a corpus-not-found error
            # from get_corpus, so it presumably ends up as a 500 - confirm
            # whether a 404 is wanted.
            corpus = get_master_document_corpus_list(
                envId, authorization).get_corpus(corpusId)
            if language not in corpus.languages:
                self.write_and_set_status(
                    {
                        MESSAGE:
                        "Document language do not correspond to corpus language"
                    }, HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            docId = corpus.add_text_document(text, title, language, docId,
                                             source)

            self.write_and_set_status({"id": docId}, HTTPStatus.OK)
        except DocumentAlreadyExistsException:
            self.write_and_set_status(
                {MESSAGE: "Document with the same id already exists"},
                HTTPStatus.CONFLICT)
        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status(
                {
                    MESSAGE: "Internal server error",
                    TRACE: trace
                }, HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 18
0
 def post(self):
     """Delete the current environment and create it again.

     The environment is looked up, deleted, and re-created with the same id
     and name. ``es_wait_ready()`` is called after the deletion and after
     the creation, with a 5 second sleep before the re-creation.

     Responds with 200 on success and 500 (plus a stack trace) on any error.
     """
     try:
         envId = get_env_id()
         authorization = get_autorisation(envId, None, None)
         envList = get_env_list(authorization)
         env = envList.get_env(envId)
         envList.delete_env(env.id)
         es_wait_ready()
         sleep(5)
         get_env_list(authorization).create_env(env.id, env.name)
         es_wait_ready()
         self.write_and_set_status(None, HTTPStatus.OK)
     except Exception:
         trace = traceback.format_exc().splitlines()
         # write() was called here with a status as a second argument; the
         # payload and status go through write_and_set_status like every
         # other response.
         self.write_and_set_status(
             {
                 MESSAGE: "Internal server error",
                 TRACE: trace
             }, HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 19
0
    def get(self, corpusId):
        """Return the buckets of a corpus under the ``buckets`` key.

        Each bucket is passed through ``getBucketWithSchema``; its second
        argument is true only when the ``INCLUDE_SCHEMA_JSON`` query argument
        equals ``"true"``.
        """
        try:
            raw_flag = self.get_query_argument(INCLUDE_SCHEMA_JSON,
                                               default=False)
            with_schema_json = raw_flag == 'true'

            env_id = get_env_id()
            auth = get_autorisation(env_id, None, None)
            corpus = get_master_document_corpus_list(env_id, auth).get_corpus(
                corpusId)
            described = [getBucketWithSchema(entry, with_schema_json)
                         for entry in corpus.get_buckets()]

            self.write_and_set_status({"buckets": described}, HTTPStatus.OK)
        except CorpusNotFoundException:
            payload = {MESSAGE: "Specified corpus not found"}
            self.write_and_set_status(payload, HTTPStatus.NOT_FOUND)
        except Exception:
            self.write_and_set_status(
                {MESSAGE: "Internal server error",
                 TRACE: traceback.format_exc().splitlines()},
                HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 20
0
    def post(self, corpusId, bucketId):
        """Add an annotation to a bucket from the JSON request body.

        ``schemaType`` is required in the body (422 when absent). An optional
        ``annotationId`` is removed from the body and passed separately to
        ``add_annotation``.

        Answers 200 with the annotation id, 404 when the bucket is not found,
        409 when the annotation already exists and 400 for invalid JSON.
        """
        try:
            payload = json.loads(self.request.body.decode("utf-8"))
            env_id = get_env_id()
            auth = get_autorisation(env_id, None, None)

            validate = get_settings()['USE_ANNOTATION_AND_SCHEMA_VALIDATOR']

            # Take the caller-supplied id, if any, out of the stored body.
            requested_id = None
            if "annotationId" in payload:
                requested_id = payload["annotationId"]
                del payload["annotationId"]

            if "schemaType" not in payload:
                self.write_and_set_status(
                    {MESSAGE: "Missing schemaType field, which links the annotation to its schema."},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            schema_type = payload["schemaType"]

            bucket = get_master_bucket_list(env_id, auth).get_bucket(
                corpusId, bucketId)
            stored_id = bucket.add_annotation(payload, schema_type,
                                              requested_id, validate)

            self.write_and_set_status({"id": stored_id}, HTTPStatus.OK)
        except BucketNotFoundException:
            self.write_and_set_status({MESSAGE: "Specified bucket not found"},
                                      HTTPStatus.NOT_FOUND)
        except DocumentAlreadyExistsException:
            self.write_and_set_status(
                {MESSAGE: "Annotation with the same id already exist"},
                HTTPStatus.CONFLICT)
        except JSONDecodeError:
            self.write_and_set_status(
                {MESSAGE: "Invalid JSON format for annotation"},
                HTTPStatus.BAD_REQUEST)
        except Exception:
            self.write_and_set_status(
                {MESSAGE: "Internal server error",
                 TRACE: traceback.format_exc().splitlines()},
                HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 21
0
    def post(self, corpusId):
        """Create a bucket in a corpus from the JSON request body.

        Optional body fields are ``id`` (must pass ``valid_es_id`` when
        given) and ``name``.

        Answers 200 with the bucket id, 409 when a bucket with the same id
        exists, and 422 for an invalid id or a corpus that does not exist.
        """
        try:
            payload = json.loads(self.request.body.decode("utf-8"))
            env_id = get_env_id()
            auth = get_autorisation(env_id, None, None)

            requested_id = payload["id"] if "id" in payload else None
            requested_name = payload["name"] if "name" in payload else None

            if requested_id and not valid_es_id(requested_id):
                text = "Bucket id invalid '{0}' . BucketId can only be lowercase,alphanumeric with -_".format(
                    requested_id)
                self.write_and_set_status({MESSAGE: text},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            corpus = get_master_document_corpus_list(env_id, auth).get_corpus(
                corpusId)
            created = corpus.create_bucket(requested_name, requested_id)
            self.write_and_set_status({"id": created.id}, HTTPStatus.OK)
        except BucketAlreadyExistsException:
            payload = {MESSAGE: "Bucket with the same id already exists"}
            self.write_and_set_status(payload, HTTPStatus.CONFLICT)
        except CorpusNotFoundException as err:
            text = "Corpus does not exist.Extra info: '{0}'".format(err)
            self.write_and_set_status({MESSAGE: text},
                                      HTTPStatus.UNPROCESSABLE_ENTITY)
        except Exception:
            self.write_and_set_status(
                {MESSAGE: "Internal server error",
                 TRACE: traceback.format_exc().splitlines()},
                HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 22
0
    def get(self, corpusId):
        """Send a zip archive of a corpus' documents to the client.

        Responds with 404 when the corpus is not found. Any other error is
        logged and answered with 500 plus a stack trace.
        """
        try:
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)
            documentCorpus = DocumentCorpus(envId, authorization, corpusId)
            zipPath = documentCorpus.get_documents_zip()
            zipName = os.path.basename(zipPath)
            try:
                self.send_zip_file_with_get(zipPath, zipName)
            finally:
                # Clear the temporary files even when sending the archive
                # fails, so a failed download does not leave them behind.
                documentCorpus.clear_temporary_files()

        except CorpusNotFoundException:
            self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                      HTTPStatus.NOT_FOUND)
        except Exception:
            trace = traceback.format_exc().splitlines()
            logger = logging.getLogger(__name__)
            logger.error(str(trace))
            self.write_and_set_status(
                {
                    MESSAGE: "Internal server error",
                    TRACE: trace
                }, HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 23
0
    def get(self, corpusId):
        """Write a summary of one corpus.

        The summary holds the corpus id, its languages, its modification
        date (converted with ``datetime_to_json_str``) and its document
        count. Responds NOT_FOUND when the corpus does not exist and
        INTERNAL_SERVER_ERROR with the traceback for any other failure.
        """
        try:
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)
            corpus = get_master_document_corpus_list(
                envId, authorization).get_corpus(corpusId)
            self.write_and_set_status(
                {
                    CORPUS_ID: corpus.id,
                    CORPUS_LANGUAGES: corpus.languages,
                    CORPUS_MODIFICATION_DATE:
                    datetime_to_json_str(corpus.modificationDate),
                    CORPUS_DOCUMENT_COUNT: corpus.get_documents_count()
                }, HTTPStatus.OK)

        except CorpusNotFoundException:
            self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                      HTTPStatus.NOT_FOUND)

        except Exception:
            errorBody = {
                MESSAGE: "Internal server error",
                TRACE: traceback.format_exc().splitlines()
            }
            self.write_and_set_status(errorBody,
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 24
0
    def get(self, corpusId, bucketId, schemaType):
        """Search the annotations of one schema type in a bucket, paginated.

        Query arguments:
            from: required integer, index of the first result (>= 0).
            size: required integer, number of results to return (>= 1).
            filters, filterJoin, sortBy, sortOrder: optional, passed on to
                ``DocumentSearch.search_annotations_for_one_type``
                (``filters`` after ``parse_filters_argument``).

        Writes ``{"count": ..., "annotations": ...}`` with status OK.
        A missing, non-integer or out-of-range ``from``/``size`` yields
        UNPROCESSABLE_ENTITY; any other failure yields
        INTERNAL_SERVER_ERROR with the traceback.
        """
        try:
            # Read with a default so an absent argument is reported as a
            # client error instead of surfacing as an internal server error.
            fromIndexArgument = self.get_query_argument("from", default=None)
            sizeArgument = self.get_query_argument("size", default=None)
            if fromIndexArgument is None or sizeArgument is None:
                missingName = "from" if fromIndexArgument is None else "size"
                self.write_and_set_status(
                    {MESSAGE: "Missing required parameter '{0}'".format(missingName)},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            try:
                fromIndex = int(fromIndexArgument)
                size = int(sizeArgument)
            except ValueError:
                self.write_and_set_status({MESSAGE: "'from' and 'size' must be integers"},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            if fromIndex < 0:
                self.write_and_set_status({MESSAGE: "'from' cannot be less than zero"},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            if size < 1:
                self.write_and_set_status({MESSAGE: "'size' cannot be less than 1"},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)
            documentSearch = DocumentSearch(envId, authorization, None, corpusId)

            filters = parse_filters_argument(self.get_query_argument("filters", default=None))
            filterJoin = self.get_query_argument("filterJoin", default=None)
            sortBy = self.get_query_argument("sortBy", default=None)
            sortOrder = self.get_query_argument("sortOrder", default=None)

            count, annotations = documentSearch.search_annotations_for_one_type(
                bucketId, schemaType,
                fromIndex, size, sortBy, sortOrder, filters, filterJoin)

            self.write_and_set_status({
                "count": count,
                "annotations": annotations},
                HTTPStatus.OK)
        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status({MESSAGE: "Internal server error", TRACE: trace},
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 25
0
    def getAnnotations(self, corpusId, documentIds: List[str]):
        """Write the annotations of the given documents for selected schema types.

        The "schemaTypes" request argument is a comma-separated list of
        "bucketId:schemaType" pairs; it is grouped into a mapping of
        bucket id to schema types before querying. The optional
        "offsetBegin" and "offsetEnd" arguments restrict the query to one
        interval; when both are left at their defaults no interval is passed.

        Responds UNPROCESSABLE_ENTITY when "schemaTypes" is missing or
        cannot be parsed.
        """
        typesPerBucket = {}

        try:
            rawSchemaTypes = self.get_arguments("schemaTypes")
            if not rawSchemaTypes:
                self.write_and_set_status({MESSAGE: "Missing schemaTypes parameter"},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            # get_arguments returns a list; only its first entry is used and
            # it carries the whole comma-separated value.
            for pair in rawSchemaTypes[0].split(","):
                pieces = pair.split(":")
                bucketId, schemaType = pieces[0], pieces[1]
                typesPerBucket.setdefault(bucketId, []).append(schemaType)
        except Exception:
            self.write_and_set_status({MESSAGE: "Invalid data passed in schemaTypes parameter"},
                                      HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        # TODO: the defaults are arbitrary extreme values standing in for
        # "no offset given".
        # NOTE(review): values supplied by the client arrive as strings;
        # confirm Interval and the comparison below handle that as intended.
        offsetBegin = self.get_argument("offsetBegin", MIN_OFFSET_BEGIN)
        offsetEnd = self.get_argument("offsetEnd", MAX_OFFSET_END)

        envId = get_env_id()
        authorization = get_autorisation(envId, None, None)
        documentSearch = DocumentSearch(envId, authorization, documentIds, corpusId)

        usesDefaultOffsets = (offsetBegin == MIN_OFFSET_BEGIN
                              and offsetEnd == MAX_OFFSET_END)
        if usesDefaultOffsets:
            offsets = None
        else:
            offsets = [Interval(offsetBegin, offsetEnd, False, False, False)]

        res = documentSearch.get_annotations(typesPerBucket, offsets)
        # An empty entry for this corpus is reported as an empty object.
        self.write(res if res[corpusId] else {})
        self.write_and_set_status(None,
                                  HTTPStatus.OK)
Ejemplo n.º 26
0
    def delete(self, corpusId, bucketId, schemaType):
        """Delete a schema type from a bucket.

        Responds NO_CONTENT on success, NOT_FOUND when the corpus, the
        bucket or the schema type does not exist, and
        INTERNAL_SERVER_ERROR with the traceback for any other failure.
        """
        try:
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)

            corpora = get_master_document_corpus_list(envId, authorization)
            bucket = corpora.get_corpus(corpusId).get_bucket(bucketId)
            schemasInfo = bucket.get_schemas_info(False)
            knownTypes = [entry['schemaType'] for entry in schemasInfo['data']]

            if schemaType in knownTypes:
                bucket.delete_schema_type(schemaType)
                self.write_and_set_status(None, HTTPStatus.NO_CONTENT)
            else:
                unknownTypeMessage = "Schema Type: {0} does not exist".format(schemaType)
                self.write_and_set_status({MESSAGE: unknownTypeMessage},
                                          HTTPStatus.NOT_FOUND)
        except CorpusNotFoundException as err:
            corpusMessage = "Corpus does not exist.Extra info: '{0}'".format(err)
            self.write_and_set_status({MESSAGE: corpusMessage},
                                      HTTPStatus.NOT_FOUND)
        except BucketNotFoundException as err:
            bucketMessage = "Bucket does not exist.Extra info: '{0}'".format(err)
            self.write_and_set_status({MESSAGE: bucketMessage},
                                      HTTPStatus.NOT_FOUND)
        except Exception:
            self.write_and_set_status(
                {
                    MESSAGE: "Internal server error",
                    TRACE: traceback.format_exc().splitlines()
                }, HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 27
0
    def put(self, corpusId):
        """Update the languages of a corpus from a JSON request body.

        The body may contain a ``CORPUS_LANGUAGES`` entry; each language in
        it must be accepted by the language manager's ``has_es_analyser``.

        Responds NO_CONTENT on success, UNPROCESSABLE_ENTITY for an
        unsupported language, a malformed languages field or a
        ``CorpusInvalidFieldException``, NOT_FOUND when the corpus does not
        exist, and INTERNAL_SERVER_ERROR with the traceback otherwise.
        """
        try:
            body = self.request.body.decode("utf-8")
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)
            json_args = json.loads(body)

            # Bound up front so the handler below can always format it,
            # even when reading the field itself fails (e.g. the body is
            # not a JSON object).
            languages = None
            try:
                languages = json_args.get(CORPUS_LANGUAGES, None)
                if languages:
                    languageManager = get_language_manager()
                    for language in languages:
                        if not languageManager.has_es_analyser(language):
                            self.write_and_set_status({MESSAGE: "Invalid language: " + language},
                                                      HTTPStatus.UNPROCESSABLE_ENTITY)
                            return
            except Exception:
                # Any failure while inspecting the field is reported as a
                # client error rather than an internal one.
                self.write_and_set_status({MESSAGE: "Invalid languages field: " + str(languages)},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            corpora = get_master_document_corpus_list(envId, authorization)
            corpora.update_corpus(corpusId, languages)
            self.write_and_set_status(None, HTTPStatus.NO_CONTENT)

        except CorpusInvalidFieldException as ci:
            self.write_and_set_status({MESSAGE: "Invalid field: {0}".format(ci)},
                                      HTTPStatus.UNPROCESSABLE_ENTITY)

        except CorpusNotFoundException:
            self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                      HTTPStatus.NOT_FOUND)

        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status({MESSAGE: "Internal server error", TRACE: trace},
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 28
0
    def post(self, corpusId, bucketId):
        """Add annotations to a bucket in batch from an uploaded file.

        The first file of the multipart field "file" is stored in the
        temporary directory under a unique name that keeps the uploaded
        file's extension, handed to ``Corpus.add_annotations``, and then
        removed again whether or not the import succeeded.

        Responds OK when no errors are reported, UNPROCESSABLE_ENTITY with
        the reported errors otherwise, and INTERNAL_SERVER_ERROR with the
        traceback for any unexpected failure.
        """
        try:
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)

            # Store the upload in the temporary directory.
            tmpUploadFolder = get_jass_tmp_dir()
            fileinfo = self.request.files['file'][0]
            fname = fileinfo['filename']
            ext = os.path.splitext(fname)[1]
            # uuid1 must be called: stringifying the function itself gives
            # every upload the same name, so concurrent uploads collide.
            zipName = str(uuid1()) + ext
            zipPath = os.path.join(tmpUploadFolder, zipName)

            try:
                with open(zipPath, 'wb') as f:
                    f.write(fileinfo['body'])

                # Add annotations in batch.
                batchCorpus = Corpus(envId, authorization, corpusId)
                errors = batchCorpus.add_annotations(bucketId, zipPath)
            finally:
                # Remove the stored upload even when the import fails.
                if os.path.exists(zipPath):
                    os.remove(zipPath)

            if errors:
                self.write_and_set_status(errors,
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
            else:
                self.write_and_set_status(None, HTTPStatus.OK)

        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status(
                {
                    MESSAGE: "Internal server error",
                    TRACE: trace
                }, HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 29
0
def corpus_from_id(corpus_id: str) -> DocumentCorpus:
    """Return the corpus with the given id from the master corpus list.

    The environment id and authorization are obtained from
    ``get_env_id`` and ``get_autorisation``.
    """
    environment = get_env_id()
    credentials = get_autorisation(environment, None, None)
    return get_master_document_corpus_list(
        environment, credentials).get_corpus(corpus_id)
Ejemplo n.º 30
0
    def post(self, corpusId, bucketId):
        """Bind a new annotation schema to a bucket.

        The request body (as returned by ``strip_body_bom``) must contain
        "targetType", "schemaType" and "properties". The schema is stored
        through the schema list and then bound to the bucket under its
        schema type.

        Responds NO_CONTENT on success; UNPROCESSABLE_ENTITY for missing
        fields, an unsupported target type, an unknown corpus or bucket, or
        an invalid schema binding; FORBIDDEN when the schema type is
        already bound to the bucket; INTERNAL_SERVER_ERROR with the
        traceback otherwise.
        """
        try:
            body = self.strip_body_bom()
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)

            requiredFields = ["targetType", "schemaType", "properties"]
            if is_missing_required_fields(body, requiredFields):
                self.write_and_set_status(
                    {MESSAGE: missing_fields_message(body, requiredFields)},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            schemaType = body["schemaType"]
            targetTypeName = body["targetType"]
            if not TargetType.has(targetTypeName):
                unsupportedMessage = "Target type {0} not supported".format(targetTypeName)
                self.write_and_set_status({MESSAGE: unsupportedMessage},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            corpora = get_master_document_corpus_list(envId, authorization)
            bucket = corpora.get_corpus(corpusId).get_bucket(bucketId)
            boundTypes = [
                entry['schemaType']
                for entry in bucket.get_schemas_info(False)['data']
            ]
            if schemaType in boundTypes:
                alreadyBoundMessage = (
                    "A schema with the schemaType '{0}' is already bound to the bucket."
                    .format(schemaType))
                self.write_and_set_status({MESSAGE: alreadyBoundMessage},
                                          HTTPStatus.FORBIDDEN)
                return

            # Store the schema under its hash; surface-1d targets mark
            # "offsets" as a nested field.
            targetType = TargetType(targetTypeName)
            if targetType == TargetType.document_surface1d:
                nestedFields = ["offsets"]
            else:
                nestedFields = []
            schemaList = get_schema_list(envId, authorization)
            schemaId = schemaList.add_json_schema_as_hash(body, False, nestedFields)
            bucket.add_or_update_schema_to_bucket(schemaId, schemaType,
                                                  targetType, {})

            self.write_and_set_status(None, HTTPStatus.NO_CONTENT)
        except CorpusNotFoundException as err:
            corpusMessage = "Corpus does not exist.Extra info: '{0}'".format(err)
            self.write_and_set_status({MESSAGE: corpusMessage},
                                      HTTPStatus.UNPROCESSABLE_ENTITY)
        except BucketNotFoundException as err:
            bucketMessage = "Bucket does not exist.Extra info: '{0}'".format(err)
            self.write_and_set_status({MESSAGE: bucketMessage},
                                      HTTPStatus.UNPROCESSABLE_ENTITY)
        except SchemaBindingInvalid as err:
            self.write_and_set_status(
                {MESSAGE: "Schema Binding Invalid: '{0}'".format(err)},
                HTTPStatus.UNPROCESSABLE_ENTITY)
        except Exception:
            self.write_and_set_status(
                {
                    MESSAGE: "Internal server error",
                    TRACE: traceback.format_exc().splitlines()
                }, HTTPStatus.INTERNAL_SERVER_ERROR)