Ejemplo n.º 1
0
    def delete(self, corpusId, documentId):
        """Delete a single document and, optionally, its annotations.

        The ``deleteAnnotations`` query argument is required; annotations are
        deleted together with the document only when it equals ``'true'``.
        Responds 404 when the corpus or the document is unknown and 500
        (with the stack trace) on any other failure.
        """
        try:
            rawFlag = self.get_query_argument("deleteAnnotations", None)
            if not rawFlag:
                # Absent or empty flag: report it and do nothing else.
                self.missing_required_field("deleteAnnotations")
                return

            envId = get_env_id()
            corpora = get_master_document_corpus_list(
                envId, get_autorisation(envId, None, None))
            removed = corpora.get_corpus(corpusId).delete_document(
                documentId, rawFlag == 'true')
            self.write_and_set_status(removed, HTTPStatus.OK)
        except CorpusNotFoundException:
            notFound = {MESSAGE: "Specified corpus not found"}
            self.write_and_set_status(notFound, HTTPStatus.NOT_FOUND)
        except DocumentNotFoundException:
            notFound = {MESSAGE: "Specified document not found"}
            self.write_and_set_status(notFound, HTTPStatus.NOT_FOUND)
        except Exception:
            failure = {
                MESSAGE: "Internal server error",
                TRACE: traceback.format_exc().splitlines(),
            }
            self.write_and_set_status(failure,
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 2
0
    def get(self, corpusId, documentId):
        """Return a single text document of a corpus.

        Responds 404 when the corpus is unknown or when the corpus returns
        ``None`` for the document, and 500 (with the stack trace) on any
        other failure.
        """
        try:
            envId = get_env_id()
            corpora = get_master_document_corpus_list(
                envId, get_autorisation(envId, None, None))
            found = corpora.get_corpus(corpusId).get_text_document(documentId)
            if found is None:
                # Route the "no such document" case through the 404 handler.
                raise DocumentNotFoundException(documentId)
            self.write_and_set_status(found, HTTPStatus.OK)
        except CorpusNotFoundException:
            notFound = {MESSAGE: "Specified corpus not found"}
            self.write_and_set_status(notFound, HTTPStatus.NOT_FOUND)
        except DocumentNotFoundException:
            notFound = {MESSAGE: "Specified document not found"}
            self.write_and_set_status(notFound, HTTPStatus.NOT_FOUND)
        except Exception:
            failure = {
                MESSAGE: "Internal server error",
                TRACE: traceback.format_exc().splitlines(),
            }
            self.write_and_set_status(failure,
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 3
0
 def post(self):
     """Create an environment from the JSON request body.

     The optional ``id`` and ``name`` body fields are forwarded to
     ``create_env`` (``None`` when absent). Responds 200 with the created
     environment's id and name, 409 when an environment with the same id
     already exists, and 500 (with the stack trace) on any other failure.
     """
     try:
         payload = json.loads(self.request.body.decode("utf-8"))
         envId = payload["id"] if "id" in payload else None
         envName = payload["name"] if "name" in payload else None
         # TODO replace by true authorization
         authorization = get_autorisation(envId, None, None)
         created = get_env_list(authorization).create_env(envId, envName)
         summary = {
             "id": created.id,
             "name": created.name,
             "securityType": "basic",
         }
         self.write_and_set_status(summary, HTTPStatus.OK)
     except EnvAlreadyExistWithSameIdException:
         conflict = {MESSAGE: "env with the same id already exists"}
         self.write_and_set_status(conflict, HTTPStatus.CONFLICT)
     except Exception:
         failure = {
             MESSAGE: "Internal server error",
             TRACE: traceback.format_exc().splitlines(),
         }
         self.write_and_set_status(failure,
                                   HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 4
0
    def get(self, corpusId, bucketId, annotationId):
        """Return one annotation of a bucket.

        The ``schemaType`` request argument is required (422 when missing).
        In the response the annotation's ``id`` key is renamed to
        ``annotationId``. Responds 404 when the bucket or the annotation is
        not found and 500 (with the stack trace) on any other failure.
        """
        try:
            docType = self.get_argument("schemaType", None)
            if not docType:
                missing = {MESSAGE: "Missing schemaType."}
                self.write_and_set_status(missing,
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)
            bucket = get_master_bucket_list(envId, authorization).get_bucket(
                corpusId, bucketId)
            anno = bucket.get_annotation(annotationId, docType)

            # Expose the identifier under "annotationId" instead of "id".
            anno["annotationId"] = anno["id"]
            del anno["id"]
            self.write_and_set_status(anno, HTTPStatus.OK)
        except BucketNotFoundException:
            notFound = {MESSAGE: "Specified bucket not found"}
            self.write_and_set_status(notFound, HTTPStatus.NOT_FOUND)
        except DocumentNotFoundException:
            notFound = {MESSAGE: "Annotation with provided id and schemaType does not exist"}
            self.write_and_set_status(notFound, HTTPStatus.NOT_FOUND)
        except Exception:
            failure = {
                MESSAGE: "Internal server error",
                TRACE: traceback.format_exc().splitlines(),
            }
            self.write_and_set_status(failure,
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 5
0
    def delete(self, corpusId, bucketId, annotationId):
        """Delete one annotation from a bucket.

        The ``schemaType`` request argument is required. Responds 204 on
        success, 422 when ``schemaType`` is missing, 404 when the bucket or
        the annotation is not found, and 500 (with the stack trace) on any
        other failure.
        """
        try:
            # Validate the request before touching the backend.
            docType = self.get_argument("schemaType", None)
            if not docType:
                # A missing argument is a client input error, not a missing
                # resource: answer 422 like the other annotation handlers
                # do for a missing schemaType, rather than 404.
                self.write_and_set_status(
                    {MESSAGE: "Missing schemaType field, which links the annotation to its schema."},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)
            get_master_bucket_list(envId, authorization) \
                .get_bucket(corpusId, bucketId) \
                .delete_annotation(annotationId, docType)
            self.write_and_set_status(None,
                                      HTTPStatus.NO_CONTENT)
        except BucketNotFoundException:
            self.write_and_set_status({MESSAGE: "Specified bucket not found"},
                                      HTTPStatus.NOT_FOUND)
        except DocumentNotFoundException:
            self.write_and_set_status({MESSAGE: "Annotation with provided id does not exist"},
                                      HTTPStatus.NOT_FOUND)
        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status({MESSAGE: "Internal server error", TRACE: trace},
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 6
0
 def delete(self, corpusId, bucketId):
     """Delete a bucket from a corpus.

     Responds 204 on success, 404 when the corpus or the bucket does not
     exist, and 500 (with the stack trace) on any other failure.
     """
     try:
         envId = get_env_id()
         corpora = get_master_document_corpus_list(
             envId, get_autorisation(envId, None, None))
         corpora.get_corpus(corpusId).delete_bucket(bucketId)
         self.write_and_set_status(None, HTTPStatus.NO_CONTENT)
     except BucketNotFoundException as err:
         detail = "Bucket does not exist.Extra info: '{0}'".format(err)
         self.write_and_set_status({MESSAGE: detail}, HTTPStatus.NOT_FOUND)
     except CorpusNotFoundException as err:
         detail = "Corpus does not exist.Extra info: '{0}'".format(err)
         self.write_and_set_status({MESSAGE: detail}, HTTPStatus.NOT_FOUND)
     except Exception:
         failure = {
             MESSAGE: "Internal server error",
             TRACE: traceback.format_exc().splitlines(),
         }
         self.write_and_set_status(failure,
                                   HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 7
0
    def get(self, corpusId):
        """Send a zip of a corpus's annotations to the client.

        The optional ``schemaTypes`` and ``bucketIds`` query arguments are
        comma-separated lists; each defaults to an empty list, which is
        passed as-is to ``create_tmp_annotations_zip``. Responds 404 when
        the corpus is unknown and 500 (with the stack trace) on any other
        failure.
        """
        try:
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)
            schemaTypesStr = self.get_query_argument("schemaTypes", None)
            bucketIdsStr = self.get_query_argument("bucketIds", None)
            schemaTypes = schemaTypesStr.split(",") if schemaTypesStr else []
            bucketIds = bucketIdsStr.split(",") if bucketIdsStr else []

            batchCorpus = Corpus(envId, authorization, corpusId)
            try:
                zipPath = batchCorpus.create_tmp_annotations_zip(
                    bucketIds, schemaTypes)
                self.send_zip_file_with_get(zipPath,
                                            os.path.basename(zipPath))
            finally:
                # Always clean up, including when building or sending the
                # zip fails; otherwise the temporary files are left behind.
                # NOTE(review): assumes clear_temporary_files() is safe to
                # call when zip creation did not complete - confirm.
                batchCorpus.clear_temporary_files()
        except CorpusNotFoundException:
            self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                      HTTPStatus.NOT_FOUND)
        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status(
                {
                    MESSAGE: "Internal server error",
                    TRACE: trace
                }, HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 8
0
    def get(self, corpusId, bucketId):
        """Return annotation counts of a bucket for the requested schema types.

        The ``schemaTypes`` query argument (comma-separated) is required
        (422 when missing or empty). Responds 404 when the bucket is not
        found and 500 (with the stack trace) on any other failure.
        """
        try:
            rawTypes = self.get_query_argument("schemaTypes", default=None)
            if not rawTypes:
                missing = {MESSAGE: "Missing schemaTypes parameter"}
                self.write_and_set_status(missing,
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            schemaTypes = rawTypes.split(",")

            envId = get_env_id()
            search = DocumentSearch(
                envId, get_autorisation(envId, None, None), [], corpusId)
            counts = search.count_annotations_for_types(bucketId, schemaTypes)
            self.write_and_set_status(counts, HTTPStatus.OK)
        except BucketNotFoundException:
            notFound = {MESSAGE: "Specified bucket not found"}
            self.write_and_set_status(notFound, HTTPStatus.NOT_FOUND)
        except Exception:
            failure = {
                MESSAGE: "Internal server error",
                TRACE: traceback.format_exc().splitlines(),
            }
            self.write_and_set_status(failure,
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 9
0
    def get(self, corpusId):
        """Get documents from a corpus according to pagination.

        Query arguments:
            from: required integer, index of the first document (>= 0).
            size: required integer (>= 1), capped at MAX_DOCUMENT_SIZE.
            filterTitle, filterSource, filterJoin, sortBy, sortOrder:
                optional, forwarded unchanged to ``get_text_documents``.

        Responds 200 with ``{"documents": [...]}``, 422 for a missing or
        invalid ``from``/``size``, 404 when the corpus is unknown, the
        TransportError's own status code when it is an integer, and 500
        (with the stack trace) otherwise.
        """
        try:
            # The empty-string default makes a missing argument fail int()
            # with ValueError, so it is answered with 422 like any other
            # malformed value instead of falling to the 500 handler.
            fromIndex = int(self.get_query_argument("from", default=""))
            if fromIndex < 0:
                self.write_and_set_status(
                    {MESSAGE: "'from' cannot be less than zero"},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            size = int(self.get_query_argument("size", default=""))
            if size < 1:
                self.write_and_set_status(
                    {MESSAGE: "'size' cannot be less than 1"},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            size = min(size, MAX_DOCUMENT_SIZE)

            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)

            corpus = get_master_document_corpus_list(
                envId, authorization).get_corpus(corpusId)
            filterTitle = self.get_query_argument("filterTitle", default=None)
            filterSource = self.get_query_argument("filterSource",
                                                   default=None)
            filterJoin = self.get_query_argument("filterJoin", default=None)
            sortBy = self.get_query_argument("sortBy", default=None)
            sortOrder = self.get_query_argument("sortOrder", default=None)
            documents = corpus.get_text_documents(fromIndex, size, sortBy,
                                                  sortOrder, filterTitle,
                                                  filterSource, filterJoin)

            self.write_and_set_status({"documents": documents}, HTTPStatus.OK)
        except CorpusNotFoundException:
            self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                      HTTPStatus.NOT_FOUND)
        except ValueError:
            self.write_and_set_status(
                {MESSAGE: "Invalid 'from' or 'size' parameter"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
        except TransportError as te:
            trace = traceback.format_exc().splitlines()
            # NOTE(review): presumably elasticsearch's TransportError, whose
            # status_code can be a non-numeric placeholder when no HTTP
            # response was received - confirm. Only forward real integer
            # codes; fall back to 500 otherwise.
            status = te.status_code
            if not isinstance(status, int):
                status = HTTPStatus.INTERNAL_SERVER_ERROR
            self.write_and_set_status(
                {
                    MESSAGE: "ES TransportError",
                    TRACE: trace
                }, status)
        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status(
                {
                    MESSAGE: "Internal server error",
                    TRACE: trace
                }, HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 10
0
    def put(self, corpusId, bucketId):
        """Update an existing annotation of a bucket from the JSON body.

        Body fields:
            annotationId: required; selects the annotation and is removed
                from the body before the update.
            schemaType: required; must equal the stored annotation's
                schemaType.
            bucketId: optional; when present it must equal the path's
                bucketId.

        Responds 204 on success, 400 for a malformed JSON body, 422 for a
        failed validation, 404 when the bucket or the annotation is not
        found, and 500 (with the stack trace) on any other failure.
        """
        try:
            try:
                body = json.loads(self.request.body.decode("utf-8"))
            except json.JSONDecodeError:
                # Malformed JSON is a client error: answer 400 rather than
                # a 500 carrying a stack trace.
                self.write_and_set_status({MESSAGE: "Invalid JSON format for annotation"},
                                          HTTPStatus.BAD_REQUEST)
                return

            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)
            shouldValidate = get_settings()['USE_ANNOTATION_AND_SCHEMA_VALIDATOR']

            if "annotationId" not in body:
                self.write_and_set_status(
                    {MESSAGE: "Missing annotationId field required to find an annotation to update."},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            annotationId = body["annotationId"]
            del body["annotationId"]

            if "schemaType" not in body:
                self.write_and_set_status(
                    {MESSAGE: "Missing schemaType field, which links the annotation to its schema."},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            docType = body["schemaType"]

            if "bucketId" in body and body["bucketId"] != bucketId:
                self.write_and_set_status(
                    {MESSAGE: "bucketId from the path is different than bucketId in the body."},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            bucket = get_master_bucket_list(envId, authorization).get_bucket(corpusId, bucketId)
            # Fetch the stored annotation first so a schemaType change is
            # rejected before anything is written.
            storedAnnotation = bucket.get_annotation(id=annotationId, docType=docType)
            if storedAnnotation["schemaType"] != docType:
                self.write_and_set_status(
                    {MESSAGE: "You cannot change the schemaType of an annotation."},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            bucket.update_annotation(body, docType, annotationId, shouldValidate)

            self.write_and_set_status(None,
                                      HTTPStatus.NO_CONTENT)
        except BucketNotFoundException:
            self.write_and_set_status({MESSAGE: "Specified bucket not found"},
                                      HTTPStatus.NOT_FOUND)
        except DocumentNotFoundException:
            self.write_and_set_status({MESSAGE: "Annotation with provided id does not exist"},
                                      HTTPStatus.NOT_FOUND)
        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status({MESSAGE: "Internal server error", TRACE: trace},
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 11
0
Archivo: app.py Proyecto: crim-ca/RACS
def set_up_environment():
    """Make sure the configured environment exists, creating it if needed.

    Calls ``es_wait_ready()`` and then looks up the environment whose id is
    returned by ``get_env_id()``. When the lookup raises
    ``EnvNotFoundException``, ``es_wait_ready()`` is called again and the
    environment is created with that id.
    """
    try:
        es_wait_ready()
        envId = get_env_id()
        envList = get_env_list(get_autorisation(envId, None, None))
        envList.get_env(envId)
    except EnvNotFoundException:
        # The environment does not exist yet: create it.
        es_wait_ready()
        envList.create_env(envId)
Ejemplo n.º 12
0
    def post(self, corpusId):
        """Upload a corpus's annotations to a destination URL.

        JSON body fields:
            zipFileName, destUrl: required (422 when missing or empty).
            schemaTypes, bucketIds: optional comma-separated strings,
                defaulting to empty lists.
            isSendPut (default True), isMultipart (default False),
            multipartFieldName (default ""): forwarded to
                ``upload_annotations``.

        Responds 200 with an empty object, 404 when the corpus is unknown,
        422 when the upload fails, and 500 (with the stack trace) on any
        other failure.
        """
        try:
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)

            options = json.loads(self.request.body.decode("utf-8"))

            zipFileName = options.get("zipFileName")
            destUrl = options.get("destUrl")
            isSendPut = options.get("isSendPut", True)
            rawSchemaTypes = options.get("schemaTypes", None)
            rawBucketIds = options.get("bucketIds", None)
            isMultipart = options.get("isMultipart", False)
            multipartFieldName = options.get("multipartFieldName", "")

            # Required fields, checked in this order.
            required = (("zipFileName", zipFileName), ("destUrl", destUrl))
            for fieldName, fieldValue in required:
                if not fieldValue:
                    self.write_and_set_status(
                        {MESSAGE: "Missing '{0}' parameter".format(fieldName)},
                        HTTPStatus.UNPROCESSABLE_ENTITY)
                    return

            schemaTypes = rawSchemaTypes.split(",") if rawSchemaTypes else []
            bucketIds = rawBucketIds.split(",") if rawBucketIds else []

            Corpus(envId, authorization, corpusId).upload_annotations(
                bucketIds, schemaTypes, destUrl, zipFileName, isSendPut,
                isMultipart, multipartFieldName)

            self.write_and_set_status({}, HTTPStatus.OK)
        except CorpusNotFoundException:
            notFound = {MESSAGE: "Specified corpus not found"}
            self.write_and_set_status(notFound, HTTPStatus.NOT_FOUND)
        except UploadUrlFailException as upErr:
            reason = "Upload failed due: {0}".format(str(upErr))
            self.write_and_set_status({MESSAGE: reason},
                                      HTTPStatus.UNPROCESSABLE_ENTITY)
        except Exception:
            failure = {
                MESSAGE: "Internal server error",
                TRACE: traceback.format_exc().splitlines(),
            }
            self.write_and_set_status(failure,
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 13
0
 def get(self):
     """Return the list of corpora of the current environment.

     Responds 200 with ``{"data": [...]}`` and 500 (with the stack trace)
     on any failure.
     """
     try:
         envId = get_env_id()
         corpora = get_master_document_corpus_list(
             envId, get_autorisation(envId, None, None))
         listing = {"data": corpora.get_corpuses_list()}
         self.write_and_set_status(listing, HTTPStatus.OK)
     except Exception:
         failure = {
             MESSAGE: "Internal server error",
             TRACE: traceback.format_exc().splitlines(),
         }
         self.write_and_set_status(failure,
                                   HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 14
0
    def post(self):
        """Create a corpus from the JSON request body.

        The CORPUS_LANGUAGES field is required and every language in it
        must be accepted by the language manager's ``has_es_analyser``; the
        optional CORPUS_ID field must pass ``valid_es_id`` when given.

        Responds 200 with the new corpus id, 422 for a missing or invalid
        field, 409 when a corpus with the same id already exists, 404 on
        ``CorpusNotFoundException``, and 500 (with the stack trace) on any
        other failure.
        """
        rawBody = self.request.body.decode("utf-8")
        try:
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)
            json_args = json.loads(rawBody)

            missing = [name for name in [CORPUS_LANGUAGES]
                       if name not in json_args]
            if missing:
                self.write_and_set_status(
                    {MESSAGE: "Missing required parameters. {0}".format(missing[0])},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            languages = json_args.get(CORPUS_LANGUAGES)
            try:
                languageManager = get_language_manager()
                for candidate in languages:
                    if languageManager.has_es_analyser(candidate):
                        continue
                    self.write_and_set_status(
                        {MESSAGE: "Invalid language: " + candidate},
                        HTTPStatus.UNPROCESSABLE_ENTITY)
                    return
            except Exception:
                # Any failure while checking the languages (for instance a
                # value that cannot be iterated) is reported as an invalid
                # field rather than as a server error.
                self.write_and_set_status(
                    {MESSAGE: "Invalid languages field: " + str(languages)},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            corpusId = json_args.get(CORPUS_ID)
            if corpusId and not valid_es_id(corpusId):
                reason = "Corpus id invalid '{0}' . CorpusId can only be lowercase,alphanumeric with -_".format(
                    corpusId)
                self.write_and_set_status({MESSAGE: reason},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            created = get_master_document_corpus_list(
                envId, authorization).create_corpus(corpusId, languages)
            self.write_and_set_status({"id": created.id}, HTTPStatus.OK)
        except CorpusNotFoundException:
            notFound = {MESSAGE: "Specified corpus not found"}
            self.write_and_set_status(notFound, HTTPStatus.NOT_FOUND)
        except CorpusInvalidFieldException as ci:
            invalid = {MESSAGE: "Invalid field: {0}".format(ci)}
            self.write_and_set_status(invalid,
                                      HTTPStatus.UNPROCESSABLE_ENTITY)
        except CorpusAlreadyExistsException:
            conflict = {MESSAGE: "Corpus with the same id already exists"}
            self.write_and_set_status(conflict, HTTPStatus.CONFLICT)
        except Exception:
            failure = {
                MESSAGE: "Internal server error",
                TRACE: traceback.format_exc().splitlines(),
            }
            self.write_and_set_status(failure,
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 15
0
    def get(self):
        """Return a page of document-metadata annotations across corpora.

        Query arguments:
            from: required integer (>= 0).
            size: required integer (>= 1).
            corpusIds: required, comma-separated corpus ids.
            filters, filterJoin, sortBy, sortOrder: optional, forwarded to
                ``get_annotations_of_type`` (``filters`` after
                ``parse_filters_argument``).

        Responds 200 with ``{"count": ..., "annotations": ...}``, 422 for a
        missing or invalid argument, and 500 (with the stack trace) on any
        other failure.
        """
        try:
            try:
                # The empty-string default makes a missing argument fail
                # int() with ValueError, so missing and non-integer values
                # are both answered with 422 instead of a generic 500.
                fromIndex = int(self.get_query_argument("from", default=""))
                size = int(self.get_query_argument("size", default=""))
            except ValueError:
                self.write_and_set_status({MESSAGE: "Invalid 'from' or 'size' parameter"},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            if fromIndex < 0:
                self.write_and_set_status({MESSAGE: "'from' cannot be less than zero"},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            if size < 1:
                self.write_and_set_status({MESSAGE: "'size' cannot be less than 1"},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            corpusIdsArgument = self.get_query_argument("corpusIds", default=None)
            if not corpusIdsArgument:
                self.write_and_set_status({MESSAGE: "Missing corpusIds parameter"},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            corpusIds = corpusIdsArgument.split(",")

            filters = parse_filters_argument(self.get_query_argument("filters", default=None))
            filterJoin = self.get_query_argument("filterJoin", default=None)
            sortBy = self.get_query_argument("sortBy", default=None)
            sortOrder = self.get_query_argument("sortOrder", default=None)

            env_id = get_env_id()
            authorization = get_autorisation(env_id, None, None)
            mc = MultiCorpus(env_id, authorization)
            count, annotations = mc.get_annotations_of_type(
                corpusIds, SCHEMA_TYPE_DOCUMENT_METADATA,
                fromIndex, size, sortBy, sortOrder, filters, filterJoin)

            self.write_and_set_status({
                "count": count,
                "annotations": annotations},
                HTTPStatus.OK)
        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status({MESSAGE: "Internal server error", TRACE: trace},
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 16
0
 def delete(self, corpusId):
     """Delete a corpus.

     Responds 204 on success, 404 when the corpus is unknown, and 500
     (with the stack trace) on any other failure.
     """
     try:
         envId = get_env_id()
         get_master_document_corpus_list(
             envId, get_autorisation(envId, None, None)
         ).delete_corpus(corpusId)
         self.write_and_set_status(None, HTTPStatus.NO_CONTENT)
     except CorpusNotFoundException:
         notFound = {MESSAGE: "Specified corpus not found"}
         self.write_and_set_status(notFound, HTTPStatus.NOT_FOUND)
     except Exception:
         failure = {
             MESSAGE: "Internal server error",
             TRACE: traceback.format_exc().splitlines(),
         }
         self.write_and_set_status(failure,
                                   HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 17
0
 def delete(self, envId):
     """Delete the environment identified by ``envId``.

     Responds 204 on success, 404 when the environment is unknown, and 500
     (with the stack trace) on any other failure.
     """
     try:
         envList = get_env_list(get_autorisation(envId, None, None))
         envList.delete_env(envId)
         self.write_and_set_status(None, HTTPStatus.NO_CONTENT)
     except EnvNotFoundException:
         notFound = {MESSAGE: "env with id : {0} doest exists".format(envId)}
         self.write_and_set_status(notFound, HTTPStatus.NOT_FOUND)
     except Exception:
         failure = {
             MESSAGE: "Internal server error",
             TRACE: traceback.format_exc().splitlines(),
         }
         self.write_and_set_status(failure,
                                   HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 18
0
    def post(self, corpusId):
        """Add a text document to a corpus from the JSON request body.

        Body fields:
            language: required; must be one of the corpus's languages.
            id: optional document id (``None`` when absent).
            text, title, source: optional, default to "".

        Responds 200 with the document id, 422 when ``language`` is missing
        or is not a language of the corpus, 409 when a document with the
        same id already exists, and 500 (with the stack trace) on any other
        failure.
        """
        try:
            body = json.loads(self.request.body.decode("utf-8"))

            language = body.get("language")
            if not language:
                # Pass the status with the message, like every other
                # response of this handler, instead of a one-argument call
                # followed by a separate set_status().
                self.write_and_set_status(
                    {MESSAGE: "Missing required parameters"},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)

            docId = body.get(
                "id")  # Note: 'get' defaults to None when key does not exist
            text = body.get("text", "")
            title = body.get("title", "")
            source = body.get("source", "")

            corpus = get_master_document_corpus_list(
                envId, authorization).get_corpus(corpusId)
            if language not in corpus.languages:
                self.write_and_set_status(
                    {
                        MESSAGE:
                        "Document language do not correspond to corpus language"
                    }, HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            docId = corpus.add_text_document(text, title, language, docId,
                                             source)

            self.write_and_set_status({"id": docId}, HTTPStatus.OK)
        except DocumentAlreadyExistsException:
            self.write_and_set_status(
                {MESSAGE: "Document with the same id already exists"},
                HTTPStatus.CONFLICT)
        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status(
                {
                    MESSAGE: "Internal server error",
                    TRACE: trace
                }, HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 19
0
    def get(self, corpusId):
        """Return the buckets of a corpus.

        Each bucket is passed through ``getBucketWithSchema`` together with
        a flag that is true only when the INCLUDE_SCHEMA_JSON query argument
        equals ``'true'``. Responds 200 with ``{"buckets": [...]}``, 404
        when the corpus is unknown, and 500 (with the stack trace) on any
        other failure.
        """
        try:
            rawFlag = self.get_query_argument(INCLUDE_SCHEMA_JSON, default=False)
            includeSchemaJson = rawFlag == 'true'

            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)
            corpus = get_master_document_corpus_list(
                envId, authorization).get_corpus(corpusId)

            augmentedBuckets = []
            for bucket in corpus.get_buckets():
                augmentedBuckets.append(
                    getBucketWithSchema(bucket, includeSchemaJson))

            self.write_and_set_status({"buckets": augmentedBuckets},
                                      HTTPStatus.OK)
        except CorpusNotFoundException:
            notFound = {MESSAGE: "Specified corpus not found"}
            self.write_and_set_status(notFound, HTTPStatus.NOT_FOUND)
        except Exception:
            failure = {
                MESSAGE: "Internal server error",
                TRACE: traceback.format_exc().splitlines(),
            }
            self.write_and_set_status(failure,
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 20
0
 def post(self):
     """Reset the current environment by deleting and re-creating it.

     The environment returned by ``get_env_id()`` is looked up, deleted,
     and created again with the same id and name. Responds 200 on success
     and 500 (with the stack trace) on any failure.
     """
     try:
         envId = get_env_id()
         authorization = get_autorisation(envId, None, None)
         envList = get_env_list(authorization)
         env = envList.get_env(envId)
         envList.delete_env(env.id)
         es_wait_ready()
         # Fixed pause between deletion and re-creation.
         # NOTE(review): presumably gives the backend time to settle -
         # confirm whether es_wait_ready() alone is sufficient.
         sleep(5)
         get_env_list(authorization).create_env(env.id, env.name)
         es_wait_ready()
         self.write_and_set_status(None, HTTPStatus.OK)
     except Exception:
         trace = traceback.format_exc().splitlines()
         # Report through write_and_set_status like every other handler;
         # the previous self.write(payload, status) call passed a second
         # positional argument that write() is not given anywhere else.
         self.write_and_set_status(
             {
                 MESSAGE: "Internal server error",
                 TRACE: trace
             }, HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 21
0
    def post(self, corpusId):
        """Create a bucket in a corpus from the JSON request body.

        The optional ``id`` and ``name`` body fields are forwarded to
        ``create_bucket`` (``None`` when absent); a provided id must pass
        ``valid_es_id``. Responds 200 with the bucket id, 422 for an invalid
        id or an unknown corpus, 409 when a bucket with the same id already
        exists, and 500 (with the stack trace) on any other failure.
        """
        try:
            payload = json.loads(self.request.body.decode("utf-8"))
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)

            bucketId = payload["id"] if "id" in payload else None
            bucketName = payload["name"] if "name" in payload else None

            if bucketId and not valid_es_id(bucketId):
                reason = "Bucket id invalid '{0}' . BucketId can only be lowercase,alphanumeric with -_".format(
                    bucketId)
                self.write_and_set_status({MESSAGE: reason},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            corpus = get_master_document_corpus_list(
                envId, authorization).get_corpus(corpusId)
            created = corpus.create_bucket(bucketName, bucketId)
            self.write_and_set_status({"id": created.id}, HTTPStatus.OK)
        except BucketAlreadyExistsException:
            conflict = {MESSAGE: "Bucket with the same id already exists"}
            self.write_and_set_status(conflict, HTTPStatus.CONFLICT)
        except CorpusNotFoundException as err:
            detail = "Corpus does not exist.Extra info: '{0}'".format(err)
            self.write_and_set_status({MESSAGE: detail},
                                      HTTPStatus.UNPROCESSABLE_ENTITY)
        except Exception:
            failure = {
                MESSAGE: "Internal server error",
                TRACE: traceback.format_exc().splitlines(),
            }
            self.write_and_set_status(failure,
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 22
0
    def post(self, corpusId, bucketId):
        """Add an annotation to a bucket from the JSON request body.

        Body fields:
            schemaType: required (422 when missing).
            annotationId: optional; when present it is removed from the
                body and passed to ``add_annotation`` as the id.

        Responds 200 with the annotation id, 404 when the bucket is not
        found, 409 when an annotation with the same id already exists, 400
        for a malformed JSON body, and 500 (with the stack trace) on any
        other failure.
        """
        try:
            payload = json.loads(self.request.body.decode("utf-8"))
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)
            shouldValidate = get_settings()['USE_ANNOTATION_AND_SCHEMA_VALIDATOR']

            annotationId = None
            if "annotationId" in payload:
                # The id travels separately from the annotation content.
                annotationId = payload["annotationId"]
                del payload["annotationId"]

            if "schemaType" not in payload:
                missing = {MESSAGE: "Missing schemaType field, which links the annotation to its schema."}
                self.write_and_set_status(missing,
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            docType = payload["schemaType"]

            bucket = get_master_bucket_list(envId, authorization).get_bucket(
                corpusId, bucketId)
            annotationId = bucket.add_annotation(payload, docType,
                                                 annotationId, shouldValidate)

            self.write_and_set_status({"id": annotationId}, HTTPStatus.OK)
        except BucketNotFoundException:
            notFound = {MESSAGE: "Specified bucket not found"}
            self.write_and_set_status(notFound, HTTPStatus.NOT_FOUND)
        except DocumentAlreadyExistsException:
            conflict = {MESSAGE: "Annotation with the same id already exist"}
            self.write_and_set_status(conflict, HTTPStatus.CONFLICT)
        except JSONDecodeError:
            malformed = {MESSAGE: "Invalid JSON format for annotation"}
            self.write_and_set_status(malformed, HTTPStatus.BAD_REQUEST)
        except Exception:
            failure = {
                MESSAGE: "Internal server error",
                TRACE: traceback.format_exc().splitlines(),
            }
            self.write_and_set_status(failure,
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 23
0
 def get(self, envId):
     """Write the id and name of an environment, serialized as a JSON string.

     ``securityType`` is always reported as ``"basic"``. Responds 404 when
     the environment is unknown and 500 (with stack trace) on any other
     error.
     """
     try:
         credentials = get_autorisation(envId, None, None)
         environment = get_env_list(credentials).get_env(envId)
         description = {
             "id": environment.id,
             "name": environment.name,
             "securityType": "basic"
         }
         serialized = json.dumps(description)
         self.write_and_set_status(serialized, HTTPStatus.OK)
     except EnvNotFoundException:
         notFound = {MESSAGE: "env with id : {0} doest exists".format(envId)}
         self.write_and_set_status(notFound, HTTPStatus.NOT_FOUND)
     except Exception:
         stackLines = traceback.format_exc().splitlines()
         failure = {MESSAGE: "Internal server error", TRACE: stackLines}
         self.write_and_set_status(failure, HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 24
0
    def get(self, corpusId):
        """Send the documents of a corpus as a zip file.

        The archive is produced by ``DocumentCorpus.get_documents_zip`` and
        sent with ``send_zip_file_with_get``. Once the archive exists, the
        temporary files are cleared whether or not sending succeeds.

        Responds 404 when the corpus is unknown; any other error is logged
        and reported as 500 with the stack trace.
        """
        try:
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)
            documentCorpus = DocumentCorpus(envId, authorization, corpusId)
            zipPath = documentCorpus.get_documents_zip()
            try:
                zipName = os.path.basename(zipPath)
                self.send_zip_file_with_get(zipPath, zipName)
            finally:
                # Clean up even when sending fails, otherwise the temporary
                # files created for the archive are left behind.
                documentCorpus.clear_temporary_files()

        except CorpusNotFoundException:
            self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                      HTTPStatus.NOT_FOUND)
        except Exception:
            trace = traceback.format_exc().splitlines()
            logger = logging.getLogger(__name__)
            logger.error(str(trace))
            self.write_and_set_status(
                {
                    MESSAGE: "Internal server error",
                    TRACE: trace
                }, HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 25
0
    def get(self, corpusId):
        """Report the id, languages, modification date and document count of a corpus.

        Responds 404 when the corpus is unknown and 500 (with stack trace)
        on any other error.
        """
        try:
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)
            corpus = get_master_document_corpus_list(envId, authorization).get_corpus(corpusId)

            modifiedOn = datetime_to_json_str(corpus.modificationDate)
            documentCount = corpus.get_documents_count()
            summary = {
                CORPUS_ID: corpus.id,
                CORPUS_LANGUAGES: corpus.languages,
                CORPUS_MODIFICATION_DATE: modifiedOn,
                CORPUS_DOCUMENT_COUNT: documentCount,
            }
            self.write_and_set_status(summary, HTTPStatus.OK)
        except CorpusNotFoundException:
            self.write_and_set_status(
                {MESSAGE: "Specified corpus not found"},
                HTTPStatus.NOT_FOUND)
        except Exception:
            stackLines = traceback.format_exc().splitlines()
            self.write_and_set_status(
                {
                    MESSAGE: "Internal server error",
                    TRACE: stackLines
                }, HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 26
0
    def getAnnotations(self, corpusId, documentIds: List[str]):
        """Write the annotations of the given documents, grouped by schema type.

        The ``schemaTypes`` request argument is a comma-separated list of
        ``bucketId:schemaType`` pairs. Optional ``offsetBegin`` /
        ``offsetEnd`` arguments restrict the search to one interval; when
        both are left at their defaults no interval is passed.

        Responds 422 when ``schemaTypes`` is missing or cannot be parsed.
        """
        schemaTypesByBucketId = {}

        try:
            rawValues = self.get_arguments("schemaTypes")
            if not rawValues:
                self.write_and_set_status(
                    {MESSAGE: "Missing schemaTypes parameter"},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            # get_arguments returns a list; only its first entry is used and
            # it holds all the pairs, separated by commas.
            for pair in rawValues[0].split(","):
                pieces = pair.split(":")
                bucketId, schemaType = pieces[0], pieces[1]
                schemaTypesByBucketId.setdefault(bucketId, []).append(schemaType)
        except Exception:
            self.write_and_set_status(
                {MESSAGE: "Invalid data passed in schemaTypes parameter"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        # TODO: arbitrary bounds are used for the offsets when they are absent.
        # NOTE(review): values supplied by the client presumably arrive as
        # strings -- confirm Interval accepts them.
        offsetBegin = self.get_argument("offsetBegin", MIN_OFFSET_BEGIN)
        offsetEnd = self.get_argument("offsetEnd", MAX_OFFSET_END)

        envId = get_env_id()
        authorization = get_autorisation(envId, None, None)
        documentSearch = DocumentSearch(envId, authorization, documentIds, corpusId)

        # Only build an interval when the caller narrowed at least one bound.
        usesDefaultBounds = offsetBegin == MIN_OFFSET_BEGIN and offsetEnd == MAX_OFFSET_END
        if usesDefaultBounds:
            offsets = None
        else:
            offsets = [Interval(offsetBegin, offsetEnd, False, False, False)]

        found = documentSearch.get_annotations(schemaTypesByBucketId, offsets)
        # An empty result for this corpus is written as an empty object.
        self.write(found if found[corpusId] else {})
        self.write_and_set_status(None, HTTPStatus.OK)
Ejemplo n.º 27
0
    def get(self, corpusId, bucketId, schemaType):
        """Search the annotations of one schema type in a bucket, page by page.

        Query arguments:
            from: index of the first annotation to return; integer >= 0.
            size: number of annotations to return; integer >= 1.
            filters, filterJoin, sortBy, sortOrder: optional, forwarded to
                ``search_annotations_for_one_type`` (``filters`` goes through
                ``parse_filters_argument`` first).

        Responds 200 with ``{"count": ..., "annotations": ...}``, 422 when
        ``from`` or ``size`` is not an integer or is out of range, and 500
        (with stack trace) on any other error.
        """
        try:
            fromIndexArgument = self.get_query_argument("from")
            try:
                fromIndex = int(fromIndexArgument)
            except ValueError:
                # A malformed value is a client error, not a server failure.
                self.write_and_set_status({MESSAGE: "'from' must be an integer"},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            if fromIndex < 0:
                self.write_and_set_status({MESSAGE: "'from' cannot be less than zero"},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            sizeArgument = self.get_query_argument("size")
            try:
                size = int(sizeArgument)
            except ValueError:
                self.write_and_set_status({MESSAGE: "'size' must be an integer"},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            if size < 1:
                self.write_and_set_status({MESSAGE: "'size' cannot be less than 1"},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)
            documentSearch = DocumentSearch(envId, authorization, None, corpusId)

            filters = parse_filters_argument(self.get_query_argument("filters", default=None))
            filterJoin = self.get_query_argument("filterJoin", default=None)
            sortBy = self.get_query_argument("sortBy", default=None)
            sortOrder = self.get_query_argument("sortOrder", default=None)

            count, annotations = documentSearch.search_annotations_for_one_type(
                bucketId, schemaType,
                fromIndex, size, sortBy, sortOrder, filters, filterJoin)

            self.write_and_set_status({
                "count": count,
                "annotations": annotations},
                HTTPStatus.OK)
        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status({MESSAGE: "Internal server error", TRACE: trace},
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 28
0
    def delete(self, corpusId, bucketId, schemaType):
        """Delete one schema type from a bucket.

        Responds 204 on success, 404 when the corpus, the bucket or the
        schema type does not exist, and 500 (with stack trace) on any other
        error.
        """
        try:
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)

            corpus = get_master_document_corpus_list(envId, authorization).get_corpus(corpusId)
            bucket = corpus.get_bucket(bucketId)

            # Collect the schema types currently declared in the bucket.
            schemasInfo = bucket.get_schemas_info(False)
            knownTypes = [entry['schemaType'] for entry in schemasInfo['data']]

            if schemaType in knownTypes:
                bucket.delete_schema_type(schemaType)
                self.write_and_set_status(None, HTTPStatus.NO_CONTENT)
            else:
                unknownType = {MESSAGE: "Schema Type: {0} does not exist".format(schemaType)}
                self.write_and_set_status(unknownType, HTTPStatus.NOT_FOUND)
        except CorpusNotFoundException as err:
            missingCorpus = {MESSAGE: "Corpus does not exist.Extra info: '{0}'".format(err)}
            self.write_and_set_status(missingCorpus, HTTPStatus.NOT_FOUND)
        except BucketNotFoundException as err:
            missingBucket = {MESSAGE: "Bucket does not exist.Extra info: '{0}'".format(err)}
            self.write_and_set_status(missingBucket, HTTPStatus.NOT_FOUND)
        except Exception:
            stackLines = traceback.format_exc().splitlines()
            self.write_and_set_status(
                {MESSAGE: "Internal server error", TRACE: stackLines},
                HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 29
0
    def put(self, corpusId):
        """Update the languages of a corpus from the JSON request body.

        The languages are read from the ``CORPUS_LANGUAGES`` field of the
        body; each one must be accepted by the language manager
        (``has_es_analyser``) before the corpus is updated.

        Responds 204 on success, 400 when the body is not valid JSON, 422
        when the languages field or another corpus field is invalid, 404
        when the corpus is unknown, and 500 (with stack trace) on any other
        error.
        """
        # Bound up front: the inner handler below reports this value, and it
        # would otherwise be undefined when reading the field itself fails
        # (for example when the body is a JSON array instead of an object).
        languages = None
        try:
            body = self.request.body.decode("utf-8")
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)
            json_args = json.loads(body)

            try:
                languages = json_args.get(CORPUS_LANGUAGES, None)
                if languages:
                    languageManager = get_language_manager()
                    for language in languages:
                        if not languageManager.has_es_analyser(language):
                            self.write_and_set_status({MESSAGE: "Invalid language: " + language},
                                                      HTTPStatus.UNPROCESSABLE_ENTITY)
                            return
            except Exception:
                self.write_and_set_status({MESSAGE: "Invalid languages field: " + str(languages)},
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            corpora = get_master_document_corpus_list(envId, authorization)
            corpora.update_corpus(corpusId, languages)
            self.write_and_set_status(None, HTTPStatus.NO_CONTENT)

        except json.JSONDecodeError:
            # A malformed body is a client error, not a server failure.
            self.write_and_set_status({MESSAGE: "Invalid JSON format for corpus"},
                                      HTTPStatus.BAD_REQUEST)

        except CorpusInvalidFieldException as ci:
            self.write_and_set_status({MESSAGE: "Invalid field: {0}".format(ci)},
                                      HTTPStatus.UNPROCESSABLE_ENTITY)

        except CorpusNotFoundException:
            self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                      HTTPStatus.NOT_FOUND)

        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status({MESSAGE: "Internal server error", TRACE: trace},
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Ejemplo n.º 30
0
    def post(self, corpusId, bucketId):
        """Add annotations to a bucket in batch from an uploaded archive.

        The first file of the ``file`` form field is saved in the temporary
        directory under a unique name, handed to ``Corpus.add_annotations``
        and removed afterwards, whether or not processing succeeded.

        Responds 422 with the errors returned by ``add_annotations`` when
        there are any, 200 otherwise, and 500 (with stack trace) on any
        unexpected error.
        """
        try:
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)

            # Save the upload into the temporary directory.
            tmpUploadFolder = get_jass_tmp_dir()
            fileinfo = self.request.files['file'][0]
            fname = fileinfo['filename']
            ext = os.path.splitext(fname)[1]
            # uuid1 has to be called: str(uuid1) is the text of the function
            # object, which is identical for every request in the process.
            zipName = str(uuid1()) + ext
            zipPath = os.path.join(tmpUploadFolder, zipName)

            try:
                with open(zipPath, 'wb') as zipFile:
                    zipFile.write(fileinfo['body'])

                # Add annotations in batch.
                batchCorpus = Corpus(envId, authorization, corpusId)
                errors = batchCorpus.add_annotations(bucketId, zipPath)
            finally:
                # Never leave the uploaded archive behind, even on failure.
                if os.path.exists(zipPath):
                    os.remove(zipPath)

            if errors:
                self.write_and_set_status(errors,
                                          HTTPStatus.UNPROCESSABLE_ENTITY)
            else:
                self.write_and_set_status(None, HTTPStatus.OK)

        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status(
                {
                    MESSAGE: "Internal server error",
                    TRACE: trace
                }, HTTPStatus.INTERNAL_SERVER_ERROR)