Esempio n. 1
0
    def put(self, corpusId, bucketId):
        """
        Update an existing annotation of a bucket.

        The request body must be a JSON object containing ``annotationId``
        (removed from the body before the update) and ``schemaType``. An
        optional ``bucketId`` in the body must equal the one from the path.

        Responses written through ``write_and_set_status``:

        * 204 NO_CONTENT: annotation updated.
        * 400 BAD_REQUEST: the request body is not valid JSON.
        * 404 NOT_FOUND: bucket or annotation does not exist.
        * 422 UNPROCESSABLE_ENTITY: a required field is missing, the body
          ``bucketId`` differs from the path, or the ``schemaType`` differs
          from the stored annotation's.
        * 500 INTERNAL_SERVER_ERROR: any other failure (traceback included).

        :param corpusId: id of the corpus holding the bucket (from the path).
        :param bucketId: id of the bucket holding the annotation (from the path).
        """
        try:
            body = json.loads(self.request.body.decode("utf-8"))
            envId = get_env_id()
            authorization = get_autorisation(envId, None, None)

            sett = get_settings()
            shouldValidate = sett['USE_ANNOTATION_AND_SCHEMA_VALIDATOR']

            if "annotationId" not in body:
                self.write_and_set_status(
                    {MESSAGE: "Missing annotationId field required to find an annotation to update."},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            # The id is only used to locate the annotation; it is stripped
            # from the payload handed to update_annotation.
            annotationId = body["annotationId"]
            del body["annotationId"]

            if "schemaType" not in body:
                self.write_and_set_status(
                    {MESSAGE: "Missing schemaType field, which links the annotation to its schema."},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            docType = body["schemaType"]

            if "bucketId" in body and body["bucketId"] != bucketId:
                self.write_and_set_status(
                    {MESSAGE: "bucketId from the path is different than bucketId in the body."},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            bucket = get_master_bucket_list(envId, authorization).get_bucket(corpusId, bucketId)
            storedAnnotation = bucket.get_annotation(id=annotationId, docType=docType)
            if storedAnnotation["schemaType"] != docType:
                self.write_and_set_status(
                    {MESSAGE: "You cannot change the schemaType of an annotation."},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return

            bucket.update_annotation(body, docType, annotationId, shouldValidate)

            self.write_and_set_status(None,
                                      HTTPStatus.NO_CONTENT)
        except json.JSONDecodeError:
            # A malformed body is a client error, not a server failure.
            # Referenced through the json module so no extra import is needed.
            self.write_and_set_status({MESSAGE: "Invalid JSON format for annotation"},
                                      HTTPStatus.BAD_REQUEST)
        except BucketNotFoundException:
            self.write_and_set_status({MESSAGE: "Specified bucket not found"},
                                      HTTPStatus.NOT_FOUND)
        except DocumentNotFoundException:
            self.write_and_set_status({MESSAGE: "Annotation with provided id does not exist"},
                                      HTTPStatus.NOT_FOUND)
        except Exception:
            trace = traceback.format_exc().splitlines()
            self.write_and_set_status({MESSAGE: "Internal server error", TRACE: trace},
                                      HTTPStatus.INTERNAL_SERVER_ERROR)
Esempio n. 2
0
File: app.py Progetto: crim-ca/RACS
def initialize_es():
    """
    Wait for Elasticsearch to be ready, then initialise the environment list.

    The environment list is initialised from the ``CLASSES`` / ``ENV`` entry
    of the application settings.
    """
    settings = get_settings()
    # Block until es_wait_ready() returns before touching the environment list.
    es_wait_ready()
    env_class_settings = settings['CLASSES']['ENV']
    EnvList.initialize_env_list(env_class_settings)
Esempio n. 3
0
def partial_corpora_indices(corpus_ids: List[str]) -> str:
    """
    Build a comma-separated list of wildcard index patterns, one per corpus.

    Each pattern has the form
    ``<env id><class prefix><corpus id>*<index data suffix>_*``.

    The idiomatic alternative would be to instantiate every corpus and look
    up, in each one, the bucket with the right schema type. That costs about
    2n operations before the main search, so the patterns are built directly
    from the settings instead (a deliberate, possibly premature, optimisation
    by the original author).

    :param corpus_ids: ids of the corpora to cover.
    :return: the patterns joined with ``,`` (empty string when no ids given).
    """
    directory_settings = get_settings()['CLASSES']['DOCUMENT_DIRECTORY']
    class_prefix = directory_settings['CLASS_PREFIX']
    suffix = '*' + directory_settings['INDEX_DATA_SUFFIX'] + '_*'
    prefix = get_env_id() + class_prefix
    return ','.join(prefix + corpus_id + suffix for corpus_id in corpus_ids)
Esempio n. 4
0
 def setUp(self):
     """
     Prepare a fresh ``unittest_`` environment before each test.

     Stores the environment id, an authorization and the environment list on
     the test instance, then creates the environment. If an environment with
     the same id already exists, it is deleted and created again.
     """
     # The return value is not used here; the call is kept in case
     # get_settings() has side effects (e.g. loading configuration) -- TODO confirm.
     get_settings()
     self.envId = "unittest_"
     self.authorization = BaseAuthorization(self.envId, None, None, None)
     self.envList1 = get_env_list(self.authorization)
     try:
         self.envList1.create_env(self.envId)
     except EnvAlreadyExistWithSameIdException:
         # An environment with this id is already present: recreate it.
         # The pause presumably lets the backend settle first -- TODO confirm.
         time.sleep(1)
         self.envList1.delete_env(self.envId)
         self.envList1.create_env(self.envId)
Esempio n. 5
0
 def setUp(self):
     """
     Reset the ``unittest_*`` indices and build the lists used by the tests.

     Deletes every index matching ``unittest_*``, then creates the document
     directory list, the bucket list and the document corpus list for the
     ``unittest_`` environment from their respective ``CLASSES`` settings.
     """
     get_es_conn().indices.delete(index="unittest_*")
     time.sleep(0.1)

     settings = get_settings()
     self.envId = "unittest_"
     self.authorization = BaseAuthorization.create_authorization(
         self.envId, None, None)

     directory_settings = settings['CLASSES']['DOCUMENT_DIRECTORY']
     self.masterList = DocumentDirectoryList.create(
         self.envId, directory_settings, self.authorization)

     bucket_settings = settings['CLASSES']['BUCKET']
     self.bucketList = BucketList.create(
         self.envId, bucket_settings, self.authorization)

     corpus_settings = settings['CLASSES']['DOCUMENT_CORPUS']
     self.documentCorpusList = DocumentCorpusList.create(
         self.envId, corpus_settings, self.authorization)
Esempio n. 6
0
 def set_up_corpus(self):
     """
     Create ``corpus1`` with one bucket bound to two schemas.

     Creates the corpus and its ``bucket1`` bucket, registers the normal and
     the offsets schemas in the schema list (stored on ``self.schemaList``),
     then binds them to the bucket as ``sentence`` and ``token``.
     """
     corpus_list = get_master_document_corpus_list(
         self.envId, self.authorization)
     new_corpus = corpus_list.create_corpus("corpus1")
     time.sleep(1)
     bucket = new_corpus.create_bucket("bucket1", "bucket1")
     # Result unused; the call itself is preserved from the original.
     get_settings()
     self.schemaList = get_schema_list(self.envId, self.authorization)
     normal_schema_id = self.schemaList.add_json_schema_as_hash(SCHEMA_NORMAL)
     offsets_schema_id = self.schemaList.add_json_schema_as_hash(
         SCHEMA_OFFSETS, False, nestedFields=["offsets"])
     time.sleep(1)
     # Bind each schema to the bucket under its schema type.
     schema_bindings = (
         (normal_schema_id, "sentence"),
         (offsets_schema_id, "token"),
     )
     for schema_id, schema_type in schema_bindings:
         bucket.add_or_update_schema_to_bucket(
             schema_id, schema_type, TargetType.document_surface1d, {})
     time.sleep(1)
Esempio n. 7
0
    def post(self, corpusId, bucketId):
        """
        Add an annotation to a bucket.

        The request body must be a JSON object containing ``schemaType``. An
        optional ``annotationId`` is removed from the body and passed on as
        the requested id.

        Responses written through ``write_and_set_status``:

        * 200 OK: ``{"id": <annotation id>}``.
        * 400 BAD_REQUEST: the request body is not valid JSON.
        * 404 NOT_FOUND: the bucket does not exist.
        * 409 CONFLICT: an annotation with the same id already exists.
        * 422 UNPROCESSABLE_ENTITY: ``schemaType`` is missing.
        * 500 INTERNAL_SERVER_ERROR: any other failure (traceback included).

        :param corpusId: id of the corpus holding the bucket (from the path).
        :param bucketId: id of the target bucket (from the path).
        """
        try:
            payload = json.loads(self.request.body.decode("utf-8"))
            env_id = get_env_id()
            auth = get_autorisation(env_id, None, None)

            validate = get_settings()['USE_ANNOTATION_AND_SCHEMA_VALIDATOR']

            # The id travels separately from the annotation payload.
            requested_id = None
            if "annotationId" in payload:
                requested_id = payload["annotationId"]
                del payload["annotationId"]

            if "schemaType" not in payload:
                self.write_and_set_status(
                    {MESSAGE: "Missing schemaType field, which links the annotation to its schema."},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            schema_type = payload["schemaType"]

            bucket_list = get_master_bucket_list(env_id, auth)
            target_bucket = bucket_list.get_bucket(corpusId, bucketId)
            created_id = target_bucket.add_annotation(
                payload, schema_type, requested_id, validate)

            self.write_and_set_status({"id": created_id}, HTTPStatus.OK)
        except BucketNotFoundException:
            self.write_and_set_status(
                {MESSAGE: "Specified bucket not found"},
                HTTPStatus.NOT_FOUND)
        except DocumentAlreadyExistsException:
            self.write_and_set_status(
                {MESSAGE: "Annotation with the same id already exist"},
                HTTPStatus.CONFLICT)
        except JSONDecodeError:
            self.write_and_set_status(
                {MESSAGE: "Invalid JSON format for annotation"},
                HTTPStatus.BAD_REQUEST)
        except Exception:
            stack_lines = traceback.format_exc().splitlines()
            self.write_and_set_status(
                {MESSAGE: "Internal server error", TRACE: stack_lines},
                HTTPStatus.INTERNAL_SERVER_ERROR)