def delete(self, corpusId, documentId):
    """Delete a single document and, optionally, its annotations.

    The ``deleteAnnotations`` query argument is required; annotations are
    deleted only when its value is exactly ``'true'``.  The value returned
    by ``delete_document`` is written back with a 200 status.
    """
    try:
        raw_flag = self.get_query_argument("deleteAnnotations", None)
        if not raw_flag:
            self.missing_required_field("deleteAnnotations")
            return
        remove_annotations = raw_flag == 'true'

        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)
        target_corpus = get_master_document_corpus_list(
            env_id, auth).get_corpus(corpusId)
        deleted = target_corpus.delete_document(documentId,
                                                remove_annotations)
        self.write_and_set_status(deleted, HTTPStatus.OK)
    except CorpusNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                  HTTPStatus.NOT_FOUND)
    except DocumentNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified document not found"},
                                  HTTPStatus.NOT_FOUND)
    except Exception:
        # Catch-all boundary: report the stack trace in the response body.
        payload = {
            MESSAGE: "Internal server error",
            TRACE: traceback.format_exc().splitlines(),
        }
        self.write_and_set_status(payload, HTTPStatus.INTERNAL_SERVER_ERROR)
def get(self, corpusId, bucketId, annotationId):
    """Return one annotation of a bucket.

    The ``schemaType`` argument is required.  In the response the stored
    ``id`` key is replaced by ``annotationId``.
    """
    try:
        schema_type = self.get_argument("schemaType", None)
        if not schema_type:
            self.write_and_set_status({MESSAGE: "Missing schemaType."},
                                      HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)
        bucket = get_master_bucket_list(env_id, auth).get_bucket(
            corpusId, bucketId)
        annotation = bucket.get_annotation(annotationId, schema_type)

        # Expose the identifier under "annotationId" instead of "id".
        annotation["annotationId"] = annotation["id"]
        del annotation["id"]
        self.write_and_set_status(annotation, HTTPStatus.OK)
    except BucketNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified bucket not found"},
                                  HTTPStatus.NOT_FOUND)
    except DocumentNotFoundException:
        self.write_and_set_status(
            {MESSAGE: "Annotation with provided id and schemaType does not exist"},
            HTTPStatus.NOT_FOUND)
    except Exception:
        payload = {
            MESSAGE: "Internal server error",
            TRACE: traceback.format_exc().splitlines(),
        }
        self.write_and_set_status(payload, HTTPStatus.INTERNAL_SERVER_ERROR)
def get(self, corpusId):
    """Send a zip archive of the corpus annotations to the client.

    Optional query arguments ``schemaTypes`` and ``bucketIds`` are
    comma-separated lists; when absent or empty, empty lists are passed to
    ``create_tmp_annotations_zip``.  The temporary files are cleared once
    the archive has been handed to ``send_zip_file_with_get``, whether or
    not that call succeeds.
    """
    try:
        envId = get_env_id()
        authorization = get_autorisation(envId, None, None)
        schemaTypesStr = self.get_query_argument("schemaTypes", None)
        bucketIdsStr = self.get_query_argument("bucketIds", None)
        schemaTypes = schemaTypesStr.split(",") if schemaTypesStr else []
        bucketIds = bucketIdsStr.split(",") if bucketIdsStr else []

        batchCorpus = Corpus(envId, authorization, corpusId)
        zipPath = batchCorpus.create_tmp_annotations_zip(
            bucketIds, schemaTypes)
        try:
            self.send_zip_file_with_get(zipPath, os.path.basename(zipPath))
        finally:
            # Always remove the temporary archive, even if sending failed;
            # otherwise every failed download leaves a file behind.
            batchCorpus.clear_temporary_files()
    except CorpusNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                  HTTPStatus.NOT_FOUND)
    except Exception:
        trace = traceback.format_exc().splitlines()
        self.write_and_set_status(
            {
                MESSAGE: "Internal server error",
                TRACE: trace
            }, HTTPStatus.INTERNAL_SERVER_ERROR)
def delete(self, corpusId, bucketId, annotationId):
    """Delete one annotation from a bucket.

    The ``schemaType`` argument is required.  On success an empty 204
    response is returned.  A missing ``schemaType`` is answered with
    422 (Unprocessable Entity), the same status the other annotation
    handlers use for this condition; 404 is reserved for a bucket or
    annotation that does not exist.
    """
    try:
        envId = get_env_id()
        authorization = get_autorisation(envId, None, None)
        docType = self.get_argument("schemaType", None)
        if not docType:
            # A missing parameter is a malformed request, not a missing
            # resource, so it must not be reported as NOT_FOUND.
            self.write_and_set_status(
                {MESSAGE: "Missing schemaType field, which links the annotation to its schema."},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        get_master_bucket_list(envId, authorization) \
            .get_bucket(corpusId, bucketId) \
            .delete_annotation(annotationId, docType)
        self.write_and_set_status(None, HTTPStatus.NO_CONTENT)
    except BucketNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified bucket not found"},
                                  HTTPStatus.NOT_FOUND)
    except DocumentNotFoundException:
        self.write_and_set_status(
            {MESSAGE: "Annotation with provided id does not exist"},
            HTTPStatus.NOT_FOUND)
    except Exception:
        trace = traceback.format_exc().splitlines()
        self.write_and_set_status(
            {MESSAGE: "Internal server error", TRACE: trace},
            HTTPStatus.INTERNAL_SERVER_ERROR)
def delete(self, corpusId, bucketId):
    """Delete a bucket from a corpus and answer with an empty 204."""
    try:
        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)
        owner = get_master_document_corpus_list(env_id, auth).get_corpus(
            corpusId)
        owner.delete_bucket(bucketId)
        self.write_and_set_status(None, HTTPStatus.NO_CONTENT)
    except BucketNotFoundException as err:
        message = "Bucket does not exist.Extra info: '{0}'".format(err)
        self.write_and_set_status({MESSAGE: message}, HTTPStatus.NOT_FOUND)
    except CorpusNotFoundException as err:
        message = "Corpus does not exist.Extra info: '{0}'".format(err)
        self.write_and_set_status({MESSAGE: message}, HTTPStatus.NOT_FOUND)
    except Exception:
        payload = {
            MESSAGE: "Internal server error",
            TRACE: traceback.format_exc().splitlines(),
        }
        self.write_and_set_status(payload, HTTPStatus.INTERNAL_SERVER_ERROR)
def get(self, corpusId, documentId):
    """Return a single text document of a corpus.

    A ``None`` result from ``get_text_document`` is treated as "document
    not found" and answered with a 404.
    """
    try:
        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)
        corpora = get_master_document_corpus_list(env_id, auth)
        found = corpora.get_corpus(corpusId).get_text_document(documentId)
        if found is None:
            raise DocumentNotFoundException(documentId)
        self.write_and_set_status(found, HTTPStatus.OK)
    except CorpusNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                  HTTPStatus.NOT_FOUND)
    except DocumentNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified document not found"},
                                  HTTPStatus.NOT_FOUND)
    except Exception:
        payload = {
            MESSAGE: "Internal server error",
            TRACE: traceback.format_exc().splitlines(),
        }
        self.write_and_set_status(payload, HTTPStatus.INTERNAL_SERVER_ERROR)
def get(self, corpusId, bucketId):
    """Return annotation counts of a bucket for the requested schema types.

    ``schemaTypes`` is a required, comma-separated query argument.
    """
    try:
        raw_types = self.get_query_argument("schemaTypes", default=None)
        if not raw_types:
            self.write_and_set_status(
                {MESSAGE: "Missing schemaTypes parameter"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return
        wanted_types = raw_types.split(",")

        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)
        search = DocumentSearch(env_id, auth, [], corpusId)
        totals = search.count_annotations_for_types(bucketId, wanted_types)
        self.write_and_set_status(totals, HTTPStatus.OK)
    except BucketNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified bucket not found"},
                                  HTTPStatus.NOT_FOUND)
    except Exception:
        payload = {
            MESSAGE: "Internal server error",
            TRACE: traceback.format_exc().splitlines(),
        }
        self.write_and_set_status(payload, HTTPStatus.INTERNAL_SERVER_ERROR)
def get(self, corpusId):
    """Get documents from corpus according to pagination.

    Required query arguments: ``from`` (integer >= 0) and ``size``
    (integer >= 1, capped at ``MAX_DOCUMENT_SIZE``).  Optional arguments
    ``filterTitle``, ``filterSource``, ``filterJoin``, ``sortBy`` and
    ``sortOrder`` are forwarded to ``get_text_documents``.

    A missing, non-integer or out-of-range ``from``/``size`` is answered
    with 422; an unknown corpus with 404.
    """
    try:
        # Read both with default=None: without a default, a missing
        # argument raises inside this try and the catch-all below would
        # report the client's mistake as a 500 with a stack trace.
        fromIndexArgument = self.get_query_argument("from", default=None)
        sizeArgument = self.get_query_argument("size", default=None)
        if fromIndexArgument is None or sizeArgument is None:
            self.write_and_set_status(
                {MESSAGE: "Invalid 'from' or 'size' parameter"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        fromIndex = int(fromIndexArgument)
        if fromIndex < 0:
            self.write_and_set_status(
                {MESSAGE: "'from' must cannot be less than zero"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        size = int(sizeArgument)
        if size < 1:
            self.write_and_set_status(
                {MESSAGE: "'size' cannot be less than 1"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return
        size = min(size, MAX_DOCUMENT_SIZE)

        envId = get_env_id()
        authorization = get_autorisation(envId, None, None)
        corpus = get_master_document_corpus_list(
            envId, authorization).get_corpus(corpusId)

        filterTitle = self.get_query_argument("filterTitle", default=None)
        filterSource = self.get_query_argument("filterSource", default=None)
        filterJoin = self.get_query_argument("filterJoin", default=None)
        sortBy = self.get_query_argument("sortBy", default=None)
        sortOrder = self.get_query_argument("sortOrder", default=None)

        documents = corpus.get_text_documents(fromIndex, size, sortBy,
                                              sortOrder, filterTitle,
                                              filterSource, filterJoin)
        self.write_and_set_status({"documents": documents}, HTTPStatus.OK)
    except CorpusNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                  HTTPStatus.NOT_FOUND)
    except ValueError:
        # Raised by int() above when 'from' or 'size' is not an integer.
        self.write_and_set_status(
            {MESSAGE: "Invalid 'from' or 'size' parameter"},
            HTTPStatus.UNPROCESSABLE_ENTITY)
    except TransportError as te:
        trace = traceback.format_exc().splitlines()
        self.write_and_set_status(
            {
                MESSAGE: "ES TransportError",
                TRACE: trace
            }, te.status_code)
    except Exception:
        trace = traceback.format_exc().splitlines()
        self.write_and_set_status(
            {
                MESSAGE: "Internal server error",
                TRACE: trace
            }, HTTPStatus.INTERNAL_SERVER_ERROR)
def put(self, corpusId, bucketId):
    """Update an existing annotation of a bucket from the JSON request body.

    The body must contain ``annotationId`` and ``schemaType``.  If it
    contains ``bucketId``, that value must equal the one in the path.
    ``annotationId`` is removed from the body before the body is passed to
    ``update_annotation``.  Answers with an empty 204 on success, 400 for
    a body that is not valid JSON, 422 for validation failures and 404
    when the bucket or annotation does not exist.
    """
    try:
        try:
            body = json.loads(self.request.body.decode("utf-8"))
        except ValueError:
            # json.JSONDecodeError and UnicodeDecodeError are both
            # ValueError subclasses; a malformed body is a client error,
            # not an internal one.
            self.write_and_set_status(
                {MESSAGE: "Invalid JSON format for annotation"},
                HTTPStatus.BAD_REQUEST)
            return

        envId = get_env_id()
        authorization = get_autorisation(envId, None, None)
        sett = get_settings()
        shouldValidate = sett['USE_ANNOTATION_AND_SCHEMA_VALIDATOR']

        if "annotationId" not in body:
            self.write_and_set_status(
                {MESSAGE: "Missing annotationId field required to find an annotation to update."},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return
        annotationId = body["annotationId"]
        del body["annotationId"]

        if "schemaType" not in body:
            self.write_and_set_status(
                {MESSAGE: "Missing schemaType field, which links the annotation to its schema."},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return
        docType = body["schemaType"]

        if "bucketId" in body and body["bucketId"] != bucketId:
            self.write_and_set_status(
                {MESSAGE: "bucketId from the path is different than bucketId in the body."},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        bucket = get_master_bucket_list(envId, authorization).get_bucket(
            corpusId, bucketId)
        storedAnnotation = bucket.get_annotation(id=annotationId,
                                                 docType=docType)
        if storedAnnotation["schemaType"] != docType:
            self.write_and_set_status(
                {MESSAGE: "You cannot change the schemaType of an annotation."},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        bucket.update_annotation(body, docType, annotationId, shouldValidate)
        self.write_and_set_status(None, HTTPStatus.NO_CONTENT)
    except BucketNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified bucket not found"},
                                  HTTPStatus.NOT_FOUND)
    except DocumentNotFoundException:
        self.write_and_set_status(
            {MESSAGE: "Annotation with provided id does not exist"},
            HTTPStatus.NOT_FOUND)
    except Exception:
        trace = traceback.format_exc().splitlines()
        self.write_and_set_status(
            {MESSAGE: "Internal server error", TRACE: trace},
            HTTPStatus.INTERNAL_SERVER_ERROR)
def set_up_environment():
    """Make sure the current environment exists.

    Looks the environment up by id and creates it when the lookup raises
    ``EnvNotFoundException``.  ``es_wait_ready`` is called before the
    lookup and again before the creation.
    """
    try:
        es_wait_ready()
        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)
        environments = get_env_list(auth)
        environments.get_env(env_id)
    except EnvNotFoundException:
        es_wait_ready()
        environments.create_env(env_id)
def post(self, corpusId):
    """Upload the annotations of a corpus, as a zip, to a destination URL.

    The JSON body must provide ``zipFileName`` and ``destUrl``.  Optional
    keys: ``isSendPut`` (default ``True``), ``isMultipart`` (default
    ``False``), ``multipartFieldName`` (default ``""``) and the
    comma-separated ``schemaTypes`` / ``bucketIds``.
    """
    try:
        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)
        payload = json.loads(self.request.body.decode("utf-8"))

        zip_file_name = payload.get("zipFileName")
        dest_url = payload.get("destUrl")
        send_put = payload.get("isSendPut", True)
        raw_schema_types = payload.get("schemaTypes", None)
        raw_bucket_ids = payload.get("bucketIds", None)
        multipart = payload.get("isMultipart", False)
        multipart_field = payload.get("multipartFieldName", "")

        if not zip_file_name:
            self.write_and_set_status(
                {MESSAGE: "Missing 'zipFileName' parameter"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return
        if not dest_url:
            self.write_and_set_status(
                {MESSAGE: "Missing 'destUrl' parameter"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        schema_types = raw_schema_types.split(",") if raw_schema_types else []
        bucket_ids = raw_bucket_ids.split(",") if raw_bucket_ids else []

        uploader = Corpus(env_id, auth, corpusId)
        uploader.upload_annotations(bucket_ids, schema_types, dest_url,
                                    zip_file_name, send_put, multipart,
                                    multipart_field)
        self.write_and_set_status({}, HTTPStatus.OK)
    except CorpusNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                  HTTPStatus.NOT_FOUND)
    except UploadUrlFailException as upErr:
        self.write_and_set_status(
            {MESSAGE: "Upload failed due: {0}".format(str(upErr))},
            HTTPStatus.UNPROCESSABLE_ENTITY)
    except Exception:
        payload = {
            MESSAGE: "Internal server error",
            TRACE: traceback.format_exc().splitlines(),
        }
        self.write_and_set_status(payload, HTTPStatus.INTERNAL_SERVER_ERROR)
def get(self):
    """Return the list of corpora of the current environment under ``data``."""
    try:
        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)
        corpus_list = get_master_document_corpus_list(env_id, auth)
        self.write_and_set_status({"data": corpus_list.get_corpuses_list()},
                                  HTTPStatus.OK)
    except Exception:
        payload = {
            MESSAGE: "Internal server error",
            TRACE: traceback.format_exc().splitlines(),
        }
        self.write_and_set_status(payload, HTTPStatus.INTERNAL_SERVER_ERROR)
def post(self):
    """Create a corpus from the JSON request body.

    The body must contain the ``CORPUS_LANGUAGES`` key; each language has
    to be accepted by the language manager's ``has_es_analyser``.  An
    optional ``CORPUS_ID`` must pass ``valid_es_id``.  Answers with the new
    corpus id (200), 422 for invalid input and 409 when the id is taken.
    """
    raw_body = self.request.body.decode("utf-8")
    try:
        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)
        payload = json.loads(raw_body)

        if CORPUS_LANGUAGES not in payload:
            self.write_and_set_status(
                {MESSAGE: "Missing required parameters. {0}".format(CORPUS_LANGUAGES)},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        languages = payload.get(CORPUS_LANGUAGES, None)
        try:
            language_manager = get_language_manager()
            for language in languages:
                if language_manager.has_es_analyser(language):
                    continue
                self.write_and_set_status(
                    {MESSAGE: "Invalid language: " + language},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return
        except Exception:
            # Any failure while walking the languages value is reported as
            # an invalid field.
            self.write_and_set_status(
                {MESSAGE: "Invalid languages field: " + str(languages)},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        corpusId = payload.get(CORPUS_ID, None)
        if corpusId and not valid_es_id(corpusId):
            self.write_and_set_status(
                {MESSAGE: "Corpus id invalid '{0}' . CorpusId can only be lowercase,alphanumeric with -_".format(corpusId)},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        corpus_list = get_master_document_corpus_list(env_id, auth)
        created = corpus_list.create_corpus(corpusId, languages)
        self.write_and_set_status({"id": created.id}, HTTPStatus.OK)
    except CorpusNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                  HTTPStatus.NOT_FOUND)
    except CorpusInvalidFieldException as ci:
        self.write_and_set_status({MESSAGE: "Invalid field: {0}".format(ci)},
                                  HTTPStatus.UNPROCESSABLE_ENTITY)
    except CorpusAlreadyExistsException:
        self.write_and_set_status(
            {MESSAGE: "Corpus with the same id already exists"},
            HTTPStatus.CONFLICT)
    except Exception:
        payload = {
            MESSAGE: "Internal server error",
            TRACE: traceback.format_exc().splitlines(),
        }
        self.write_and_set_status(payload, HTTPStatus.INTERNAL_SERVER_ERROR)
def delete(self, corpusId):
    """Delete a corpus and answer with an empty 204 (404 when unknown)."""
    try:
        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)
        get_master_document_corpus_list(env_id, auth).delete_corpus(corpusId)
        self.write_and_set_status(None, HTTPStatus.NO_CONTENT)
    except CorpusNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                  HTTPStatus.NOT_FOUND)
    except Exception:
        payload = {
            MESSAGE: "Internal server error",
            TRACE: traceback.format_exc().splitlines(),
        }
        self.write_and_set_status(payload, HTTPStatus.INTERNAL_SERVER_ERROR)
def get(self):
    """Search ``SCHEMA_TYPE_DOCUMENT_METADATA`` annotations across corpora.

    Required query arguments: ``from`` (integer >= 0), ``size``
    (integer >= 1) and ``corpusIds`` (comma-separated).  Optional:
    ``filters``, ``filterJoin``, ``sortBy``, ``sortOrder``.

    Responds with ``{"count": ..., "annotations": ...}``.  A missing,
    non-integer or out-of-range ``from``/``size`` and a missing
    ``corpusIds`` are answered with 422.
    """
    try:
        # default=None plus a narrow try: a missing or non-numeric value is
        # a client error and must not fall through to the 500 catch-all.
        fromIndexArgument = self.get_query_argument("from", default=None)
        sizeArgument = self.get_query_argument("size", default=None)
        try:
            fromIndex = int(fromIndexArgument)
            size = int(sizeArgument)
        except (TypeError, ValueError):
            self.write_and_set_status(
                {MESSAGE: "Invalid 'from' or 'size' parameter"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        if fromIndex < 0:
            self.write_and_set_status(
                {MESSAGE: "'from' must cannot be less than zero"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return
        if size < 1:
            self.write_and_set_status(
                {MESSAGE: "'size' cannot be less than 1"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        try:
            corpusIdsArgument = self.get_query_argument("corpusIds",
                                                        default=None)
            if not corpusIdsArgument:
                self.write_and_set_status(
                    {MESSAGE: "Missing corpusIds parameter"},
                    HTTPStatus.UNPROCESSABLE_ENTITY)
                return
            corpusIds = corpusIdsArgument.split(",")
        except Exception:
            self.write_and_set_status(
                {MESSAGE: "Invalid data passed in corpusIds parameter"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        filters = parse_filters_argument(
            self.get_query_argument("filters", default=None))
        filterJoin = self.get_query_argument("filterJoin", default=None)
        sortBy = self.get_query_argument("sortBy", default=None)
        sortOrder = self.get_query_argument("sortOrder", default=None)

        env_id = get_env_id()
        authorization = get_autorisation(env_id, None, None)
        mc = MultiCorpus(env_id, authorization)
        count, annotations = mc.get_annotations_of_type(
            corpusIds, SCHEMA_TYPE_DOCUMENT_METADATA, fromIndex, size,
            sortBy, sortOrder, filters, filterJoin)
        self.write_and_set_status(
            {"count": count, "annotations": annotations}, HTTPStatus.OK)
    except Exception:
        trace = traceback.format_exc().splitlines()
        self.write_and_set_status(
            {MESSAGE: "Internal server error", TRACE: trace},
            HTTPStatus.INTERNAL_SERVER_ERROR)
def partial_corpora_indices(corpus_ids: List[str]) -> str:
    """Build a comma-separated list of index patterns, one per corpus id.

    Each pattern is
    ``<env id><CLASS_PREFIX><corpus id>*<INDEX_DATA_SUFFIX>_*`` where the
    prefix and suffix come from the ``CLASSES.DOCUMENT_DIRECTORY``
    settings.  An empty ``corpus_ids`` yields an empty string.
    """
    # The idiomatic way would be to instantiate a corpus for each corpus id
    # and then search each corpus for the bucket with the right schema type.
    # As that represents a 2n operation before the main search, the original
    # author (Jean-François Héon) chose this (possibly premature)
    # optimization out of concern for latency.
    directory_settings = get_settings()['CLASSES']['DOCUMENT_DIRECTORY']
    class_prefix = directory_settings['CLASS_PREFIX']
    data_suffix = directory_settings['INDEX_DATA_SUFFIX']
    pattern_tail = '*' + data_suffix + '_*'
    pattern_head = get_env_id() + class_prefix
    return ','.join(
        pattern_head + corpus_id + pattern_tail for corpus_id in corpus_ids)
def post(self, corpusId):
    """Add a text document to a corpus from the JSON request body.

    ``language`` is required and must be one of the corpus languages.
    ``id``, ``text``, ``title`` and ``source`` are optional (``id``
    defaults to ``None``, the others to ``""``).  Answers with the
    document id (200), 422 for a missing or mismatching language and 409
    when a document with the same id already exists.
    """
    try:
        body = json.loads(self.request.body.decode("utf-8"))
        language = body.get("language")
        if not language:
            # Pass the status in the same call, as every other call site
            # does, instead of a one-argument call followed by set_status.
            self.write_and_set_status(
                {MESSAGE: "Missing required parameters"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        envId = get_env_id()
        authorization = get_autorisation(envId, None, None)
        docId = body.get("id")  # 'get' defaults to None when key is absent
        text = body.get("text", "")
        title = body.get("title", "")
        source = body.get("source", "")

        corpus = get_master_document_corpus_list(
            envId, authorization).get_corpus(corpusId)
        if language not in corpus.languages:
            self.write_and_set_status(
                {
                    MESSAGE:
                    "Document language do not correspond to corpus language"
                }, HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        docId = corpus.add_text_document(text, title, language, docId,
                                         source)
        self.write_and_set_status({"id": docId}, HTTPStatus.OK)
    except DocumentAlreadyExistsException:
        self.write_and_set_status(
            {MESSAGE: "Document with the same id already exists"},
            HTTPStatus.CONFLICT)
    except Exception:
        trace = traceback.format_exc().splitlines()
        self.write_and_set_status(
            {
                MESSAGE: "Internal server error",
                TRACE: trace
            }, HTTPStatus.INTERNAL_SERVER_ERROR)
def post(self):
    """Reset the current environment.

    Deletes the environment, then recreates it with the same id and name,
    calling ``es_wait_ready`` after each step.  Answers 200 on success
    and 500 with the stack trace on any failure.
    """
    try:
        envId = get_env_id()
        authorization = get_autorisation(envId, None, None)
        envList = get_env_list(authorization)
        env = envList.get_env(envId)
        envList.delete_env(env.id)
        es_wait_ready()
        # NOTE(review): fixed 5 s pause between delete and recreate;
        # presumably lets the backend settle - confirm it is still needed.
        sleep(5)
        get_env_list(authorization).create_env(env.id, env.name)
        es_wait_ready()
        self.write_and_set_status(None, HTTPStatus.OK)
    except Exception:
        trace = traceback.format_exc().splitlines()
        # Must go through write_and_set_status: Tornado's write() accepts a
        # single chunk, so write(body, status) raised inside this handler
        # and the 500 status was never set.
        self.write_and_set_status(
            {
                MESSAGE: "Internal server error",
                TRACE: trace
            }, HTTPStatus.INTERNAL_SERVER_ERROR)
def get(self, corpusId):
    """Return the buckets of a corpus under the ``buckets`` key.

    Each bucket is passed through ``getBucketWithSchema``; its second
    argument is true only when the ``INCLUDE_SCHEMA_JSON`` query argument
    equals ``'true'``.
    """
    try:
        raw_flag = self.get_query_argument(INCLUDE_SCHEMA_JSON, default=False)
        with_schema_json = raw_flag == 'true'

        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)
        corpus = get_master_document_corpus_list(env_id, auth).get_corpus(
            corpusId)
        described = [
            getBucketWithSchema(bucket, with_schema_json)
            for bucket in corpus.get_buckets()
        ]
        self.write_and_set_status({"buckets": described}, HTTPStatus.OK)
    except CorpusNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                  HTTPStatus.NOT_FOUND)
    except Exception:
        payload = {
            MESSAGE: "Internal server error",
            TRACE: traceback.format_exc().splitlines(),
        }
        self.write_and_set_status(payload, HTTPStatus.INTERNAL_SERVER_ERROR)
def post(self, corpusId, bucketId):
    """Add an annotation to a bucket from the JSON request body.

    ``schemaType`` is required.  ``annotationId`` is optional; when
    present it is removed from the body and passed separately to
    ``add_annotation``.  Answers with the annotation id (200), 400 for
    invalid JSON, 409 for a duplicate id and 404 for an unknown bucket.
    """
    try:
        payload = json.loads(self.request.body.decode("utf-8"))
        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)
        validate = get_settings()['USE_ANNOTATION_AND_SCHEMA_VALIDATOR']

        requested_id = None
        if "annotationId" in payload:
            requested_id = payload["annotationId"]
            del payload["annotationId"]

        if "schemaType" not in payload:
            self.write_and_set_status(
                {MESSAGE: "Missing schemaType field, which links the annotation to its schema."},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return
        schema_type = payload["schemaType"]

        bucket = get_master_bucket_list(env_id, auth).get_bucket(
            corpusId, bucketId)
        stored_id = bucket.add_annotation(payload, schema_type, requested_id,
                                          validate)
        self.write_and_set_status({"id": stored_id}, HTTPStatus.OK)
    except BucketNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified bucket not found"},
                                  HTTPStatus.NOT_FOUND)
    except DocumentAlreadyExistsException:
        self.write_and_set_status(
            {MESSAGE: "Annotation with the same id already exist"},
            HTTPStatus.CONFLICT)
    except JSONDecodeError:
        self.write_and_set_status(
            {MESSAGE: "Invalid JSON format for annotation"},
            HTTPStatus.BAD_REQUEST)
    except Exception:
        payload = {
            MESSAGE: "Internal server error",
            TRACE: traceback.format_exc().splitlines(),
        }
        self.write_and_set_status(payload, HTTPStatus.INTERNAL_SERVER_ERROR)
def post(self, corpusId):
    """Create a bucket in a corpus from the JSON request body.

    Optional body keys: ``id`` (must pass ``valid_es_id`` when truthy) and
    ``name``.  Answers with the bucket id (200), 409 for a duplicate id
    and 422 for an invalid id or an unknown corpus.
    """
    try:
        payload = json.loads(self.request.body.decode("utf-8"))
        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)

        bucket_id = payload["id"] if "id" in payload else None
        bucket_name = payload["name"] if "name" in payload else None

        if bucket_id and not valid_es_id(bucket_id):
            message = "Bucket id invalid '{0}' . BucketId can only be lowercase,alphanumeric with -_".format(bucket_id)
            self.write_and_set_status({MESSAGE: message},
                                      HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        corpus = get_master_document_corpus_list(env_id, auth).get_corpus(
            corpusId)
        created = corpus.create_bucket(bucket_name, bucket_id)
        self.write_and_set_status({"id": created.id}, HTTPStatus.OK)
    except BucketAlreadyExistsException:
        self.write_and_set_status(
            {MESSAGE: "Bucket with the same id already exists"},
            HTTPStatus.CONFLICT)
    except CorpusNotFoundException as err:
        message = "Corpus does not exist.Extra info: '{0}'".format(err)
        self.write_and_set_status({MESSAGE: message},
                                  HTTPStatus.UNPROCESSABLE_ENTITY)
    except Exception:
        payload = {
            MESSAGE: "Internal server error",
            TRACE: traceback.format_exc().splitlines(),
        }
        self.write_and_set_status(payload, HTTPStatus.INTERNAL_SERVER_ERROR)
def get(self, corpusId):
    """Send a zip archive of the corpus documents to the client.

    The temporary files are cleared once the archive has been handed to
    ``send_zip_file_with_get``, whether or not that call succeeds.
    Unexpected errors are logged and answered with a 500.
    """
    try:
        envId = get_env_id()
        authorization = get_autorisation(envId, None, None)
        documentCorpus = DocumentCorpus(envId, authorization, corpusId)
        zipPath = documentCorpus.get_documents_zip()
        zipName = os.path.basename(zipPath)
        try:
            self.send_zip_file_with_get(zipPath, zipName)
        finally:
            # Always remove the temporary archive, even if sending failed;
            # otherwise every failed download leaves a file behind.
            documentCorpus.clear_temporary_files()
    except CorpusNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                  HTTPStatus.NOT_FOUND)
    except Exception:
        trace = traceback.format_exc().splitlines()
        logger = logging.getLogger(__name__)
        logger.error(str(trace))
        self.write_and_set_status(
            {
                MESSAGE: "Internal server error",
                TRACE: trace
            }, HTTPStatus.INTERNAL_SERVER_ERROR)
def get(self, corpusId):
    """Return the id, languages, modification date and document count of a corpus."""
    try:
        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)
        found = get_master_document_corpus_list(env_id, auth).get_corpus(
            corpusId)
        summary = {
            CORPUS_ID: found.id,
            CORPUS_LANGUAGES: found.languages,
            CORPUS_MODIFICATION_DATE: datetime_to_json_str(
                found.modificationDate),
            CORPUS_DOCUMENT_COUNT: found.get_documents_count(),
        }
        self.write_and_set_status(summary, HTTPStatus.OK)
    except CorpusNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                  HTTPStatus.NOT_FOUND)
    except Exception:
        payload = {
            MESSAGE: "Internal server error",
            TRACE: traceback.format_exc().splitlines(),
        }
        self.write_and_set_status(payload, HTTPStatus.INTERNAL_SERVER_ERROR)
def get(self, corpusId, bucketId, schemaType):
    """Search the annotations of one schema type in a bucket, with pagination.

    Required query arguments: ``from`` (integer >= 0) and ``size``
    (integer >= 1).  Optional: ``filters``, ``filterJoin``, ``sortBy``,
    ``sortOrder``.

    Responds with ``{"count": ..., "annotations": ...}``.  A missing,
    non-integer or out-of-range ``from``/``size`` is answered with 422.
    """
    try:
        # default=None plus a narrow try: a missing or non-numeric value is
        # a client error and must not fall through to the 500 catch-all.
        fromIndexArgument = self.get_query_argument("from", default=None)
        sizeArgument = self.get_query_argument("size", default=None)
        try:
            fromIndex = int(fromIndexArgument)
            size = int(sizeArgument)
        except (TypeError, ValueError):
            self.write_and_set_status(
                {MESSAGE: "Invalid 'from' or 'size' parameter"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        if fromIndex < 0:
            self.write_and_set_status(
                {MESSAGE: "'from' must cannot be less than zero"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return
        if size < 1:
            self.write_and_set_status(
                {MESSAGE: "'size' cannot be less than 1"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        envId = get_env_id()
        authorization = get_autorisation(envId, None, None)
        documentSearch = DocumentSearch(envId, authorization, None, corpusId)

        filters = parse_filters_argument(
            self.get_query_argument("filters", default=None))
        filterJoin = self.get_query_argument("filterJoin", default=None)
        sortBy = self.get_query_argument("sortBy", default=None)
        sortOrder = self.get_query_argument("sortOrder", default=None)

        count, annotations = documentSearch.search_annotations_for_one_type(
            bucketId, schemaType, fromIndex, size, sortBy, sortOrder,
            filters, filterJoin)
        self.write_and_set_status(
            {"count": count, "annotations": annotations}, HTTPStatus.OK)
    except Exception:
        trace = traceback.format_exc().splitlines()
        self.write_and_set_status(
            {MESSAGE: "Internal server error", TRACE: trace},
            HTTPStatus.INTERNAL_SERVER_ERROR)
def getAnnotations(self, corpusId, documentIds: List[str]):
    """Write the annotations of the given documents to the response.

    The ``schemaTypes`` argument is required: a comma-separated list of
    ``bucketId:schemaType`` pairs, grouped here into a mapping from bucket
    id to schema types.  ``offsetBegin`` / ``offsetEnd`` are optional; an
    ``Interval`` is passed to the search only when at least one of them
    differs from its default.
    """
    types_by_bucket = {}
    try:
        raw_values = self.get_arguments("schemaTypes")
        if not raw_values:
            self.write_and_set_status(
                {MESSAGE: "Missing schemaTypes parameter"},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return
        # get_arguments returns a list; only its first entry is used.
        for pair in raw_values[0].split(","):
            pieces = pair.split(":")
            bucket_id = pieces[0]
            schema_type = pieces[1]
            types_by_bucket.setdefault(bucket_id, []).append(schema_type)
    except Exception:
        self.write_and_set_status(
            {MESSAGE: "Invalid data passed in schemaTypes parameter"},
            HTTPStatus.UNPROCESSABLE_ENTITY)
        return

    # TODO: arbitrary large defaults are used when the offsets are absent.
    offset_begin = self.get_argument("offsetBegin", MIN_OFFSET_BEGIN)
    offset_end = self.get_argument("offsetEnd", MAX_OFFSET_END)

    env_id = get_env_id()
    auth = get_autorisation(env_id, None, None)
    search = DocumentSearch(env_id, auth, documentIds, corpusId)

    if offset_begin != MIN_OFFSET_BEGIN or offset_end != MAX_OFFSET_END:
        offsets = [Interval(offset_begin, offset_end, False, False, False)]
    else:
        offsets = None

    found = search.get_annotations(types_by_bucket, offsets)
    self.write(found if found[corpusId] else {})
    self.write_and_set_status(None, HTTPStatus.OK)
def delete(self, corpusId, bucketId, schemaType):
    """Remove a schema type from a bucket.

    Answers with an empty 204 on success and 404 when the corpus, the
    bucket or the schema type does not exist.
    """
    try:
        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)
        corpus = get_master_document_corpus_list(env_id, auth).get_corpus(
            corpusId)
        bucket = corpus.get_bucket(bucketId)

        bound_types = [
            entry['schemaType']
            for entry in bucket.get_schemas_info(False)['data']
        ]
        if schemaType not in bound_types:
            message = "Schema Type: {0} does not exist".format(schemaType)
            self.write_and_set_status({MESSAGE: message},
                                      HTTPStatus.NOT_FOUND)
            return

        bucket.delete_schema_type(schemaType)
        self.write_and_set_status(None, HTTPStatus.NO_CONTENT)
    except CorpusNotFoundException as err:
        message = "Corpus does not exist.Extra info: '{0}'".format(err)
        self.write_and_set_status({MESSAGE: message}, HTTPStatus.NOT_FOUND)
    except BucketNotFoundException as err:
        message = "Bucket does not exist.Extra info: '{0}'".format(err)
        self.write_and_set_status({MESSAGE: message}, HTTPStatus.NOT_FOUND)
    except Exception:
        payload = {
            MESSAGE: "Internal server error",
            TRACE: traceback.format_exc().splitlines(),
        }
        self.write_and_set_status(payload, HTTPStatus.INTERNAL_SERVER_ERROR)
def put(self, corpusId):
    """Update the languages of a corpus from the JSON request body.

    When the ``CORPUS_LANGUAGES`` value is truthy, each language has to be
    accepted by the language manager's ``has_es_analyser``.  Answers with
    an empty 204 on success, 422 for invalid input and 404 for an unknown
    corpus.
    """
    try:
        raw_body = self.request.body.decode("utf-8")
        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)
        payload = json.loads(raw_body)

        try:
            languages = payload.get(CORPUS_LANGUAGES, None)
            if languages:
                language_manager = get_language_manager()
                for language in languages:
                    if language_manager.has_es_analyser(language):
                        continue
                    self.write_and_set_status(
                        {MESSAGE: "Invalid language: " + language},
                        HTTPStatus.UNPROCESSABLE_ENTITY)
                    return
        except Exception:
            self.write_and_set_status(
                {MESSAGE: "Invalid languages field: " + str(languages)},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        get_master_document_corpus_list(env_id, auth).update_corpus(
            corpusId, languages)
        self.write_and_set_status(None, HTTPStatus.NO_CONTENT)
    except CorpusInvalidFieldException as ci:
        self.write_and_set_status({MESSAGE: "Invalid field: {0}".format(ci)},
                                  HTTPStatus.UNPROCESSABLE_ENTITY)
    except CorpusNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified corpus not found"},
                                  HTTPStatus.NOT_FOUND)
    except Exception:
        payload = {
            MESSAGE: "Internal server error",
            TRACE: traceback.format_exc().splitlines(),
        }
        self.write_and_set_status(payload, HTTPStatus.INTERNAL_SERVER_ERROR)
def post(self, corpusId, bucketId):
    """Add annotations to a bucket in batch from an uploaded archive.

    The upload is read from the ``file`` entry of the request files,
    stored under a unique name in the temporary directory, handed to
    ``Corpus.add_annotations`` and removed afterwards.  Answers 200 when
    no errors are reported, otherwise 422 with the reported errors.
    """
    try:
        envId = get_env_id()
        authorization = get_autorisation(envId, None, None)

        # Store the upload in the temporary directory.
        tmpUploadFolder = get_jass_tmp_dir()
        fileinfo = self.request.files['file'][0]
        ext = os.path.splitext(fileinfo['filename'])[1]
        # uuid1 has to be *called*: str(uuid1) is the repr of the function,
        # which gave every upload the same file name.
        zipPath = os.path.join(tmpUploadFolder, str(uuid1()) + ext)

        try:
            with open(zipPath, 'wb') as f:
                f.write(fileinfo['body'])

            # Add the annotations in batch.
            batchCorpus = Corpus(envId, authorization, corpusId)
            errors = batchCorpus.add_annotations(bucketId, zipPath)
        finally:
            # Remove the upload even when writing or importing failed.
            if os.path.exists(zipPath):
                os.remove(zipPath)

        if errors:
            self.write_and_set_status(errors,
                                      HTTPStatus.UNPROCESSABLE_ENTITY)
        else:
            self.write_and_set_status(None, HTTPStatus.OK)
    except Exception:
        trace = traceback.format_exc().splitlines()
        self.write_and_set_status(
            {
                MESSAGE: "Internal server error",
                TRACE: trace
            }, HTTPStatus.INTERNAL_SERVER_ERROR)
def corpus_from_id(corpus_id: str) -> DocumentCorpus:
    """Look up a corpus by id in the current environment's master corpus list."""
    current_env = get_env_id()
    auth = get_autorisation(current_env, None, None)
    return get_master_document_corpus_list(current_env, auth).get_corpus(
        corpus_id)
def post(self, corpusId, bucketId):
    """Bind a new schema to a bucket from the request body.

    The body (obtained through ``strip_body_bom``) must contain
    ``targetType``, ``schemaType`` and ``properties``.  The target type
    must be known to ``TargetType`` and the schema type must not already
    be bound to the bucket (403 otherwise).  Answers with an empty 204 on
    success and 422 for invalid input, an unknown corpus or bucket, or an
    invalid schema binding.
    """
    try:
        payload = self.strip_body_bom()
        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)

        if is_missing_required_fields(
                payload, ["targetType", "schemaType", "properties"]):
            message = missing_fields_message(
                payload, ["targetType", "schemaType", "properties"])
            self.write_and_set_status({MESSAGE: message},
                                      HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        schema_type = payload["schemaType"]
        target_name = payload["targetType"]
        if not TargetType.has(target_name):
            message = "Target type {0} not supported".format(target_name)
            self.write_and_set_status({MESSAGE: message},
                                      HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        corpus = get_master_document_corpus_list(env_id, auth).get_corpus(
            corpusId)
        bucket = corpus.get_bucket(bucketId)
        bound_types = [
            entry['schemaType']
            for entry in bucket.get_schemas_info(False)['data']
        ]
        if schema_type in bound_types:
            message = "A schema with the schemaType '{0}' is already bound to the bucket.".format(schema_type)
            self.write_and_set_status({MESSAGE: message},
                                      HTTPStatus.FORBIDDEN)
            return

        # Register the schema (stored by hash), then bind it to the bucket.
        target_type = TargetType(target_name)
        if target_type == TargetType.document_surface1d:
            nested_fields = ["offsets"]
        else:
            nested_fields = []
        schema_id = get_schema_list(env_id, auth).add_json_schema_as_hash(
            payload, False, nested_fields)
        bucket.add_or_update_schema_to_bucket(schema_id, schema_type,
                                              target_type, {})
        self.write_and_set_status(None, HTTPStatus.NO_CONTENT)
    except CorpusNotFoundException as err:
        message = "Corpus does not exist.Extra info: '{0}'".format(err)
        self.write_and_set_status({MESSAGE: message},
                                  HTTPStatus.UNPROCESSABLE_ENTITY)
    except BucketNotFoundException as err:
        message = "Bucket does not exist.Extra info: '{0}'".format(err)
        self.write_and_set_status({MESSAGE: message},
                                  HTTPStatus.UNPROCESSABLE_ENTITY)
    except SchemaBindingInvalid as err:
        self.write_and_set_status(
            {MESSAGE: "Schema Binding Invalid: '{0}'".format(err)},
            HTTPStatus.UNPROCESSABLE_ENTITY)
    except Exception:
        payload = {
            MESSAGE: "Internal server error",
            TRACE: traceback.format_exc().splitlines(),
        }
        self.write_and_set_status(payload, HTTPStatus.INTERNAL_SERVER_ERROR)