def connect(self):
    """
    Open the MongoDB client described by the service settings and check it.

    Host, port and database name are read from the
    "ServiceStockageAnnotations" settings section. The client is created
    with ``connect=False``, so an ``ismaster`` command is issued right away
    to find out whether the server can actually be reached.

    Side effects: sets ``self.client``, ``self.mongoDb`` and
    ``self.m_connected``.

    :return: True when the server answered, False otherwise (the failure is
             logged, never raised).
    """
    section = "ServiceStockageAnnotations"
    try:
        mongo_host = settings.GetConfigValue(section, "MONGO_HOST")
        mongo_port = int(settings.GetConfigValue(section, "MongoPort"))
        self.client = MongoClient(mongo_host, mongo_port, connect=False)
        self.mongoDb = settings.GetConfigValue(section, "MongoDb")
        # Force connection test
        # https://api.mongodb.com/python/current/migrate-to-pymongo3.html#mongoclient-connects-asynchronously
        self.client.admin.command("ismaster")
        self.m_connected = True
        return True
    except pymongo.errors.ConnectionFailure:
        logger.logError(StorageException(1))
    except Exception as err:
        logger.logUnknownError("Annotation Storage Create Document", "", err)
    # Only reached when one of the handlers above swallowed an error.
    self.m_connected = False
    return False
def internal_error500(error):
    """
    Log an unexpected critical error and build the generic HTTP 500 reply.

    Presumably registered as the REST service's 500 handler -- confirm
    against the application setup.

    :param error: The error being handled; only its ``str()`` form is logged.
    :return: Whatever ``error_response`` builds for INTERNAL_SERVER_ERROR.
    """
    log_title = "Annotation Storage REST Service Unknown Critical Error"
    logger.logUnknownError(log_title, str(error), 50000)
    # The reply is deliberately generic: no detail of the error is exposed.
    status = http.HTTPStatus.INTERNAL_SERVER_ERROR
    return error_response(status,
                          "Internal Server Error",
                          "",
                          "Server can not currently process requests")
def getMongoDocumentS(self, jsonQuery, collection=None, **kwargs):
    """
    Search a collection for documents.

    :param jsonQuery: Query used to select the documents. See
        http://docs.mongodb.org/manual/reference/operator/query/ for options.
    :param collection: Name of the collection to search. Falls back to
        ``self.mongoCollection`` when empty or None.
    :param kwargs: Extra options forwarded to ``find`` (e.g. limit or skip)
        after being passed through ``clear_nones``.
    :return: The result of ``find`` on the collection.
    :raises StorageException: code 1 when the storage is not connected.
    :raises MongoDocumentException: code 0 on any other failure (the
        original error is logged and chained as the cause).
    """
    if not collection:
        collection = self.mongoCollection

    if not self.isConnected():
        raise StorageException(1)

    try:
        coll = self.client[self.mongoDb][collection]
        # Client might have called with None args. E.g. limit or skip.
        # ``kwargs`` is always a dict (possibly empty), so no None check
        # is needed before cleaning it.
        return coll.find(jsonQuery, **clear_nones(kwargs))
    except StorageException:
        raise
    except Exception as e:
        logger.logUnknownError("Annotation Storage Get Document", "", e)
        raise MongoDocumentException(0) from e
def aggregate(self, pipeline: list, collection: str, **kwargs):
    """
    Run an aggregation pipeline on a collection.

    :param pipeline: Aggregation stages, handed to the collection's
        ``aggregate`` call as is.
    :param collection: Name of the collection to aggregate on. Falls back
        to ``self.mongoCollection`` when empty or None.
    :param kwargs: Extra options forwarded unchanged to ``aggregate``
        (this function sets none itself, e.g. no allowDiskUse).
    :return: Whatever the collection's ``aggregate`` call returns.
    :raises StorageException: code 1 when the storage is not connected.
    :raises MongoDocumentException: code 0 on any other failure.
    """
    target = collection if collection else self.mongoCollection

    if not self.isConnected():
        raise StorageException(1)

    try:
        return self.client[self.mongoDb][target].aggregate(pipeline, **kwargs)
    except StorageException as err:
        raise err
    except Exception as err:
        logger.logUnknownError("Annotation Storage Aggregate", "", err)
        raise MongoDocumentException(0)
def deleteMongoDocumentS(self, jsonQuery, collection=None):
    """
    Delete every document matching a query.

    :param jsonQuery: Query (a mapping) selecting the documents to delete.
        See http://docs.mongodb.org/manual/reference/operator/query/ for
        options. None is treated as an empty query.
    :param collection: Name of the collection to delete from. Falls back to
        ``self.mongoCollection`` when empty or None.
    :return: The number of deleted documents.
    :raises StorageException: code 1 when the storage is not connected,
        code 2 when the server did not acknowledge the delete.
    :raises MongoDocumentException: code 0 on any other failure (the
        original error is logged and chained as the cause).
    """
    if not collection:
        collection = self.mongoCollection

    if not self.isConnected():
        raise StorageException(1)

    try:
        coll = self.client[self.mongoDb][collection]
        # Collection.remove() is deprecated in PyMongo 3 and gone in
        # PyMongo 4; delete_many() is the supported equivalent. remove()
        # accepted None as "match everything", so keep accepting it.
        query = {} if jsonQuery is None else jsonQuery
        result = coll.delete_many(query)
        if not result.acknowledged:
            # No confirmation from the server: the count is unknown.
            raise StorageException(2)
        return result.deleted_count
    except StorageException:
        raise
    except Exception as e:
        logger.logUnknownError("Annotation Storage Delete Document", "", e)
        raise MongoDocumentException(0) from e
def connect(self):
    """
    Open the MongoDB client described by the service settings and check it.

    Host, port and database name are read from the
    "ServiceStockageAnnotations" settings section. The client is created
    with ``connect=False``, so an ``ismaster`` command is issued right away
    to find out whether the server can actually be reached.

    Side effects: sets ``self.client``, ``self.mongoDb`` and
    ``self.m_connected``.

    :return: True when the server answered, False otherwise (the failure is
             logged, never raised).
    """
    section = "ServiceStockageAnnotations"
    try:
        mongo_host = settings.GetConfigValue(section, "MONGO_HOST")
        mongo_port = int(settings.GetConfigValue(section, "MongoPort"))
        self.client = MongoClient(mongo_host, mongo_port, connect=False)
        self.mongoDb = settings.GetConfigValue(section, "MongoDb")
        # Force connection test
        # https://api.mongodb.com/python/current/migrate-to-pymongo3.html#mongoclient-connects-asynchronously
        self.client.admin.command("ismaster")
        self.m_connected = True
        return True
    except pymongo.errors.ConnectionFailure:
        logger.logError(StorageException(1))
    except Exception as err:
        logger.logUnknownError("Annotation Storage Create Document", "", err)
    # Only reached when one of the handlers above swallowed an error.
    self.m_connected = False
    return False
def updateMongoDocument(self, jsonDoc, collection=None):
    """
    Updates an existing document, by replacing it with new contents.

    This function only validates the presence of the required fields.

    :Preconditions (Otherwise exception is thrown):
        * isConnected must be true,
        * required fields must be present

    :param jsonDoc: Document as a JSON document. The document needs to
        contain a valid ``_id``. Its ``_id`` is converted in place by
        ``mongo_utils.changeDocIdToMongoId`` before saving.
    :param collection: Name of the collection holding the document. Falls
        back to ``self.mongoCollection`` when empty or None.
    :return: The id of the saved document, as a string.
    :raises StorageException: code 1 when the storage is not connected.
    :raises MongoDocumentException: code 2 when ``jsonDoc`` is None, code 5
        when ``_id`` is missing or no such document exists, code 0 when the
        save itself fails.

    Document content (mandatory). Other user fields may be present:
    ::

        {
            _id: Document id as a string
            @context: Context describing the format of the document
        }
    """
    if not collection:
        collection = self.mongoCollection

    if not self.isConnected():
        raise StorageException(1)

    if jsonDoc is None:
        raise MongoDocumentException(2)

    if '_id' not in jsonDoc:
        logger.logInfo(MongoDocumentException(5, ""))
        raise MongoDocumentException(5, "")

    # Look the document up in the SAME collection that will be written to;
    # without the collection argument the check ran against the default
    # collection even when the caller targeted another one.
    if self.getMongoDocument(jsonDoc['_id'], collection) is None:
        # ID cannot be found
        logger.logInfo(MongoDocumentException(5, jsonDoc['_id']))
        raise MongoDocumentException(5, jsonDoc['_id'])

    mongo_utils.changeDocIdToMongoId(jsonDoc)
    try:
        coll = self.client[self.mongoDb][collection]
        # NOTE(review): Collection.save() is deprecated in PyMongo 3 and
        # removed in PyMongo 4; replace_one() is the documented successor.
        doc_id = coll.save(jsonDoc)
        return str(doc_id)
    except Exception as e:
        logger.logUnknownError("Annotation Storage Update Document", "", e)
        raise MongoDocumentException(0) from e
def deleteAnnotationS(self, documentIds, jsonSelect=None, storageType=1):
    """
    Delete multiple annotations.

    :@param documentIds: List of documents containing the annotations.
    :@param jsonSelect: Additional query parameters, which can restrict the
        search. See
        http://docs.mongodb.org/manual/reference/operator/query/ for
        options. When given, the dict is modified in place: its "doc_id"
        entry is replaced by a filter on ``documentIds`` and any
        "file_fs_id_batch" entry is removed. Defaults to a fresh empty
        query for each call.
    :@param storageType: Describe which annotation storage to delete from
        (HUMAN_STORAGE, BATCH_STORAGE or ALL_STORAGE of AnnotationManager).
    :@return: Number of documents deleted, summed over the storages
        handled. 0 when ``documentIds`` does not pass validation.
    """
    # A literal {} default would be shared by every call and is mutated
    # below, so a new dict is created per call instead.
    if jsonSelect is None:
        jsonSelect = {}

    if not self.__validateDocumentIds(documentIds):
        return 0
    self.__validateStorageByType(storageType)

    count = 0
    self.__setDocIdToJsonSelect(documentIds, jsonSelect)

    if storageType in (AnnotationManager.HUMAN_STORAGE,
                       AnnotationManager.ALL_STORAGE):
        count += self.deleteMongoDocumentS(
            jsonSelect,
            self.storageCollections[AnnotationManager.HUMAN_STORAGE])

    if storageType in (AnnotationManager.BATCH_STORAGE,
                       AnnotationManager.ALL_STORAGE):
        # find all batches, delete batch content, then delete batch.
        batch_collection = self.storageCollections[
            AnnotationManager.BATCH_STORAGE]
        batchDocs = self.getMongoDocumentS(jsonSelect, batch_collection)
        db = self.client[self.mongoDb]
        fs = gridfs.GridFS(db)
        # delete all the files
        for batch in batchDocs:
            try:
                annoFileID = batch["file_fs_id_batch"]
                if fs.exists(annoFileID):
                    fs.delete(annoFileID)
            except Exception as e:
                # Best effort: log the failure and carry on with the
                # remaining batches.
                logger.logUnknownError(
                    "Annotation Storage Delete Annotations", "", e)
        # delete all the batches
        count += self.deleteMongoDocumentS(jsonSelect, batch_collection)

    return count
def createMongoDocument(self, jsonDoc, collection=None):
    """
    Creates a new document.

    This function only validates the presence of the required fields.

    :preconditions (Otherwise exception is thrown):
        * isConnected must be true,
        * jsonDoc must exist and be a valid JSON object,

    :param jsonDoc: Contents of the document. Expected elements:
        ::

            {
                @context: context describing the format of the document
            }

        If the document contains the field ``_id``, that field is deleted
        (from the object passed in) and the id generated on insert is used
        instead. The generated id is required to access the document.
    :param collection: Enables you to override the default collection if
        needed.
    :return: The id of the created document, as a string.
    :raises StorageException: code 1 when the storage is not connected.
    :raises MongoDocumentException: code 2 when ``jsonDoc`` is None, code 0
        when the insert fails (the original error is logged and chained).
    """
    if not collection:
        collection = self.mongoCollection

    if not self.isConnected():
        raise StorageException(1)

    if jsonDoc is None:
        raise MongoDocumentException(2)

    # We don't want the client to specify an id.
    if '_id' in jsonDoc:
        del jsonDoc["_id"]

    try:
        coll = self.client[self.mongoDb][collection]
        # Collection.insert() is deprecated in PyMongo 3 and removed in
        # PyMongo 4; insert_one() is the supported single-document call.
        return str(coll.insert_one(jsonDoc).inserted_id)
    except Exception as e:
        logger.logUnknownError("Annotation Storage Create Document", "", e)
        raise MongoDocumentException(0) from e
def _processCommonException(e):
    """
    Translate an exception into the REST error response sent to the client.

    Each known exception family gets its own absolute code range so the
    code in the response can be traced back to its origin:

    * StorageException        -> 503, code 53000 + e.code
    * MongoDocumentException  -> 500 (code 52000) when e.code is 0,
                                 otherwise 422 with code 52000 + e.code
    * AnnotationException     -> 500 (code 51000) when e.code is 0,
                                 otherwise 422 with code 51000 + e.code
    * StorageRestExceptions   -> 404 when e.code is 2 or 3, otherwise 422;
                                 code 50100 + e.code in both cases
    * BadRequest (Flask)      -> 400 with no code
    * anything else           -> logged as unknown (50000), generic 500

    :param e: The exception to translate.
    :return: Whatever ``error_response`` builds for the selected status.
    """
    if isinstance(e, StorageException):
        return error_response(http.HTTPStatus.SERVICE_UNAVAILABLE,
                              "Service Unavailable",
                              53000 + e.code,
                              "Error connecting to the backend storage")

    if isinstance(e, MongoDocumentException):
        if e.code != 0:
            return error_response(http.HTTPStatus.UNPROCESSABLE_ENTITY,
                                  "Cannot process Entity",
                                  52000 + e.code,
                                  str(e))
        return error_response(http.HTTPStatus.INTERNAL_SERVER_ERROR,
                              "Internal Server Error",
                              52000,
                              "Server can not currently process requests")

    if isinstance(e, AnnotationException):
        if e.code != 0:
            return error_response(http.HTTPStatus.UNPROCESSABLE_ENTITY,
                                  "Cannot process Entity",
                                  51000 + e.code,
                                  str(e))
        return error_response(http.HTTPStatus.INTERNAL_SERVER_ERROR,
                              "Internal Server Error",
                              51000,
                              "Server can not currently process requests")

    if isinstance(e, StorageRestExceptions):
        if e.code in (2, 3):
            return error_response(http.HTTPStatus.NOT_FOUND,
                                  "Not Found",
                                  50100 + e.code,
                                  str(e))
        return error_response(http.HTTPStatus.UNPROCESSABLE_ENTITY,
                              "Cannot process entity",
                              50100 + e.code,
                              str(e))

    if isinstance(e, BadRequest):
        # Flask error
        return error_response(http.HTTPStatus.BAD_REQUEST,
                              "Bad Request", "", "")

    # Nothing matched: record it, but keep the reply generic.
    logger.logUnknownError("Annotation Storage REST Service Unknown Error",
                           str(e), 50000)
    return error_response(http.HTTPStatus.INTERNAL_SERVER_ERROR,
                          "Internal Server Error",
                          "",
                          "Server can not currently process requests")
def __setDocIdToJsonSelect(self, documentIds, jsonSelect):
    """
    Restrict a query, in place, to a given list of document ids.

    Any existing "doc_id" or "file_fs_id_batch" entry of ``jsonSelect`` is
    dropped, then "doc_id" is set to an ``$in`` filter on the string form
    of every id in ``documentIds``.

    :param documentIds: Iterable of document ids.
    :param jsonSelect: Query dict to modify.
    :return: None. A failure to store the filter is logged, not raised.
    """
    for stale_key in ("doc_id", "file_fs_id_batch"):
        if stale_key in jsonSelect:
            del jsonSelect[stale_key]

    id_strings = [str(doc_id) for doc_id in documentIds]

    try:
        jsonSelect["doc_id"] = {"$in": id_strings}
    except Exception as err:
        logger.logUnknownError("Annotation Storage Get Doc Id",
                               "Failed Delete Query", err)
def get_text_index_fields(self) -> list:
    """
    Return the field names listed in the "weights" of an index.

    The indexes of the human-storage collection are scanned in the order
    ``list_indexes`` yields them; the first one that has a "weights" entry
    (presumably the text index -- confirm) provides the result.

    :return: List of the keys of that "weights" entry, or an empty list
        when no index has one.
    :raises StorageException: code 1 when the storage is not connected.
    :raises MongoDocumentException: code 0 on any other failure (the
        original error is logged and chained as the cause).
    """
    if not self.isConnected():
        raise StorageException(1)

    try:
        db = self.client[self.mongoDb]
        coll = db[self.storageCollections[AnnotationManager.HUMAN_STORAGE]]
        for index in coll.list_indexes():
            if "weights" in index:
                # Materialize the keys so the result really is a list, as
                # the annotation promises, rather than a keys view.
                return list(index["weights"].keys())
        return []
    except StorageException:
        raise
    except Exception as e:
        logger.logUnknownError("Annotation Storage Aggregate", "", e)
        raise MongoDocumentException(0) from e
def getMongoDocument(self, strDocId, collection=None):
    """
    Fetch a single document by id.

    :Preconditions (Otherwise exception is thrown):
        * isConnected must be true,

    :param strDocId: Document ID, converted with ``ObjectId`` for the
        lookup.
    :param collection: Name of the collection to read. Falls back to
        ``self.mongoCollection`` when empty or None.
    :return: The result of ``find_one`` after it has been passed through
        ``mongo_utils.changeDocIdToString``; None when ``strDocId`` is
        rejected as an invalid id.
    :raises StorageException: code 1 when the storage is not connected.
    :raises MongoDocumentException: code 0 on any other failure.

    Document content returned (mandatory). Other user fields may be
    present:
    ::

        {
            _id: Document id as a string
            @context: context describing the format of the document
        }
    """
    target = collection or self.mongoCollection

    if not self.isConnected():
        raise StorageException(1)

    try:
        found = self.client[self.mongoDb][target].find_one(
            {"_id": ObjectId(strDocId)})
        mongo_utils.changeDocIdToString(found)
        return found
    except InvalidId:
        # An id that cannot be parsed is reported like a missing document.
        return None
    except Exception as err:
        logger.logUnknownError("Annotation Storage Get Document", "", err)
        raise MongoDocumentException(0)
def createAnnotationS(self, jsonBatch, strDocId, batchFormat=1, storageType=1):
    """
    Inserts annotations by batch.

    Every annotation of ``jsonBatch['data']`` is modified in place before
    being stored:

    * with the compact batch format, the entries of ``jsonBatch['common']``
      (when present) are copied into each annotation;
    * any client supplied ``_id`` is removed;
    * ``doc_id`` is set to ``strDocId``.

    When ``storageType`` is 1 the annotations are inserted directly into
    the collection for that storage type. Otherwise the whole batch is
    dumped as UTF-8 JSON into GridFS and one batch document, holding the
    attributes shared by the annotations plus ``doc_id`` and
    ``file_fs_id_batch``, is inserted into the collection.

    :@param jsonBatch : JSON of the message. Annotations are read from its
        'data' entry; 'common' is optional.
    :@param strDocId : Id of the document containing the annotation.
    :@param batchFormat : Describes the format of the elements to input.
        Must be AnnotationManager.COMPACT_BATCH_FORMAT or
        AnnotationManager.BASIC_BATCH_FORMAT.
    :@param storageType : Describes how to store the elements.
        AnnotationManager.ALL_STORAGE is rejected.
    :@return: Number of created annotations; 0 when the batch has no
        'data' entry.
    :raises AnnotationException: code 1 when ``strDocId`` is not accepted
        by ``mongo_utils.isObjectId``, code 7 when ``storageType`` is
        ALL_STORAGE, code 5 for an unsupported ``batchFormat``, code 8
        when fewer annotations were inserted than requested.
    :raises MongoDocumentException: code 0 on any other failure while
        storing.
    :raises StorageException: code 1 when the storage is not connected.
    """
    if (not mongo_utils.isObjectId(strDocId)):
        logger.logInfo(AnnotationException(1, strDocId))
        raise AnnotationException(1, strDocId)

    self.__validateStorageByType(storageType)
    # Creating in all storages at once is not supported.
    if (storageType == AnnotationManager.ALL_STORAGE):
        logger.logError(AnnotationException(7, storageType))
        raise AnnotationException(7, storageType)

    # If the batch doesn't have data
    if 'data' not in jsonBatch:
        return 0

    if (batchFormat != AnnotationManager.COMPACT_BATCH_FORMAT and
            batchFormat != AnnotationManager.BASIC_BATCH_FORMAT):
        logger.logInfo(AnnotationException(5, batchFormat))
        raise AnnotationException(5, batchFormat)

    batchData = jsonBatch['data']

    if (batchFormat == AnnotationManager.COMPACT_BATCH_FORMAT):
        if 'common' in jsonBatch:
            batchCommon = jsonBatch['common']
            # Optimisations later
            # Expand the shared attributes into every annotation.
            for anno in batchData:
                for common in batchCommon:
                    anno[common] = batchCommon[common]

    for anno in batchData:
        # We don't want the client to specify an id.
        if '_id' in anno:
            del anno["_id"]

    if (self.isConnected()):
        try:
            # make each annotation reference its document
            for anno in batchData:
                anno['doc_id'] = strDocId

            db = self.client[self.mongoDb]
            coll = db[self.storageCollections[storageType]]
            if (storageType == 1):
                # The whole list is handed to a single insert call; the
                # number of ids returned is the number inserted.
                nbAnnoToInsert = len(batchData)
                nbInserted = len(coll.insert(batchData))
                if (nbAnnoToInsert != nbInserted):
                    # TODO: Delete all annotations if this happens
                    raise AnnotationException(8, nbInserted, nbAnnoToInsert)

                return nbInserted
            else:
                # Batch storage, save as files
                fs = gridfs.GridFS(db)
                batchDoc = {}
                for anno in batchData:
                    if (batchDoc == {}):
                        # Possible common attributes between annotations:
                        # seed the candidates from this annotation.
                        for attrib in anno:
                            batchDoc[attrib] = anno[attrib]

                    # If an annotation has a different value for an
                    # attribute than the common attribute, the common
                    # attribute must be deleted.
                    for attrib in anno:
                        if (str(attrib) in batchDoc):
                            if (anno[attrib] != batchDoc[str(attrib)]):
                                del batchDoc[attrib]

                    # Add id (generated per annotation, stored under "id",
                    # after the comparison above so it never counts as a
                    # common attribute of this annotation)
                    anno["id"] = str(ObjectId())

                jsonDump = json.dumps(batchData).encode("UTF-8")
                annoFileID = fs.put(jsonDump)
                nbInserted = len(batchData)

                # Attributes declared common by the client override the
                # ones detected above.
                if 'common' in jsonBatch:
                    batchCommon = jsonBatch['common']
                    for common in batchCommon:
                        batchDoc[common] = batchCommon[common]

                batchDoc['doc_id'] = str(strDocId)
                batchDoc['file_fs_id_batch'] = annoFileID
                try:
                    batch_id = coll.insert(batchDoc)
                except Exception as e:
                    # clean up file info so we dont have garbage in our db
                    logger.logUnknownError("Annotation Storage Create Annotations", "", e)
                    fs.delete(annoFileID)
                    # This exception still passes through the outer
                    # handlers below.
                    raise MongoDocumentException(0)
                return nbInserted

        except AnnotationException as e:
            logger.logError(e)
            raise e
        except Exception as e:
            logger.logUnknownError("Annotation Storage Create Annotations", "", e)
            raise MongoDocumentException(0)
    else:
        raise StorageException(1)
def internal_error500(error):
    """
    Log an unexpected critical error and build the generic HTTP 500 reply.

    Presumably registered as the REST service's 500 handler -- confirm
    against the application setup.

    :param error: The error being handled; only its ``str()`` form is logged.
    :return: Whatever ``error_response`` builds for INTERNAL_SERVER_ERROR.
    """
    log_title = "Annotation Storage REST Service Unknown Critical Error"
    logger.logUnknownError(log_title, str(error), 50000)
    # The reply is deliberately generic: no detail of the error is exposed.
    status = http.HTTPStatus.INTERNAL_SERVER_ERROR
    return error_response(status,
                          "Internal Server Error",
                          "",
                          "Server can not currently process requests")