async def save_model_to_disk(training_id: str = '', model: UploadFile = File(...)):
    """
    Receives a .zip file containing a TensorFlow 2 SavedModel object sent by a dataset
    microservice, then stores the .zip file in the trained-model Docker volume with the
    naming format <training_id>.zip

    :param training_id: Training ID the model is associated with
    :param model: .zip file containing the SavedModel object
    :return: {'status': 'success'} if saving is successful, else {'status': 'failure'}
    """
    logger.debug('Training ID: ' + training_id)
    if not get_training_result_by_training_id(training_id):
        return {
            'status': 'failure',
            'detail': 'Unable to find training result with specified ID',
            'training_id': training_id
        }

    # Store the archive as <training_id>.zip so it can be located by training ID later;
    # the context manager closes the file handle automatically.
    with open(os.path.join('/app/training_results', training_id + '.zip'), 'wb+') as upload_file:
        shutil.copyfileobj(model.file, upload_file)

    return {
        'status': 'success',
        'detail': 'Training results uploaded successfully',
        'training_id': training_id
    }
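# For context, a minimal sketch of how a dataset microservice might call the endpoint
# above once training finishes. The '/model' route and server address are assumptions
# for illustration; only the 'training_id' query parameter and the 'model' file field
# follow the endpoint signature.
import requests


def upload_trained_model(training_id: str, zip_path: str):
    with open(zip_path, 'rb') as archive:
        response = requests.post(
            'http://host.docker.internal:5000/model',  # assumed route/host
            params={'training_id': training_id},
            files={'model': (training_id + '.zip', archive, 'application/zip')}
        )
    response.raise_for_status()
    return response.json()  # {'status': 'success', ...} on success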
def ping_dataset(dataset_name):
    """
    Periodically ping a dataset's service to make sure that it is active and able to
    receive requests. If it is not, remove the dataset from the available_datasets map.
    This is a helper method that is not directly exposed via HTTP.

    :param dataset_name: Name of a registered dataset as a string
    """
    dataset_is_alive = True

    def kill_dataset():
        settings.available_datasets.pop(dataset_name)
        nonlocal dataset_is_alive
        dataset_is_alive = False
        logger.debug("Dataset " + dataset_name + " is not responsive. Removing from available services...")

    while dataset_is_alive and not dependency.shutdown:
        try:
            r = requests.get('http://host.docker.internal:' +
                             str(settings.available_datasets[dataset_name]) + '/status')
            r.raise_for_status()
            for _ in range(dependency.WAIT_TIME):
                if not dependency.shutdown:  # Check between increments to stop hanging on shutdown
                    time.sleep(1)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout,
                requests.exceptions.HTTPError):
            kill_dataset()
            return

    if dependency.shutdown:
        logger.debug("Dataset [" + dataset_name + "] Healthcheck Thread Terminated.")
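# The healthcheck loops here and in ping_model below assume each microservice exposes
# a GET /status route that returns a 2xx response while healthy. A minimal sketch of
# that route on the microservice side (the app object and response body are assumptions):
from fastapi import FastAPI

microservice_app = FastAPI()


@microservice_app.get('/status')
def status_check():
    # Any 2xx response satisfies r.raise_for_status() in the healthcheck loop.
    return {'status': 'alive'}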
def register_dataset(dataset: MicroserviceConnection):
    """
    Register a single dataset to the server by adding its name and port to the
    available dataset settings. Also kick-start a separate thread to keep track of
    the dataset service's status. A valid dataset API key must be in the request
    header for this method to run.

    :param dataset: MicroserviceConnection object with the name and port of the dataset
    :return: {'status': 'success'} if registration is successful, else {'status': 'failure'}
    """
    # Do not accept calls if server is in the process of shutting down
    if dependency.shutdown:
        return JSONResponse(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            content={
                'status': 'failure',
                'detail': 'Server is shutting down. Unable to complete new dataset registration.'
            })

    # Do not add duplicates of running datasets to the server
    if dataset.name in settings.available_datasets:
        return {
            'status': 'success',
            'dataset': dataset.name,
            'detail': 'Dataset has already been registered.'
        }

    # Ensure that we can connect back to the dataset before adding it
    try:
        r = requests.get('http://host.docker.internal:' + str(dataset.port) + '/status')
        r.raise_for_status()
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
            requests.exceptions.HTTPError):
        return {
            'status': 'failure',
            'dataset': dataset.name,
            'detail': 'Unable to establish successful connection to dataset.'
        }

    # Register dataset to server and create a thread to ensure it stays responsive
    settings.available_datasets[dataset.name] = dataset.port
    pool.submit(ping_dataset, dataset.name)
    logger.debug("Dataset " + dataset.name + " successfully registered to server.")
    return {
        'status': 'success',
        'dataset': dataset.name,
        'detail': 'Dataset has been successfully registered to server.'
    }
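# A sketch of how a dataset microservice might register itself against this endpoint.
# The '/register_dataset' route, server address, and 'api_key' header name are
# assumptions for illustration; only the name/port payload follows the
# MicroserviceConnection shape the endpoint expects.
import requests


def register_self(name: str, port: int, api_key: str):
    response = requests.post(
        'http://host.docker.internal:5000/register_dataset',  # assumed route/host
        json={'name': name, 'port': port},
        headers={'api_key': api_key}  # endpoint requires a valid dataset API key
    )
    response.raise_for_status()
    return response.json()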
def register_model(model: MicroserviceConnection):
    """
    Register a single model to the server by adding the model's name and socket to the
    available model settings. Also kick-start a separate thread to keep track of the
    model service's status. Models that register must use a valid API key.

    :param model: MicroserviceConnection object with the model name and model socket
    :return: {'status': 'success'} if registration is successful, else {'status': 'failure'}
    """
    # Do not accept calls if server is in the process of shutting down
    if dependency.shutdown:
        return JSONResponse(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            content={
                'status': 'failure',
                'detail': 'Server is shutting down. Unable to complete new model registration.'
            })

    # Do not add duplicates of running models to the server
    if model.name in settings.available_models:
        return {
            'status': 'success',
            'model': model.name,
            'detail': 'Model has already been registered.'
        }

    # Ensure that we can connect back to the model before adding it
    try:
        r = requests.get(model.socket + '/status')
        r.raise_for_status()
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
            requests.exceptions.HTTPError):
        return {
            'status': 'failure',
            'model': model.name,
            'detail': 'Unable to establish successful connection to model.'
        }

    # Register model to server and create a thread to ensure it stays responsive
    settings.available_models[model.name] = model.socket
    pool.submit(ping_model, model.name)
    logger.debug("Model " + model.name + " successfully registered to server.")
    return {
        'status': 'success',
        'model': model.name,
        'detail': 'Model has been successfully registered to server.'
    }
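# The MicroserviceConnection model used by both registration endpoints is not shown
# in this section. A plausible Pydantic definition, assuming optional port and socket
# fields since datasets register a bare port while models register a full socket URL:
from typing import Optional
from pydantic import BaseModel


class MicroserviceConnection(BaseModel):
    # Assumed shape: datasets supply `port`; models supply `socket` (a base URL).
    name: str
    port: Optional[int] = None
    socket: Optional[str] = None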
def current_user_investigator(token: str = Depends(oauth2_scheme)):
    """
    Permission-checking helper to be used as a dependency for API endpoints.
    It either returns a User object to the calling method if the user meets the
    authentication requirements, or raises a CredentialException to prevent the
    dependent method from continuing.

    :param token: User authentication token
    :return: User object if the user has the correct role, else raise dependency.CredentialException
    """
    user = get_current_user(token)
    if not any(role in [Roles.admin.name, Roles.investigator.name] for role in user.roles):
        logger.debug('User Roles')
        logger.debug(user.roles)
        raise CredentialException()
    return user
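# Usage sketch for the dependency above: FastAPI resolves current_user_investigator
# before the endpoint body runs, so only admins and investigators get through. The
# '/images' route is hypothetical, and `app` is assumed to be this module's FastAPI
# instance; User, Depends, and current_user_investigator come from the code above.
@app.get('/images')
def list_user_images(current_user: User = Depends(current_user_investigator)):
    return {'username': current_user.username, 'roles': current_user.roles}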
def ping_model(model_name):
    """
    Periodically ping a model's service to make sure that it is active. If it is not,
    remove the model from the available_models BaseSetting in dependency.py

    :param model_name: Name of model to ping. This is the name the model registered to the server with.
    """
    model_is_alive = True

    def kill_model():
        settings.available_models.pop(model_name)
        nonlocal model_is_alive
        model_is_alive = False
        logger.debug(
            "Model " + model_name + " is not responsive. Removing the model from available services..."
        )

    while model_is_alive and not dependency.shutdown:
        try:
            r = requests.get('http://host.docker.internal:' +
                             str(settings.available_models[model_name]) + '/status')
            r.raise_for_status()
            for _ in range(dependency.WAIT_TIME):
                if not dependency.shutdown:  # Check between increments to stop hanging on shutdown
                    time.sleep(1)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout,
                requests.exceptions.HTTPError):
            kill_model()
            return

    if dependency.shutdown:
        logger.debug("Model [" + model_name + "] Healthcheck Thread Terminated.")
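# Both healthcheck loops exit within roughly a second of shutdown because they poll
# dependency.shutdown between one-second sleeps. A plausible (assumed, not shown in
# this section) way that flag gets set is a FastAPI shutdown event handler:
@app.on_event('shutdown')
def flag_shutdown():
    dependency.shutdown = True  # healthcheck threads observe this and terminate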
def create_new_prediction_on_image(
        images: List[UploadFile] = File(...),
        models: List[str] = (),
        current_user: User = Depends(current_user_investigator)):
    """
    Create a new prediction request for any number of images on any number of models.
    This dispatches a prediction request to each model service for every image. Once
    this is complete, a user may later query the results by the unique key that is
    returned from this method for each image uploaded.

    :param current_user: User object who is logged in
    :param images: List of file objects that will be used by the models for prediction
    :param models: List of models to run on the images
    :return: Unique keys for each image uploaded in images.
    """
    # Start with error checking on the models list.
    # Ensure that all desired models are valid.
    if not models:
        raise HTTPException(status_code=400,
                            detail="You must specify models to process images with")

    invalid_models = [model for model in models if model not in settings.available_models]
    if invalid_models:
        raise HTTPException(status_code=400,
                            detail="Invalid Models Specified: " + ', '.join(invalid_models))

    # Hash each uploaded image, then store the image file on the server.
    buffer_size = 65536  # Read image data in 64KB chunks for hashlib
    hashes_md5 = {}

    # Process uploaded images
    for upload_file in images:
        file = upload_file.file
        md5 = hashlib.md5()

        while True:
            data = file.read(buffer_size)
            if not data:
                break
            md5.update(data)

        hash_md5 = md5.hexdigest()
        hashes_md5[upload_file.filename] = hash_md5
        file.seek(0)  # Rewind so the image can be copied to disk below

        image_object = get_image_by_md5_hash_db(hash_md5)
        if not image_object:  # If image does not already exist in db
            # Create a UniversalMLImage object to store data
            image_object = UniversalMLImage(**{
                'file_names': [upload_file.filename],
                'hash_md5': hash_md5,
                'hash_sha1': 'TODO: Remove This Field',
                'hash_perceptual': 'TODO: Remove This Field',
                'users': [current_user.username],
                'models': {}
            })
            # Add created image object to database
            add_image_db(image_object)

        # Associate the current user with the image that was uploaded
        add_user_to_image(image_object, current_user.username)

        # Associate the name the file was uploaded under with the object
        add_filename_to_image(image_object, upload_file.filename)

        # Copy image to the temporary storage volume for prediction
        new_filename = hash_md5 + os.path.splitext(upload_file.filename)[1]
        stored_image_path = "/app/prediction_images/" + new_filename
        with open(stored_image_path, 'wb+') as stored_image:
            shutil.copyfileobj(file, stored_image)

        for model in models:
            model_socket = settings.available_models[model]
            try:
                logger.debug('Creating Prediction Request. Hash: ' + hash_md5 + ' Model: ' + model)
                request = requests.post(model_socket + '/predict', params={
                    'image_md5_hash': hash_md5,
                    'image_file_name': new_filename
                })
                request.raise_for_status()  # Ensure prediction job hasn't errored
            except (requests.exceptions.ConnectionError,
                    requests.exceptions.Timeout,
                    requests.exceptions.HTTPError):
                logger.error('Fatal error when creating prediction request. Hash: "' +
                             hash_md5 + '" Model: ' + model)

    return {"images": [hashes_md5[key] for key in hashes_md5]}
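# A sketch of how a client might call this endpoint. The '/predict' route and server
# address are assumptions; the multipart 'images' fields and repeated 'models' query
# parameter follow the signature above, and the auth header assumes the OAuth2 bearer
# scheme used by current_user_investigator.
import requests


def request_predictions(image_paths, model_names, token):
    files = [('images', open(path, 'rb')) for path in image_paths]
    response = requests.post(
        'http://host.docker.internal:5000/predict',  # assumed route/host
        params=[('models', name) for name in model_names],
        files=files,
        headers={'Authorization': 'Bearer ' + token}
    )
    response.raise_for_status()
    return response.json()['images']  # MD5 keys to poll results with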
def create_new_prediction_on_image(
        images: List[UploadFile] = File(...),
        models: List[str] = (),
        current_user: User = Depends(current_user_investigator)):
    """
    Create a new prediction request for any number of images on any number of models.
    This will enqueue the jobs, and a worker will process them and collect the results.
    Once this is complete, a user may later query the job status by the unique key
    that is returned from this method for each image uploaded.

    :param current_user: User object who is logged in
    :param images: List of file objects that will be used by the models for prediction
    :param models: List of models to run on the images
    :return: Unique keys for each image uploaded in images.
    """
    # Start with error checking on the models list.
    # Ensure that all desired models are valid.
    if not models:
        raise HTTPException(status_code=400,
                            detail="You must specify models to process images with")

    invalid_models = [model for model in models if model not in settings.available_models]
    if invalid_models:
        raise HTTPException(status_code=400,
                            detail="Invalid Models Specified: " + ', '.join(invalid_models))

    # Hash each uploaded image, then store the image file on the server.
    buffer_size = 65536  # Read image data in 64KB chunks for hashlib
    hashes_md5 = {}

    # Process uploaded images
    for upload_file in images:
        file = upload_file.file
        md5 = hashlib.md5()
        sha1 = hashlib.sha1()

        while True:
            data = file.read(buffer_size)
            if not data:
                break
            md5.update(data)
            sha1.update(data)

        hash_md5 = md5.hexdigest()
        hash_sha1 = sha1.hexdigest()
        hashes_md5[upload_file.filename] = hash_md5
        file.seek(0)  # Rewind so the image data can be read again below

        image_object = get_image_by_md5_hash_db(hash_md5)
        if not image_object:  # If image does not already exist in db
            # Generate perceptual hash from the uploaded image data
            hash_perceptual = str(imagehash.phash(Image.open(file)))
            file.seek(0)

            # Create a UniversalMLImage object to store data
            image_object = UniversalMLImage(**{
                'file_names': [upload_file.filename],
                'hash_md5': hash_md5,
                'hash_sha1': hash_sha1,
                'hash_perceptual': hash_perceptual,
                'users': [current_user.username],
                'models': {}
            })
            # Add created image object to database
            add_image_db(image_object)

        # Associate the current user with the image that was uploaded
        add_user_to_image(image_object, current_user.username)

        # Associate the name the file was uploaded under with the object
        add_filename_to_image(image_object, upload_file.filename)

        for model in models:
            random_tail = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
            job_id = hash_md5 + '---' + model + '---' + random_tail
            model_socket = settings.available_models[model]
            logger.debug('Adding Job For Image ' + hash_md5 + ' With Model ' + model + ' With ID ' + job_id)

            # Submit a prediction job for this image/model pair to the worker queue
            prediction_queue.enqueue(get_model_prediction,
                                     model_socket,
                                     hash_md5,
                                     model,
                                     upload_file,
                                     job_id=job_id)

    return {"images": [hashes_md5[key] for key in hashes_md5]}
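# prediction_queue.enqueue(..., job_id=...) follows the python-rq API, so job status
# can later be looked up by the composite job ID built above. A sketch of that lookup,
# assuming an RQ queue backed by Redis (host/port below are assumptions):
from redis import Redis
from rq.job import Job


def get_prediction_status(job_id: str):
    # Fetch the enqueued job by its '<md5>---<model>---<random>' ID.
    job = Job.fetch(job_id, connection=Redis(host='redis', port=6379))
    return {'status': job.get_status(), 'result': job.result}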