def _request_detections(**kwargs):
    try:
        # obtain the request_id first so that the exception handlers below can always reference it
        request_id = kwargs['request_id']

        body = kwargs.get('post_body')

        input_container_sas = body.get('input_container_sas', None)
        use_url = body.get('use_url', False)

        images_requested_json_sas = body.get('images_requested_json_sas', None)
        image_path_prefix = body.get('image_path_prefix', None)

        first_n = body.get('first_n', None)
        first_n = int(first_n) if first_n else None
        sample_n = body.get('sample_n', None)
        sample_n = int(sample_n) if sample_n else None

        model_version = body.get('model_version', '')
        if model_version == '':
            model_version = api_config.AML_CONFIG['default_model_version']
        model_name = api_config.AML_CONFIG['models'][model_version]

        # request_name and request_submission_timestamp are for appending to output file names
        request_name = body.get('request_name', '')
        request_submission_timestamp = orchestrator.get_utc_timestamp()

        api_task_manager.UpdateTaskStatus(request_id, get_task_status('running', 'Request received.'))
        print(('runserver.py, request_id {}, model_version {}, model_name {}, request_name {}, '
               'submission timestamp is {}').format(request_id, model_version, model_name, request_name,
                                                    request_submission_timestamp))

        # image_paths can be a list of strings (paths on Azure blobs or public URLs), or a list of lists,
        # each of length 2, which is the [image_id, metadata] pair

        # case 1 - listing all images in the container
        if images_requested_json_sas is None:
            metadata_available = False  # not possible to have attached metadata if listing images in a blob
            api_task_manager.UpdateTaskStatus(request_id,
                                              get_task_status('running', 'Listing all images to process.'))
            print('runserver.py, running - listing all images to process.')

            # list all images to process
            blob_prefix = None if image_path_prefix is None else image_path_prefix
            image_paths = SasBlob.list_blobs_in_container(api_config.MAX_NUMBER_IMAGES_ACCEPTED,
                                                          sas_uri=input_container_sas,
                                                          blob_prefix=blob_prefix,
                                                          blob_suffix='.jpg')

        # case 2 - user supplied a list of images to process; can include metadata
        else:
            print('runserver.py, running - using provided list of images.')
            image_paths_text = SasBlob.download_blob_to_text(images_requested_json_sas)
            image_paths = json.loads(image_paths_text)
            print('runserver.py, length of image_paths provided by the user: {}'.format(len(image_paths)))

            if len(image_paths) == 0:
                api_task_manager.UpdateTaskStatus(request_id,
                                                  get_task_status('completed',
                                                                  'Zero images found in provided list of images.'))
                return

            error, metadata_available = orchestrator.validate_provided_image_paths(image_paths)
            if error is not None:
                raise ValueError('image paths provided in the json are not valid: {}'.format(error))

            # filter to accepted image file endings
            valid_image_paths = []
            for p in image_paths:
                locator = p[0] if metadata_available else p
                if locator.lower().endswith(api_config.ACCEPTED_IMAGE_FILE_ENDINGS):
                    valid_image_paths.append(p)
            image_paths = valid_image_paths
            print('runserver.py, length of image_paths provided by the user, after filtering to jpg: {}'.format(
                len(image_paths)))

            # filter to paths that start with the provided image_path_prefix
            if image_path_prefix is not None:
                valid_image_paths = []
                for p in image_paths:
                    locator = p[0] if metadata_available else p
                    if locator.startswith(image_path_prefix):
                        valid_image_paths.append(p)
                image_paths = valid_image_paths
                print('runserver.py, length of image_paths provided by the user, after filtering for '
                      'image_path_prefix: {}'.format(len(image_paths)))

            if not use_url:
                res = orchestrator.spot_check_blob_paths_exist(image_paths, input_container_sas, metadata_available)
                if res is not None:
                    raise LookupError('path {} provided in list of images to process does not exist in the '
                                      'container pointed to by data_container_sas.'.format(res))

        # apply the first_n and sample_n filters
        if first_n is not None:
            assert first_n > 0, 'parameter first_n must be greater than 0.'
            image_paths = image_paths[:first_n]  # will not error if first_n > total number of images

        if sample_n is not None:
            assert sample_n > 0, 'parameter sample_n must be greater than 0.'
            if sample_n > len(image_paths):
                raise ValueError('parameter sample_n specifies more images than available '
                                 '(after filtering by other provided params).')

            # sample by shuffling the image paths and taking the first sample_n images
            print('First path before shuffling:', image_paths[0])
            shuffle(image_paths)
            print('First path after shuffling:', image_paths[0])
            image_paths = image_paths[:sample_n]
            image_paths = orchestrator.sort_image_paths(image_paths, metadata_available)

        num_images = len(image_paths)
        print('runserver.py, num_images after applying all filters: {}'.format(num_images))

        if num_images < 1:
            api_task_manager.UpdateTaskStatus(request_id,
                                              get_task_status('completed',
                                                              'Zero images found in container or in provided list of '
                                                              'images after filtering with the provided parameters.'))
            return
        if num_images > api_config.MAX_NUMBER_IMAGES_ACCEPTED:
            api_task_manager.UpdateTaskStatus(request_id,
                                              get_task_status('failed',
                                                              'The number of images ({}) requested for processing '
                                                              'exceeds the maximum accepted ({}) in one call.'.format(
                                                                  num_images, api_config.MAX_NUMBER_IMAGES_ACCEPTED)))
            return

        # the finalized image_paths list is uploaded to internal_container; all sharding and scoring use the
        # uploaded list. Its file name does not include request_name or the timestamp so that score.py can locate it.
        image_paths_string = json.dumps(image_paths, indent=1)
        internal_storage_service.create_blob_from_text(internal_container,
                                                       '{}/{}_images.json'.format(request_id, request_id),
                                                       image_paths_string)

        api_task_manager.UpdateTaskStatus(request_id,
                                          get_task_status('running',
                                                          'Images listed; processing {} images.'.format(num_images)))
        print('runserver.py, running - images listed; processing {} images.'.format(num_images))

        # set up the connection to AML Compute and the data stores;
        # do this for each request since the pipeline step is associated with the data stores
        aml_compute = orchestrator.AMLCompute(request_id=request_id,
                                              use_url=use_url,
                                              input_container_sas=input_container_sas,
                                              internal_datastore=internal_datastore,
                                              model_name=model_name)
        print('AMLCompute resource connected successfully.')

        # shard the images into jobs of NUM_IMAGES_PER_JOB images each
        num_images_per_job = api_config.NUM_IMAGES_PER_JOB
        num_jobs = math.ceil(num_images / num_images_per_job)

        list_jobs = {}
        for job_index in range(num_jobs):
            begin, end = job_index * num_images_per_job, (job_index + 1) * num_images_per_job
            job_id = 'request{}_jobindex{}_total{}'.format(request_id, job_index, num_jobs)
            list_jobs[job_id] = {'begin': begin, 'end': end}

        list_jobs_submitted = aml_compute.submit_jobs(list_jobs, api_task_manager, num_images)
        api_task_manager.UpdateTaskStatus(request_id,
                                          get_task_status('running',
                                                          'All {} images submitted to cluster for processing.'.format(
                                                              num_images)))

    except Exception as e:
        api_task_manager.UpdateTaskStatus(request_id,
                                          get_task_status('failed',
                                                          'An error occurred while processing the request: {}'.format(e)))
        print('runserver.py, exception in _request_detections: {}'.format(str(e)))
        return  # do not initiate _monitor_detections_request

    try:
        aml_monitor = orchestrator.AMLMonitor(request_id=request_id,
                                              list_jobs_submitted=list_jobs_submitted,
                                              request_name=request_name,
                                              request_submission_timestamp=request_submission_timestamp,
                                              model_version=model_version)

        # start another thread to monitor the jobs and consolidate the results when they finish
        ai4e_wrapper.wrap_async_endpoint(_monitor_detections_request, 'post:_monitor_detections_request',
                                         request_id=request_id,
                                         aml_monitor=aml_monitor)
    except Exception as e:
        api_task_manager.UpdateTaskStatus(request_id,
                                          get_task_status('problem',
                                                          ('An error occurred when starting the status monitoring process. '
                                                           'The images should be submitted for processing though - '
                                                           'please contact us to retrieve your results. '
                                                           'Error: {}'.format(e))))
        print('runserver.py, exception when starting orchestrator.AMLMonitor: ', str(e))
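
# --- Illustrative only ---
# A minimal sketch of the post_body that _request_detections above expects, inferred from the
# body.get() calls in the handler. Every value below is a hypothetical placeholder (not a real
# SAS URL), and the commented invocation at the end assumes the handler is called directly with
# keyword arguments; the actual endpoint wiring is not shown in this excerpt.
example_post_body = {
    'input_container_sas': 'https://<account>.blob.core.windows.net/<container>?<sas_token>',  # placeholder
    'use_url': False,                     # True if the listed image paths are public URLs instead of blob paths
    'images_requested_json_sas': None,    # SAS URL to a JSON list of images; None means list the whole container
    'image_path_prefix': None,            # optional path prefix filter
    'first_n': None,                      # optional: keep only the first N images
    'sample_n': 2000,                     # optional: randomly sample N images
    'model_version': '',                  # '' falls back to api_config.AML_CONFIG['default_model_version']
    'request_name': 'test_run'            # appended to output file names
}
# _request_detections(post_body=example_post_body, request_id='abc123')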
def aggregate_results(self):
    print('AMLMonitor, aggregate_results() called')

    # The more efficient method is to know the run_id, which is the folder name that the result is written to.
    # Since we can't reliably get the run_id after submitting the run, resort to listing all blobs in the output
    # container and matching by the request_id.
    # We list all blobs (up to a large limit) so that we don't have to worry about the generator's next_marker.
    datastore_aml_container = copy.deepcopy(self.internal_datastore)
    datastore_aml_container['container_name'] = self.aml_output_container
    list_blobs = SasBlob.list_blobs_in_container(api_config.MAX_BLOBS_IN_OUTPUT_CONTAINER,
                                                 datastore=datastore_aml_container,
                                                 blob_suffix='.csv')

    detection_results = []
    failures = []
    for blob_path in list_blobs:
        if blob_path.endswith('.csv'):
            # blob_path is azureml/run_id/output_requestID/out_file_name.csv
            out_file_name = blob_path.split('/')[-1]

            if out_file_name.startswith('detections_request{}_'.format(self.request_id)):
                detection_results.append(self._download_read_csv(blob_path, 'detections'))
            elif out_file_name.startswith('failures_request{}_'.format(self.request_id)):
                failures.extend(self._download_read_csv(blob_path, 'failures'))

    if len(detection_results) < 1:
        raise RuntimeError('aggregate_results(), at least part of your request has been processed but the '
                           'monitoring thread failed to retrieve the results.')

    all_detections = pd.concat(detection_results)  # will error if detection_results is an empty list
    print('aggregate_results(), shape of all_detections: {}'.format(all_detections.shape))

    all_detections_string = all_detections.to_csv(index=False)  # a string is returned since no file/buffer is provided

    print('aggregate_results(), number of failed images: {}'.format(len(failures)))
    failures_text = os.linesep.join(failures)

    print('aggregate_results(), starting to upload')

    # upload aggregated results to the output store
    self.internal_storage_service.create_blob_from_text(self.internal_container,
                                                        '{}/{}_detections.csv'.format(self.request_id, self.request_id),
                                                        all_detections_string,
                                                        max_connections=4)
    print('aggregate_results(), detections uploaded')

    self.internal_storage_service.create_blob_from_text(self.internal_container,
                                                        '{}/{}_failed_images.csv'.format(self.request_id, self.request_id),
                                                        failures_text)
    print('aggregate_results(), failures uploaded')

    output_file_urls = self._generate_urls_for_outputs()
    return output_file_urls
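
# --- Illustrative only ---
# aggregate_results() above relies on a _download_read_csv() helper that is not shown in this
# excerpt. The sketch below shows one plausible shape for its parsing logic, based on how its
# return values are used: 'detections' results are appended and pd.concat'ed as DataFrames,
# while 'failures' results are extended into a flat list and later joined line by line.
# The real helper also downloads the blob itself; only the parsing step is sketched here.
import io
import pandas as pd

def _parse_result_csv_sketch(csv_text, output_type):
    # csv_text is the already-downloaded CSV content as a string
    if output_type == 'detections':
        # parsed as a DataFrame so the caller can pd.concat all shards
        return pd.read_csv(io.StringIO(csv_text))
    # 'failures': one failed image path per line, returned as a list for failures.extend()
    return [line for line in csv_text.splitlines() if line.strip()]

# example usage with in-memory placeholder data:
# df = _parse_result_csv_sketch('col_a,col_b\n1,2\n', 'detections')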
def _request_detections(**kwargs):
    try:
        body = kwargs.get('post_body')

        input_container_sas = body['input_container_sas']
        images_requested_json_sas = body.get('images_requested_json_sas', None)
        image_path_prefix = body.get('image_path_prefix', '')

        first_n = body.get('first_n', None)
        first_n = int(first_n) if first_n else None
        sample_n = body.get('sample_n', None)
        sample_n = int(sample_n) if sample_n else None

        request_id = kwargs['request_id']
        api_task_manager.UpdateTaskStatus(request_id, 'running.')

        if images_requested_json_sas is None:
            api_task_manager.UpdateTaskStatus(request_id, 'running - listing all images to process.')
            print('runserver.py, running - listing all images to process.')

            # list all images to process
            image_paths = SasBlob.list_blobs_in_container(api_config.MAX_NUMBER_IMAGES_ACCEPTED,
                                                          sas_uri=input_container_sas,
                                                          blob_prefix=image_path_prefix,
                                                          blob_suffix='.jpg')
        else:
            print('runserver.py, running - using provided list of images.')
            image_paths_text = SasBlob.download_blob_to_text(images_requested_json_sas)
            image_paths = json.loads(image_paths_text)
            print('runserver.py, length of image_paths provided by the user: {}'.format(len(image_paths)))

            image_paths = [i for i in image_paths
                           if str(i).lower().endswith(api_config.ACCEPTED_IMAGE_FILE_ENDINGS)]
            print('runserver.py, length of image_paths provided by the user, after filtering to jpg: {}'.format(
                len(image_paths)))

            res = orchestrator.spot_check_blob_paths_exist(image_paths, input_container_sas)
            if res is not None:
                raise LookupError('failed - path {} provided in list of images to process does not exist in the '
                                  'container pointed to by data_container_sas.'.format(res))

        # apply the first_n and sample_n filters
        if first_n is not None:
            assert first_n > 0, 'parameter first_n is zero.'
            image_paths = image_paths[:first_n]

        # TODO implement sample_n

        num_images = len(image_paths)
        print('runserver.py, num_images: {}'.format(num_images))

        if num_images < 1:
            api_task_manager.UpdateTaskStatus(request_id,
                                              'completed - zero images found in container or in provided list of '
                                              'images after filtering with the provided parameters.')
            return
        if num_images > api_config.MAX_NUMBER_IMAGES_ACCEPTED:
            api_task_manager.UpdateTaskStatus(request_id,
                                              'failed - the number of images ({}) requested for processing exceeds '
                                              'the maximum accepted ({}) in one call.'.format(
                                                  num_images, api_config.MAX_NUMBER_IMAGES_ACCEPTED))
            return

        image_paths_string = json.dumps(image_paths, indent=2)
        internal_storage_service.create_blob_from_text(internal_container,
                                                       '{}/{}_images.json'.format(request_id, request_id),
                                                       image_paths_string)

        api_task_manager.UpdateTaskStatus(request_id,
                                          'running - images listed; processing {} images.'.format(num_images))
        print('runserver.py, running - images listed; processing {} images.'.format(num_images))

        # set up connection to AML Compute and data stores
        # do this for each request since pipeline step is associated with the data stores
        aml_compute = orchestrator.AMLCompute(request_id, input_container_sas, internal_datastore)
        print('AMLCompute resource connected successfully.')

        num_images_per_job = api_config.NUM_IMAGES_PER_JOB
        num_jobs = math.ceil(num_images / num_images_per_job)

        list_jobs = {}
        for job_index in range(num_jobs):
            begin, end = job_index * num_images_per_job, (job_index + 1) * num_images_per_job
            job_id = 'request{}_jobindex{}_total{}'.format(request_id, job_index, num_jobs)
            list_jobs[job_id] = {'begin': begin, 'end': end}

        # TODO send list_jobs_submitted in a pickle to intermediate storage as a record / for restarting the monitoring thread
        list_jobs_submitted = aml_compute.submit_jobs(request_id, list_jobs, api_task_manager, num_images)

        api_task_manager.UpdateTaskStatus(request_id,
                                          'running - all {} images submitted to cluster for processing.'.format(num_images))

    except Exception as e:
        api_task_manager.UpdateTaskStatus(request_id,
                                          'failed - an error occurred while processing the request: {}'.format(str(e)))
        print('runserver.py, exception in _request_detections: {}'.format(str(e)))
        return  # do not initiate _monitor_detections_request

    try:
        aml_monitor = orchestrator.AMLMonitor(request_id, list_jobs_submitted)

        # start another thread to monitor the jobs and consolidate the results when they finish
        ai4e_wrapper.wrap_async_endpoint(_monitor_detections_request, 'post:_monitor_detections_request',
                                         request_id=request_id,
                                         aml_monitor=aml_monitor)
    except Exception as e:
        api_task_manager.UpdateTaskStatus(request_id,
                                          'failed - an error occurred when starting the status monitoring process. '
                                          'The images should be submitted for processing though - please contact us '
                                          'to retrieve your results. Error: {}'.format(str(e)))
        print('runserver.py, exception when starting orchestrator.AMLMonitor: ', str(e))
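
# --- Illustrative only ---
# The older _request_detections above leaves sample_n as a TODO; the newer version at the top of
# this section implements it by shuffling the image paths, taking the first sample_n entries and
# re-sorting. A minimal standalone sketch of that approach, assuming image_paths is a plain list
# of path strings (sorted() stands in here for orchestrator.sort_image_paths, which also handles
# [image_id, metadata] pairs):
from random import shuffle

def apply_sample_n(image_paths, sample_n):
    if sample_n is None:
        return image_paths
    assert sample_n > 0, 'parameter sample_n must be greater than 0.'
    if sample_n > len(image_paths):
        raise ValueError('parameter sample_n specifies more images than available.')

    paths = list(image_paths)  # copy so the caller's list is not shuffled in place
    shuffle(paths)
    return sorted(paths[:sample_n])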