def test_construct_cache_key(self):
    '''Tests that a trailing &file_type param is stripped from the cache key'''
    cache_key = cu.construct_cache_key(self.query_string_dirty)
    assert cache_key == self.query_string_clean
    cache_key = cu.construct_cache_key(self.query_string_clean)
    assert cache_key == self.query_string_clean
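# For reference, a minimal sketch of what cu.construct_cache_key is assumed to do,
# based on the test above: strip a trailing &file_type=... param (sent by the
# client for correct nii.gz handling) so it does not fragment the cache. This is
# an illustrative assumption, not the actual implementation in cache_utils.
import re

def construct_cache_key_sketch(query_string):
    # drop a trailing file_type param; the rest of the query string is the key
    return re.sub(r'&file_type=[^&]*$', '', query_string)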
def lesion_analysis(lesion_code, threshold):
    cache_key = cu.construct_cache_key(request.query_string.decode('utf-8'))
    # get the request query
    request_query = jquery_unparam(request.query_string.decode('utf-8'))
    current_app.logger.info(
        f'Running lesion analysis for lesion id {lesion_code}, threshold {threshold} and query {json.dumps(request_query, indent=4)}'
    )

    file_path_data = dbu.density_map_file_path_data(request_query)
    if not len(file_path_data):
        current_app.logger.info(
            f'No subjects in query {json.dumps(request_query, indent=4)}')
        return 'No subjects in dataset query', 400

    try:
        # scale threshold to 0-1 since the density map is stored in this range
        threshold = int(threshold) * 0.01
    except ValueError:
        current_app.logger.info(
            f'Invalid threshold value {threshold} applied, returning 404...')
        return f'Invalid threshold value {threshold} sent to server.', 404

    data_dir = current_app.config['DATA_FILE_PATH']
    if lesion_code == 'example':
        lesion_data = du.get_nifti_data(
            f'{data_dir}/{du.EXAMPLE_LESION_FILE_NAME}')
    else:
        lesion_upload = LesionUpload.query.get(lesion_code)
        if not lesion_upload:
            current_app.logger.warning(
                f'Lesion does not exist in database with code {lesion_code}')
            return 'Lesion code does not exist. Please re-upload lesion.', 500
        lesion_data = du.get_nifti_data(lesion_upload.saved_file_name)

    rh = nib.load(current_app.config['RIGHT_HEMISPHERE_MASK']).get_data()
    lh = nib.load(current_app.config['LEFT_HEMISPHERE_MASK']).get_data()
    rh_overlap = lesion_data * rh
    lh_overlap = lesion_data * lh

    intersecting_tracts = []

    def check_lesion_tract_overlaps(tracts):
        cache = JobCache(current_app.cache, current_app.cache_lock)
        for tract in tracts:
            # average the density maps for this tract based on the current query,
            # save the averaged map and cache the file path
            status = cache.job_status(cache_key, tract.code)
            if status == 'COMPLETE':
                # get file path from cache
                tract_file_path = cache.job_result(cache_key, tract.code)
            else:
                # recalculate density map
                file_path_data = dbu.density_map_file_path_data(request_query)
                if len(file_path_data) > 0:
                    current_app.logger.info(f'Adding job {tract.code}')
                    cache.add_job(cache_key, tract.code)
                    data_dir = current_app.config['DATA_FILE_PATH']  # file path to data folder
                    tract_file_path = du.generate_average_density_map(
                        data_dir, file_path_data, tract, 'MNI')
                    cache.job_complete(cache_key, tract.code, tract_file_path)
                    current_app.logger.info(f'Job {tract.code} complete')
                else:
                    current_app.logger.info(
                        f'No subjects returned for query {json.dumps(request_query, indent=4)}'
                    )
                    # propagate the error response so the caller can return it
                    return 'No subjects returned for the current query', 404

            # # perform weighted overlap: lesion * tract
            # tract_data = du.get_nifti_data(tract_file_path)
            # overlap = lesion_data * tract_data
            # # weighted sum of voxels occupied by overlap
            # # figure out percentage of tract overlapping with lesion
            # overlap_sum = np.sum(overlap)
            # if overlap_sum:
            #     overlap_score = overlap_sum / np.sum(tract_data)
            #     # add dict to intersecting_tracts list
            #     intersecting_tracts.append({"tractCode": tract.code, "overlapScore": overlap_score})

            # alternative overlap score
            tract_data = du.get_nifti_data(tract_file_path)
            masked_tract_data = ma.masked_where(tract_data < threshold, tract_data)
            overlap = lesion_data * masked_tract_data
            over_threshold_count = masked_tract_data.count()
            over_threshold_overlap_count = len(overlap.nonzero()[0])
            if over_threshold_overlap_count:
                overlap_percent = (over_threshold_overlap_count / over_threshold_count) * 100.
                # add dict to intersecting_tracts list
                intersecting_tracts.append({
                    "tractName": tract.name,
                    "tractCode": tract.code,
                    "overlapScore": overlap_percent,
                    "description": tract.description
                })

    # NOTE: the loop through tracts could be sped up using a multiprocessing pool

    # get unique tract codes for the datasets / methods selected
    tract_codes = set()
    for key in request_query.keys():
        dc = key
        mc = request_query[key]['method']
        tcs = DatasetTracts.query.with_entities(
            DatasetTracts.tract_code).filter(
                (DatasetTracts.dataset_code == dc)
                & (DatasetTracts.method_code == mc)).all()
        tcs = set(tcs)
        tract_codes = tract_codes or tcs  # seed with the first dataset's tracts
        tract_codes = tract_codes.intersection(tcs)
    # explode the inner tuples
    tract_codes = [tc[0] for tc in tract_codes]

    if np.any(rh_overlap):
        current_app.logger.info(
            'Checking lesion overlap with right hemisphere tracts.')
        # loop through right hemisphere tracts
        tracts = Tract.query.filter(
            Tract.code.in_(tract_codes)
            & Tract.code.like('%\\_R')).all()  # escape sql wildcard _
        error = check_lesion_tract_overlaps(tracts)
        if error:
            return error

    if np.any(lh_overlap):
        current_app.logger.info(
            'Checking lesion overlap with left hemisphere tracts.')
        # loop through left hemisphere tracts
        tracts = Tract.query.filter(
            Tract.code.in_(tract_codes)
            & Tract.code.like('%\\_L')).all()
        error = check_lesion_tract_overlaps(tracts)
        if error:
            return error

    # loop through tracts connecting hemispheres
    current_app.logger.info(
        'Checking lesion overlap with tracts connecting hemispheres.')
    tracts = Tract.query.filter(
        Tract.code.in_(tract_codes)
        & ~Tract.code.like('%\\_R')
        & ~Tract.code.like('%\\_L')).all()  # ~ negates the like
    error = check_lesion_tract_overlaps(tracts)
    if error:
        return error

    # sort tracts by overlap score (highest to lowest)
    intersecting_tracts = sorted(intersecting_tracts,
                                 key=lambda tract: tract["overlapScore"],
                                 reverse=True)

    return make_response(jsonify(intersecting_tracts)), 200
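# A toy, self-contained illustration of the overlap score computed in
# check_lesion_tract_overlaps above: voxels of the tract density map below the
# threshold are masked out, and the score is the percentage of the surviving
# voxels that the lesion also occupies. The array values here are made up.
import numpy as np
import numpy.ma as ma

def overlap_score_demo():
    tract_data = np.array([0.0, 0.2, 0.6, 0.9])  # density map, range 0-1
    lesion_data = np.array([0, 1, 1, 0])         # binary lesion mask
    threshold = 0.5
    masked_tract_data = ma.masked_where(tract_data < threshold, tract_data)
    overlap = lesion_data * masked_tract_data
    over_threshold_count = masked_tract_data.count()          # 2 voxels survive
    over_threshold_overlap_count = len(overlap.nonzero()[0])  # 1 of them overlaps
    return (over_threshold_overlap_count / over_threshold_count) * 100.  # 50.0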
def download_tract(tract_code):
    current_app.logger.info(f'Downloading tract info with code {tract_code}')
    cache = JobCache(current_app.cache, current_app.cache_lock)
    query_string_decoded = request.query_string.decode('utf-8')
    cache_key = cu.construct_cache_key(query_string_decoded)

    # validate request query
    request_query = jquery_unparam(query_string_decoded)
    if not check_request_query(request_query):
        current_app.logger.info(
            f'Could not properly parse param string {query_string_decoded} in download_tract, returning 400...'
        )
        return 'Could not parse query param string.', 400

    # validate tract code
    tract = dbu.get_tract(tract_code)
    if not tract:
        current_app.logger.info(
            f'Tract with code {tract_code} does not exist, returning 404...')
        return f'The requested tract {tract_code} does not exist', 404

    # check the tract probability map and mean maps are in the cache;
    # if not, recalculate them, otherwise get the file paths from the cache
    mean_maps_status = cache.add_job_locked(cache_key, 'mean_maps')
    if mean_maps_status in ['PROCEED', 'FAILED', None]:
        # job ready to go or cache could not be accessed
        current_app.logger.info(
            f'mean_maps job status is {mean_maps_status}. Generating mean_maps for query {json.dumps(request_query, indent=4)}'
        )
        subject_ids_dataset_paths = dbu.subject_id_dataset_file_path(request_query)
        if len(subject_ids_dataset_paths) > 0:
            if mean_maps_status:  # cache is accessible
                cache.job_in_progress(cache_key, 'mean_maps')
            data_dir = current_app.config['DATA_FILE_PATH']
            # bind the file paths here so the zip step below can use them
            FA_file_path = du.subject_averaged_FA(subject_ids_dataset_paths, data_dir)
            MD_file_path = du.subject_averaged_MD(subject_ids_dataset_paths, data_dir)
            if mean_maps_status:
                cache.job_complete(cache_key, 'mean_maps', {
                    'FA': FA_file_path,
                    'MD': MD_file_path
                })
        else:
            # no subjects returned in query
            current_app.logger.info(
                f'No subjects returned for query {json.dumps(request_query, indent=4)}'
            )
            return 'No subjects returned in query', 404
    elif mean_maps_status in ['STAGED', 'IN_PROGRESS']:
        current_app.logger.info(
            'mean_maps job in progress, waiting for it to finish...')
        # poll cache until COMPLETE; set status to FAILED after waiting 20 secs
        timeout = 20
        cache.poll_cache(cache_key, 'mean_maps', timeout, 0.2)
        if cache.job_status(cache_key, 'mean_maps') == 'COMPLETE':
            current_app.logger.info('mean_maps job complete')
            # get FA and MD maps from cache
            mean_maps = cache.job_result(cache_key, 'mean_maps')
            FA_file_path = mean_maps.get('FA')
            MD_file_path = mean_maps.get('MD')
        else:
            current_app.logger.warning(
                f'mean_maps job failed to complete in {timeout} secs, setting job status to FAILED and returning...'
            )
            cache.job_failed(cache_key, 'mean_maps')
            return 'mean_maps job FAILED', 500
    elif mean_maps_status == 'COMPLETE':
        current_app.logger.info('mean_maps job complete')
        # get FA and MD maps from cache
        mean_maps = cache.job_result(cache_key, 'mean_maps')
        FA_file_path = mean_maps.get('FA')
        MD_file_path = mean_maps.get('MD')

    # check if the tract probability map has been cached or needs to be recreated
    tract_status = cache.add_job_locked(cache_key, tract_code)
    if tract_status in ['PROCEED', 'FAILED', None]:
        # new job created or could not access cache
        current_app.logger.info(
            f'{tract_code} job status is {tract_status}, generating new probability map...'
        )
        file_path_data = dbu.density_map_file_path_data(request_query)
        if len(file_path_data) > 0:
            if tract_status:
                current_app.logger.info(
                    f'Adding {tract_code} job for query {json.dumps(request_query, indent=4)}'
                )
                cache.job_in_progress(cache_key, tract_code)
            else:
                current_app.logger.info(
                    f'Calculating probability map for tract {tract_code} and query {json.dumps(request_query, indent=4)}'
                )
            data_dir = current_app.config['DATA_FILE_PATH']  # file path to data folder
            tract_file_path = du.generate_average_density_map(
                data_dir, file_path_data, tract, 'MNI')
            if tract_status:
                cache.job_complete(cache_key, tract_code, tract_file_path)
                current_app.logger.info(
                    f'{tract_code} job complete for query {json.dumps(request_query, indent=4)}'
                )
            else:
                current_app.logger.info(
                    f'Completed probability map for tract {tract_code} and query {json.dumps(request_query, indent=4)}'
                )
        else:
            current_app.logger.info(
                f'No subjects returned for query {json.dumps(request_query, indent=4)}'
            )
            return 'No subjects returned for the current query', 404
    elif tract_status in ['STAGED', 'IN_PROGRESS']:
        # another worker is running the job
        current_app.logger.info(
            f'{tract_code} job in progress, waiting to complete...')
        # poll cache waiting for COMPLETE status (max wait 10 secs before quitting)
        timeout = 10
        cache.poll_cache(cache_key, tract_code, timeout, 0.2)
        # set status to FAILED if not COMPLETE after 10 secs
        if cache.job_status(cache_key, tract_code) == 'COMPLETE':
            tract_file_path = cache.job_result(cache_key, tract_code)
        else:
            current_app.logger.warning(
                f'{tract_code} job did not complete in {timeout} secs, setting job status to FAILED.'
            )
            cache.job_failed(cache_key, tract_code)
            return f'Job {tract_code} timed out for query {json.dumps(request_query, indent=4)}.', 500
    elif tract_status == 'COMPLETE':
        # job has already been run, get the file path from the cache
        current_app.logger.info(f'{tract_code} job complete.')
        tract_file_path = cache.job_result(cache_key, tract_code)

    # get subjects and demographic tract metrics from db
    subjects = dbu.subjects_to_download(request_query)
    subject_tract_metrics = dbu.subject_tract_metrics(request_query, tract.code)
    if len(subject_tract_metrics) > 0:
        averaged_metrics = np.mean(subject_tract_metrics, axis=0)
        results = {
            'volume': averaged_metrics[0],
            'meanFA': averaged_metrics[1],
            'meanMD': averaged_metrics[2],
            'stdFA': averaged_metrics[3],
            'stdMD': averaged_metrics[4],
            'name': tract.name,
            'description': tract.description
        }
    else:
        current_app.logger.info(
            f'No subjects returned for query {json.dumps(request_query, indent=4)}'
        )
        return 'No subjects returned for the current query', 404

    data_dict = {
        'query': request_query,
        'subjects': subjects,
        'demographic_data': subject_tract_metrics.tolist()
    }
    data_json = bytes(json.dumps(data_dict), 'utf-8')

    # use a temporary file to create the zip file in memory
    with tempfile.SpooledTemporaryFile() as tp:
        with zipfile.ZipFile(tp, 'w') as output_zip:
            # write files to the zip archive
            output_zip.writestr('data.json', data_json)
            output_zip.write(tract_file_path, arcname=f'{tract_code}.nii.gz')
            output_zip.write(MD_file_path, arcname='MD.nii.gz')
            output_zip.write(FA_file_path, arcname='FA.nii.gz')
        tp.seek(0)  # reset cursor to beginning of file
        zipped_bytes = tp.read()  # get the bytes

    return send_file(
        io.BytesIO(zipped_bytes),  # wrap the bytes for send_file
        as_attachment=True,
        attachment_filename=f'mgtrk_{tract_code}.zip',
        conditional=True,
        add_etags=True)
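# The zip-in-memory pattern used at the end of download_tract, isolated for
# clarity: a SpooledTemporaryFile keeps the archive in RAM until it grows large,
# and the finished bytes are wrapped in a BytesIO for Flask's send_file. The
# helper name and payload dict are placeholders, not part of the app.
import io
import tempfile
import zipfile

def zip_in_memory(named_payloads):
    # named_payloads: dict of archive member name -> bytes
    with tempfile.SpooledTemporaryFile() as tp:
        with zipfile.ZipFile(tp, 'w') as output_zip:
            for name, payload in named_payloads.items():
                output_zip.writestr(name, payload)
        tp.seek(0)  # rewind before reading the finished archive
        return io.BytesIO(tp.read())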
def get_dynamic_tract_info(tract_code, threshold):
    current_app.logger.info(
        f'Getting dynamic tract info for tract {tract_code} and threshold {threshold}.'
    )
    cache = JobCache(current_app.cache, current_app.cache_lock)
    query_string_decoded = request.query_string.decode('utf-8')
    cache_key = cu.construct_cache_key(query_string_decoded)

    # check request query is valid
    request_query = jquery_unparam(query_string_decoded)
    if not check_request_query(request_query):
        current_app.logger.info(
            f'Could not properly parse param string {query_string_decoded} in get_dynamic_tract_info, returning 400...'
        )
        return 'Could not parse query param string.', 400

    # validate tract code
    tract = dbu.get_tract(tract_code)
    if not tract:
        current_app.logger.info(
            f'Tract with code {tract_code} does not exist, returning 404...')
        return f'The requested tract {tract_code} does not exist', 404

    # validate threshold
    try:
        # scale threshold to 0-1 since the density map is stored in this range
        threshold = int(threshold) * 0.01
    except ValueError:
        current_app.logger.info(
            'Invalid threshold value applied, returning 404...')
        return f'Invalid threshold value {threshold} sent to server.', 404

    # check mean_maps job status
    mean_maps_status = cache.add_job_locked(cache_key, 'mean_maps')
    if mean_maps_status in ['PROCEED', 'FAILED', None]:
        # job ready to go or cache could not be accessed
        current_app.logger.info(
            f'mean_maps job status is {mean_maps_status}. Generating mean_maps for query {json.dumps(request_query, indent=4)}'
        )
        subject_ids_dataset_paths = dbu.subject_id_dataset_file_path(request_query)
        if len(subject_ids_dataset_paths) > 0:
            if mean_maps_status:  # cache is accessible
                cache.job_in_progress(cache_key, 'mean_maps')
            data_dir = current_app.config['DATA_FILE_PATH']
            # bind the file paths here so the results step below can use them
            FA_file_path = du.subject_averaged_FA(subject_ids_dataset_paths, data_dir)
            MD_file_path = du.subject_averaged_MD(subject_ids_dataset_paths, data_dir)
            if mean_maps_status:
                cache.job_complete(cache_key, 'mean_maps', {
                    'FA': FA_file_path,
                    'MD': MD_file_path
                })
        else:
            # no subjects returned in query
            current_app.logger.info(
                f'No subjects returned for query {json.dumps(request_query, indent=4)}'
            )
            return 'No subjects returned in query', 404
    elif mean_maps_status in ['STAGED', 'IN_PROGRESS']:
        current_app.logger.info(
            'mean_maps job in progress, waiting for it to finish...')
        # poll cache until COMPLETE; set status to FAILED after waiting 20 secs
        timeout = 20
        cache.poll_cache(cache_key, 'mean_maps', timeout, 0.2)
        if cache.job_status(cache_key, 'mean_maps') == 'COMPLETE':
            current_app.logger.info('mean_maps job complete')
            # get FA and MD maps from cache
            mean_maps = cache.job_result(cache_key, 'mean_maps')
            FA_file_path = mean_maps.get('FA')
            MD_file_path = mean_maps.get('MD')
        else:
            current_app.logger.warning(
                f'mean_maps job failed to complete in {timeout} secs, setting job status to FAILED and returning...'
            )
            cache.job_failed(cache_key, 'mean_maps')
            return 'mean_maps job FAILED', 500
    elif mean_maps_status == 'COMPLETE':
        current_app.logger.info('mean_maps job complete')
        # get FA and MD maps from cache
        mean_maps = cache.job_result(cache_key, 'mean_maps')
        FA_file_path = mean_maps.get('FA')
        MD_file_path = mean_maps.get('MD')

    # check if the tract probability map has been cached or needs to be recreated
    tract_status = cache.add_job_locked(cache_key, tract_code)
    if tract_status in ['PROCEED', 'FAILED', None]:
        # new job created or could not access cache
        current_app.logger.info(
            f'{tract_code} job status is {tract_status}, generating new probability map...'
        )
        file_path_data = dbu.density_map_file_path_data(request_query)
        if len(file_path_data) > 0:
            if tract_status:
                current_app.logger.info(
                    f'Adding {tract_code} job for query {json.dumps(request_query, indent=4)}'
                )
                cache.job_in_progress(cache_key, tract_code)
            else:
                current_app.logger.info(
                    f'Calculating probability map for tract {tract_code} and query {json.dumps(request_query, indent=4)}'
                )
            data_dir = current_app.config['DATA_FILE_PATH']  # file path to data folder
            tract_file_path = du.generate_average_density_map(
                data_dir, file_path_data, tract, 'MNI')
            if tract_status:
                cache.job_complete(cache_key, tract_code, tract_file_path)
                current_app.logger.info(
                    f'{tract_code} job complete for query {json.dumps(request_query, indent=4)}'
                )
            else:
                current_app.logger.info(
                    f'Completed probability map for tract {tract_code} and query {json.dumps(request_query, indent=4)}'
                )
        else:
            current_app.logger.info(
                f'No subjects returned for query {json.dumps(request_query, indent=4)}'
            )
            return 'No subjects returned for the current query', 404
    elif tract_status in ['STAGED', 'IN_PROGRESS']:
        # another worker is running the job
        current_app.logger.info(
            f'{tract_code} job in progress, waiting to complete...')
        # poll cache waiting for COMPLETE status (max wait 10 secs before quitting)
        timeout = 10
        cache.poll_cache(cache_key, tract_code, timeout, 0.2)
        # set status to FAILED if not COMPLETE after 10 secs
        if cache.job_status(cache_key, tract_code) == 'COMPLETE':
            tract_file_path = cache.job_result(cache_key, tract_code)
        else:
            current_app.logger.warning(
                f'{tract_code} job did not complete in {timeout} secs, setting job status to FAILED.'
            )
            cache.job_failed(cache_key, tract_code)
            return f'Job {tract_code} timed out for query {json.dumps(request_query, indent=4)}.', 500
    elif tract_status == 'COMPLETE':
        # job has already been run, get the file path from the cache
        current_app.logger.info(f'{tract_code} job complete.')
        tract_file_path = cache.job_result(cache_key, tract_code)

    # calculate results and return
    FA_map_data = du.get_nifti_data(FA_file_path)
    MD_map_data = du.get_nifti_data(MD_file_path)
    tract_data = du.get_nifti_data(tract_file_path)
    mean_FA, std_FA = du.averaged_tract_mean_std(FA_map_data, tract_data, threshold)
    mean_MD, std_MD = du.averaged_tract_mean_std(MD_map_data, tract_data, threshold)
    vol = du.averaged_tract_volume(tract_data, threshold)

    results = {
        'tractCode': tract_code,
        'tractName': tract.name,
        'volume': vol,
        'meanFA': mean_FA,
        'stdFA': std_FA,
        'meanMD': mean_MD,
        'stdMD': std_MD
    }
    return jsonify(results)
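# A guess at the contract of du.averaged_tract_mean_std, used above: the mean
# and std of a metric map (FA or MD) over the voxels where the averaged tract
# density map exceeds the threshold. A sketch under that assumption only; the
# real helper in data_utils may differ.
import numpy as np

def averaged_tract_mean_std_sketch(metric_map, tract_data, threshold):
    # select metric values inside the thresholded tract mask
    in_tract = metric_map[tract_data > threshold]
    return np.mean(in_tract), np.std(in_tract)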
def get_tract(tract_code):
    cache = JobCache(current_app.cache, current_app.cache_lock)

    # construct cache key
    query_string_decoded = request.query_string.decode('utf-8')
    cache_key = cu.construct_cache_key(query_string_decoded)

    # check request query is valid
    request_query = jquery_unparam(query_string_decoded)
    # remove query param required for correct parsing of nii.gz client side
    request_query.pop('file_type', None)
    if not check_request_query(request_query):
        current_app.logger.warning(
            f'Could not parse param string {query_string_decoded}, returning 400...'
        )
        return 'Could not parse query param string.', 400

    current_app.logger.info(
        f'Getting tract {tract_code} for query {json.dumps(request_query, indent=4)}'
    )

    # validate tract code
    tract = dbu.get_tract(tract_code)
    if not tract:
        current_app.logger.warning(
            f'Nonexistent tract code {tract_code}, returning 400...')
        return f'The requested tract {tract_code} does not exist', 400

    status = cache.add_job_locked(cache_key, tract_code)

    if status in ['PROCEED', None]:
        # new job created or could not access cache
        current_app.logger.info(f'Job status is {status}')
        file_path_data = dbu.density_map_file_path_data(request_query)
        if len(file_path_data) > 0:
            if status:
                current_app.logger.info(
                    f'Adding {tract_code} job for query {json.dumps(request_query, indent=4)}'
                )
                cache.job_in_progress(cache_key, tract_code)
            else:
                current_app.logger.info(
                    f'Calculating probability map for tract {tract_code} and query {json.dumps(request_query, indent=4)}'
                )
            data_dir = current_app.config['DATA_FILE_PATH']  # file path to data folder
            file_path = du.generate_average_density_map(
                data_dir, file_path_data, tract, 'MNI')
            if status:
                cache.job_complete(cache_key, tract_code, file_path)
                current_app.logger.info(
                    f'{tract_code} job complete for query {json.dumps(request_query, indent=4)}'
                )
            else:
                current_app.logger.info(
                    f'Completed probability map for tract {tract_code} and query {json.dumps(request_query, indent=4)}'
                )
            file_path = file_path_relative_to_root_path(file_path)
            return send_file(file_path,
                             as_attachment=True,
                             attachment_filename=f'{tract_code}.nii.gz',
                             conditional=True,
                             add_etags=True)
        else:
            current_app.logger.info(
                f'No subjects returned for query {json.dumps(request_query, indent=4)}'
            )
            return 'No subjects returned for the current query', 404
    elif status in ['STAGED', 'IN_PROGRESS']:
        # another worker is running the job
        current_app.logger.info(
            f'{tract_code} job in progress, waiting to complete...')
        # poll cache waiting for COMPLETE status (max wait 10 secs before quitting)
        timeout = 10
        cache.poll_cache(cache_key, tract_code, timeout, 0.2)
        # set status to FAILED if not COMPLETE after 10 secs
        if cache.job_status(cache_key, tract_code) == 'COMPLETE':
            file_path = cache.job_result(cache_key, tract_code)
            file_path = file_path_relative_to_root_path(file_path)
            return send_file(file_path,
                             as_attachment=True,
                             attachment_filename=f'{tract_code}.nii.gz',
                             conditional=True,
                             add_etags=True)
        else:
            current_app.logger.warning(
                f'{tract_code} job did not complete in {timeout} secs, setting job status to FAILED.'
            )
            cache.job_failed(cache_key, tract_code)
            return f'Job {tract_code} timed out for query {json.dumps(request_query, indent=4)}.', 500
    elif status == 'COMPLETE':
        # job has already been run, get the file path from the cache
        current_app.logger.info(f'{tract_code} job complete.')
        file_path = cache.job_result(cache_key, tract_code)
        file_path = file_path_relative_to_root_path(file_path)
        return send_file(file_path,
                         as_attachment=True,
                         attachment_filename=f'{tract_code}.nii.gz',
                         conditional=True,
                         add_etags=True)
    elif status == 'FAILED':
        # job was attempted but failed
        return f'Job {tract_code} failed for query {json.dumps(request_query, indent=4)}.', 500
    else:
        return f'Unrecognised status {status} for job {tract_code} with query {json.dumps(request_query, indent=4)}.', 500
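# Presumed behaviour of JobCache.poll_cache as relied on by the routes above:
# re-check the job status at a fixed interval until it is COMPLETE or the
# timeout elapses. A sketch only; the real method lives on JobCache and its
# return value is not inspected by the callers here.
import time

def poll_cache_sketch(cache, cache_key, job_id, timeout, interval):
    waited = 0.0
    while waited < timeout:
        if cache.job_status(cache_key, job_id) == 'COMPLETE':
            return True
        time.sleep(interval)
        waited += interval
    return False  # callers re-check job_status and mark the job FAILED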
def generate_mean_maps():
    # instantiate JobCache
    cache = JobCache(current_app.cache, current_app.cache_lock)

    # construct cache key
    query_string_decoded = request.query_string.decode('utf-8')
    cache_key = cu.construct_cache_key(query_string_decoded)

    # check request query is valid
    request_query = jquery_unparam(query_string_decoded)
    if not check_request_query(request_query):
        current_app.logger.info(
            f'Could not parse param string {json.dumps(request_query, indent=4)}'
        )
        return 'Could not parse query param string.', 400

    current_app.logger.info('Attempting to add job mean_maps')
    status = cache.add_job_locked(cache_key, 'mean_maps')

    if status is None:
        # could not access cache, so no point doing the work if we can't cache it
        current_app.logger.info('Could not access cache, returning...')
        return 'Could not access cache', 204
    elif status in ['PROCEED', 'FAILED']:
        current_app.logger.info(f'Job status is {status}')
        subject_ids_dataset_paths = dbu.subject_id_dataset_file_path(request_query)
        if len(subject_ids_dataset_paths) > 0:
            current_app.logger.info(
                f'Adding mean_maps job for query {json.dumps(request_query, indent=4)}'
            )
            cache.job_in_progress(cache_key, 'mean_maps')
            data_dir = current_app.config['DATA_FILE_PATH']
            mean_FA = du.subject_averaged_FA(subject_ids_dataset_paths, data_dir)
            mean_MD = du.subject_averaged_MD(subject_ids_dataset_paths, data_dir)
            cache.job_complete(cache_key, 'mean_maps', {
                'FA': mean_FA,
                'MD': mean_MD
            })
            current_app.logger.info(
                f'mean_maps job complete for query {json.dumps(request_query, indent=4)}'
            )
            return 'Mean maps created', 204
        else:
            # no subjects returned in query
            current_app.logger.info(
                f'No subjects returned for query {json.dumps(request_query, indent=4)}'
            )
            cache.remove_job(cache_key, 'mean_maps')
            return 'No subjects returned in query', 204
    elif status in ['STAGED', 'IN_PROGRESS', 'COMPLETE']:
        current_app.logger.info(
            'mean_maps job in progress or complete, returning...')
        return 'Mean maps job in progress or complete', 204
    else:
        return f'Unrecognised status {status} for job mean_maps with query {json.dumps(request_query, indent=4)}.', 500
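# The job lifecycle these routes assume from JobCache.add_job_locked, collected
# in one place. This is inferred from the branches above, not from the JobCache
# source, so treat it as documentation of the call sites rather than the class:
#   None        - the cache itself could not be accessed
#   'PROCEED'   - a new job was staged for this worker; it should do the work
#   'STAGED' /
#   'IN_PROGRESS' - another worker owns the job; poll the cache for the result
#   'COMPLETE'  - the result is cached; fetch it with job_result
#   'FAILED'    - a previous attempt died; most routes retry, get_tract returns 500
JOB_STATUSES = (None, 'PROCEED', 'STAGED', 'IN_PROGRESS', 'COMPLETE', 'FAILED')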