import os

import numpy as np
import pandas as pd
import tables

# NOTE: the project-internal helpers and constants used below
# (min_num_skel_defaults, filterPossibleCoils, getValidIndexes,
# filterByPopulationMorphology, TimeCounter, print_flush, TABLE_FILTERS,
# WormStats, FUNC_FOR_DIV, copy_unit_conversions, read_fps,
# getGoodTrajIndexes, WormFromTable, getOpenWormData, getFeatStats,
# setIntMapIndexes, getWidthWinLimits, getWormROI, smoothSkeletons,
# getStraightenWormInt) are assumed to be importable from the surrounding
# package.


def getFilteredSkels(
        skeletons_file,
        min_num_skel=100,
        bad_seg_thresh=0.8,
        min_displacement=5,
        critical_alpha=0.01,
        max_width_ratio=2.25,
        max_area_ratio=6):

    min_num_skel = min_num_skel_defaults(skeletons_file, min_num_skel=min_num_skel)

    # check that the skeletonization finished successfully
    # (reading the node will raise if it does not exist)
    with tables.File(skeletons_file, "r") as ske_file_id:
        skeleton_table = ske_file_id.get_node('/skeleton')

    # eliminate skeletons that do not match a decent head, tail and body ratio.
    # Likely to be coils. Taken from Segworm.
    filterPossibleCoils(
        skeletons_file,
        max_width_ratio=max_width_ratio,
        max_area_ratio=max_area_ratio)

    with pd.HDFStore(skeletons_file, 'r') as table_fid:
        trajectories_data = table_fid['/trajectories_data']

    # get valid rows using the trajectory displacement and the skeletonization
    # success. These indexes will be used to calculate statistics of what
    # represents a valid skeleton.
    good_traj_index, good_skel_row = getValidIndexes(
        trajectories_data,
        min_num_skel=min_num_skel,
        bad_seg_thresh=bad_seg_thresh,
        min_displacement=min_displacement)

    # filter skeletons depending on the population morphology (area, width and length)
    filterByPopulationMorphology(
        skeletons_file,
        good_skel_row,
        critical_alpha=critical_alpha)
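
# --- usage sketch (not part of the pipeline) --------------------------------
# A minimal, hypothetical example of running the skeleton filter on its own.
# The file path is an assumption; the keyword values simply make the defaults
# explicit. The function edits the skeletons file in place and returns None,
# so there is nothing to capture.
def _example_filter_skels(skeletons_file='example_skeletons.hdf5'):
    getFilteredSkels(
        skeletons_file,
        min_num_skel=100,      # trajectories with fewer valid skeletons are ignored
        bad_seg_thresh=0.8,    # required fraction of successfully segmented frames
        min_displacement=5,    # discard nearly stationary (likely spurious) trajectories
        critical_alpha=0.01,   # significance level of the population morphology test
        max_width_ratio=2.25,  # width ratios above this suggest a coiled worm
        max_area_ratio=6)      # area ratios above this suggest a coiled worm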
def getWormFeaturesFilt(
        skeletons_file,
        features_file,
        use_skel_filter,
        use_manual_join,
        is_single_worm,
        feat_filt_param,
        split_traj_time):

    feat_filt_param = min_num_skel_defaults(skeletons_file, **feat_filt_param)

    def _iniFileGroups():
        # initialize groups for the timeseries and event features
        header_timeseries = {
            feat: tables.Float32Col(pos=ii)
            for ii, (feat, _) in enumerate(wStats.feat_timeseries_dtype)}
        table_timeseries = features_fid.create_table(
            '/', 'features_timeseries', header_timeseries, filters=TABLE_FILTERS)

        # save some data used in the calculation as attributes
        fps, microns_per_pixel, _ = copy_unit_conversions(table_timeseries, skeletons_file)
        table_timeseries._v_attrs['worm_index_type'] = worm_index_type

        # node to save the event features
        group_events = features_fid.create_group('/', 'features_events')

        # get the skeletons shape to initialize the coordinate arrays
        with tables.File(skeletons_file, 'r') as ske_file_id:
            skel_shape = ske_file_id.get_node('/skeleton').shape

        # arrays to save the skeletons and contours
        worm_coords_array = {}
        w_node = features_fid.create_group('/', 'coordinates')
        for array_name in ['skeletons', 'dorsal_contours', 'ventral_contours']:
            worm_coords_array[array_name] = features_fid.create_earray(
                w_node,
                array_name,
                shape=(0, skel_shape[1], skel_shape[2]),
                atom=tables.Float32Atom(shape=()),
                filters=TABLE_FILTERS)

        # initialize rec arrays with the averaged features of each worm
        stats_features_df = {
            stat: np.full(tot_worms, np.nan, dtype=wStats.feat_avg_dtype)
            for stat in FUNC_FOR_DIV}

        return header_timeseries, table_timeseries, group_events, \
            worm_coords_array, stats_features_df

    progress_timer = TimeCounter('')

    def _displayProgress(n):
        # display progress
        dd = " Extracting features. Worm %i of %i done." % (n, tot_worms)
        print_flush(
            base_name + dd + ' Total time:' + progress_timer.get_time_str())

    # get the indexes of the valid worm trajectories
    good_traj_index, worm_index_type = getGoodTrajIndexes(
        skeletons_file,
        use_skel_filter,
        use_manual_join,
        is_single_worm,
        feat_filt_param)

    fps = read_fps(skeletons_file)
    split_traj_frames = int(np.round(split_traj_time * fps))  # the fps could be non-integer

    # base name of the file, used to display progress
    base_name = skeletons_file.rpartition('.')[0].rpartition(os.sep)[-1].rpartition('_')[0]

    with tables.File(features_file, 'w') as features_fid:
        # copy the experiment info if it exists. If the stage was not aligned
        # correctly an empty features file is created below.
        with tables.File(skeletons_file, 'r') as skel_fid:
            if '/experiment_info' in skel_fid:
                dd = skel_fid.get_node('/experiment_info').read()
                features_fid.create_array('/', 'experiment_info', obj=dd)

        # total number of worms
        tot_worms = len(good_traj_index)
        if tot_worms == 0:
            print_flush(base_name + ' No valid worms found to calculate features. Creating empty file.')
            return

        # initialize by getting the specs data subdivision
        wStats = WormStats()
        all_splitted_feats = {stat: [] for stat in FUNC_FOR_DIV}

        # initialize file
        header_timeseries, table_timeseries, group_events, \
            worm_coords_array, stats_features_df = _iniFileGroups()

        _displayProgress(0)
        # start to calculate features for each worm trajectory
        for ind_N, worm_index in enumerate(good_traj_index):
            # initialize worm object, and extract data from skeletons file
            worm = WormFromTable(
                skeletons_file,
                worm_index,
                use_skel_filter=use_skel_filter,
                worm_index_type=worm_index_type,
                smooth_window=5)

            if is_single_worm:
                # worm with the stage correction applied
                worm.correct_schafer_worm()
                if np.all(np.isnan(worm.skeleton[:, 0, 0])):
                    print_flush('{} No valid skeletons found after stage correction. Skipping worm index {}'.format(base_name, worm_index))
                    return

            # calculate features
            timeseries_data, events_data, worm_stats = getOpenWormData(worm, wStats)

            # get the splitted features
            splitted_worms = [
                x for x in worm.split(split_traj_frames)
                if x.n_valid_skel > feat_filt_param['min_num_skel']
                and x.n_valid_skel / x.n_frames >= feat_filt_param['bad_seg_thresh']]
            dd = [getFeatStats(x, wStats)[1] for x in splitted_worms]
            splitted_feats = {stat: [x[stat] for x in dd] for stat in FUNC_FOR_DIV}

            # add data to save
            # save timeseries data
            table_timeseries.append(timeseries_data)
            table_timeseries.flush()

            # save skeletons
            worm_coords_array['skeletons'].append(worm.skeleton)
            worm_coords_array['dorsal_contours'].append(worm.dorsal_contour)
            worm_coords_array['ventral_contours'].append(worm.ventral_contour)

            # save event data as a subgroup per worm
            worm_node = features_fid.create_group(
                group_events, 'worm_%i' % worm_index)
            worm_node._v_attrs['worm_index'] = worm_index
            worm_node._v_attrs['frame_range'] = np.array(
                (worm.first_frame, worm.last_frame))

            for feat in events_data:
                tmp_data = events_data[feat]
                # consider the cases where the output is a single number,
                # empty or None
                if isinstance(tmp_data, (float, int)):
                    tmp_data = np.array([tmp_data])
                if tmp_data is None or tmp_data.size == 0:
                    tmp_data = np.array([np.nan])
                features_fid.create_carray(
                    worm_node, feat, obj=tmp_data, filters=TABLE_FILTERS)

            # store the average for each worm feature
            for stat in FUNC_FOR_DIV:
                stats_features_df[stat][ind_N] = worm_stats[stat]
                # append the splitted trajectory features
                all_splitted_feats[stat] += splitted_feats[stat]

            # report progress
            _displayProgress(ind_N + 1)

        # create and save a table containing the averaged worm feature for
        # each worm
        f_node = features_fid.create_group('/', 'features_summary')
        for stat, stats_df in stats_features_df.items():
            splitted_feats = all_splitted_feats[stat]

            # check that the array is not empty
            if len(splitted_feats) > 0:
                splitted_feats_arr = np.array(splitted_feats)
            else:
                # return a row full of nan to indicate a fail
                splitted_feats_arr = np.full(1, np.nan, dtype=wStats.feat_avg_dtype)

            features_fid.create_table(
                f_node, stat, obj=stats_df, filters=TABLE_FILTERS)

            feat_stat_split = features_fid.create_table(
                f_node, stat + '_split', obj=splitted_feats_arr, filters=TABLE_FILTERS)
            feat_stat_split._v_attrs['split_traj_frames'] = split_traj_frames

            if stat == 'means':
                # FUTURE: I am duplicating this field for backward
                # compatibility, I should remove it later on.
                features_fid.create_table(
                    '/', 'features_means', obj=stats_df, filters=TABLE_FILTERS)
                features_fid.create_table(
                    '/', 'features_means_split', obj=splitted_feats_arr, filters=TABLE_FILTERS)

    print_flush(
        base_name + ' Feature extraction finished: ' + progress_timer.get_time_str())
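
# --- usage sketch (not part of the pipeline) --------------------------------
# A minimal sketch of how the features file produced above could be read back,
# assuming the layout created by getWormFeaturesFilt. The file path and the
# helper name are hypothetical; '/features_timeseries' and
# '/features_summary/means' are the nodes this function creates ('means' is
# assumed to be one of the FUNC_FOR_DIV statistics, as suggested by the
# backward-compatibility branch above).
def _example_read_features(features_file='example_features.hdf5'):
    with pd.HDFStore(features_file, 'r') as fid:
        timeseries = fid['/features_timeseries']  # one row per worm per frame
    with tables.File(features_file, 'r') as fid:
        means = fid.get_node('/features_summary/means')[:]  # per-worm averages
    return timeseries, means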
def getIntensityProfile(
        masked_image_file,
        skeletons_file,
        intensities_file,
        width_resampling=15,
        length_resampling=131,
        min_num_skel=100,
        smooth_win=11,
        pol_degree=3,
        width_percentage=0.5,
        save_maps=False):

    min_num_skel = min_num_skel_defaults(skeletons_file, min_num_skel=min_num_skel)

    assert smooth_win > pol_degree
    assert min_num_skel > 0
    assert 0 < width_percentage < 1

    # we want to use a symmetrical distance centered in the skeleton
    if length_resampling % 2 == 0:
        length_resampling += 1
    if width_resampling % 2 == 0:
        width_resampling += 1

    # get the limits to be averaged from the intensity map
    if save_maps:
        width_win_ind = getWidthWinLimits(width_resampling, width_percentage)
    else:
        width_win_ind = (0, width_resampling)

    # filters for the tables structures
    table_filters = tables.Filters(
        complevel=5, complib='zlib', shuffle=True, fletcher32=True)

    # Get a reduced version of the trajectories_data table with only the valid
    # skeletons. The rows of this new table are going to be saved into
    # skeletons_file.
    trajectories_data_valid = setIntMapIndexes(skeletons_file, min_num_skel)

    # let's save this new table into the intensities file
    with tables.File(intensities_file, 'w') as fid:
        fid.create_table(
            '/',
            'trajectories_data_valid',
            obj=trajectories_data_valid.to_records(index=False),
            filters=table_filters)

    tot_rows = len(trajectories_data_valid)
    if tot_rows == 0:
        with tables.File(intensities_file, "r+") as int_file_id:
            # nothing to do here, let's save empty data and exit
            worm_int_avg_tab = int_file_id.create_array(
                "/", "straighten_worm_intensity_median", obj=np.zeros(0))
            worm_int_avg_tab._v_attrs['has_finished'] = 1
        return

    with tables.File(masked_image_file, 'r') as mask_fid, \
            tables.File(skeletons_file, 'r') as ske_file_id, \
            tables.File(intensities_file, "r+") as int_file_id:

        # pointer to the compressed videos
        mask_dataset = mask_fid.get_node("/mask")

        # pointers to the skeletons and widths
        skel_tab = ske_file_id.get_node('/skeleton')
        skel_width_tab = ske_file_id.get_node('/width_midbody')

        filters = tables.Filters(complevel=5, complib='zlib', shuffle=True)

        # we are using Float16 to save space; I am assuming the intensities
        # are within the uint8 range
        worm_int_avg_tab = int_file_id.create_carray(
            "/",
            "straighten_worm_intensity_median",
            tables.Float16Atom(dflt=np.nan),
            (tot_rows, length_resampling),
            chunkshape=(1, length_resampling),
            filters=table_filters)

        worm_int_avg_tab._v_attrs['has_finished'] = 0
        worm_int_avg_tab.attrs['width_win_ind'] = width_win_ind

        if save_maps:
            worm_int_tab = int_file_id.create_carray(
                "/",
                "straighten_worm_intensity",
                tables.Float16Atom(dflt=np.nan),
                (tot_rows, length_resampling, width_resampling),
                chunkshape=(1, length_resampling, width_resampling),
                filters=table_filters)

        grouped_frames = trajectories_data_valid.groupby('frame_number')

        # variables used to report progress
        base_name = skeletons_file.rpartition('.')[0].rpartition(
            os.sep)[-1].rpartition('_')[0]
        progressTime = TimeCounter('Obtaining intensity maps.', len(grouped_frames))

        for frame, frame_data in grouped_frames:
            img = mask_dataset[frame, :, :]
            for ii, row_data in frame_data.iterrows():
                skeleton_id = int(row_data['skeleton_id'])
                worm_index = int(row_data['worm_index_joined'])
                int_map_id = int(row_data['int_map_id'])

                # read ROI and skeleton, and put them in the same coordinates map
                worm_img, roi_corner = getWormROI(
                    img,
                    row_data['coord_x'],
                    row_data['coord_y'],
                    row_data['roi_size'])
                skeleton = skel_tab[skeleton_id, :, :] - roi_corner
                half_width = skel_width_tab[skeleton_id] / 2

                assert not np.isnan(skeleton[0, 0])

                skel_smooth = smoothSkeletons(
                    skeleton,
                    length_resampling=length_resampling,
                    smooth_win=smooth_win,
                    pol_degree=pol_degree)
                straighten_worm, grid_x, grid_y = getStraightenWormInt(
                    worm_img,
                    skel_smooth,
                    half_width=half_width,
                    width_resampling=width_resampling)

                # if you use the mean it is better not to use float16
                int_avg = np.median(
                    straighten_worm[width_win_ind[0]:width_win_ind[1], :], axis=0)

                worm_int_avg_tab[int_map_id] = int_avg

                # only save the full map if it is specified by the user
                if save_maps:
                    worm_int_tab[int_map_id] = straighten_worm.T

            if frame % 500 == 0:
                progress_str = progressTime.get_str(frame)
                print_flush(base_name + ' ' + progress_str)

        worm_int_avg_tab._v_attrs['has_finished'] = 1
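
# --- usage sketch (not part of the pipeline) --------------------------------
# A minimal sketch, assuming an intensities file written by the function
# above. It reads back the per-skeleton median intensity profiles and checks
# the 'has_finished' flag used to mark a complete run. The file path and the
# helper name are hypothetical.
def _example_read_intensities(intensities_file='example_intensities.hdf5'):
    with tables.File(intensities_file, 'r') as fid:
        tab = fid.get_node('/straighten_worm_intensity_median')
        assert tab._v_attrs['has_finished'] == 1  # written at the end of a full run
        profiles = tab[:]  # (tot_rows, length_resampling) float16 array
    return profiles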