def getWormFeaturesFilt(
        skeletons_file,
        features_file,
        use_skel_filter,
        use_manual_join,
        is_single_worm,
        feat_filt_param,
        split_traj_time):
    """Compute the features of every valid worm and store them in an HDF5 file.

    ``features_file`` is opened in write mode (any previous content is
    replaced) and filled with:

    * ``/experiment_info``: copied from ``skeletons_file`` when present.
    * ``/features_timeseries``: one table with the timeseries features of
      all the worms.
    * ``/features_events/worm_<index>``: one group per worm with one array
      per event feature.
    * ``/coordinates/{skeletons,dorsal_contours,ventral_contours}``.
    * ``/features_summary/<stat>`` and ``/features_summary/<stat>_split``
      for each ``stat`` in ``FUNC_FOR_DIV``; the ``means`` tables are also
      duplicated as ``/features_means`` and ``/features_means_split``.

    Parameters
    ----------
    skeletons_file : str
        Path of the input HDF5 file; its ``/skeleton`` node is read.
    features_file : str
        Path of the output HDF5 file.
    use_skel_filter, use_manual_join :
        Forwarded to ``getGoodTrajIndexes`` (``use_skel_filter`` is also
        forwarded to ``WormFromTable``).
    is_single_worm : bool
        When true ``worm.correct_schafer_worm()`` is applied to each worm
        before the features are calculated.
    feat_filt_param : dict
        Expanded as keyword arguments of ``min_num_skel_defaults``. The
        result must provide ``'min_num_skel'`` and ``'bad_seg_thresh'``.
    split_traj_time :
        Multiplied by the fps of the file and rounded to obtain the length,
        in frames, of the sub-trajectories used for the ``*_split`` tables.

    Returns
    -------
    None
    """
    feat_filt_param = min_num_skel_defaults(skeletons_file, **feat_filt_param)

    def _create_output_nodes():
        """Create the empty tables, groups and arrays filled by the main loop."""
        # one Float32 column per timeseries feature, in the order of the dtype
        timeseries_cols = {}
        for col_pos, (feat_name, _) in enumerate(wStats.feat_timeseries_dtype):
            timeseries_cols[feat_name] = tables.Float32Col(pos=col_pos)
        ts_table = features_fid.create_table(
            '/', 'features_timeseries', timeseries_cols,
            filters=TABLE_FILTERS)

        # store data used in the calculation as attributes of the table; the
        # three returned values are not needed here
        _, _, _ = copy_unit_conversions(ts_table, skeletons_file)
        ts_table._v_attrs['worm_index_type'] = worm_index_type

        # node where the event features of each worm are saved
        events_group = features_fid.create_group('/', 'features_events')

        # the coordinate arrays share the last two dimensions of /skeleton
        with tables.File(skeletons_file, 'r') as skel_src:
            skel_shape = skel_src.get_node('/skeleton').shape

        coords_group = features_fid.create_group('/', 'coordinates')
        coords_arrays = {}
        for coords_name in ['skeletons', 'dorsal_contours', 'ventral_contours']:
            coords_arrays[coords_name] = features_fid.create_earray(
                coords_group,
                coords_name,
                shape=(0, skel_shape[1], skel_shape[2]),
                atom=tables.Float32Atom(shape=()),
                filters=TABLE_FILTERS)

        # one record per worm and per statistic, NaN until it is calculated
        per_worm_stats = {
            stat: np.full(tot_worms, np.nan, dtype=wStats.feat_avg_dtype)
            for stat in FUNC_FOR_DIV}

        return ts_table, events_group, coords_arrays, per_worm_stats

    def _has_enough_skeletons(segment):
        """Return a true value if a sub-trajectory passes both filter thresholds."""
        return (
            segment.n_valid_skel > feat_filt_param['min_num_skel']
            and segment.n_valid_skel / segment.n_frames
            >= feat_filt_param['bad_seg_thresh'])

    # timer used to report the elapsed time in the progress messages
    progress_timer = TimeCounter('')

    def _report_progress(n_done):
        """Print how many worms were processed and the elapsed time."""
        progress_msg = " Extracting features. Worm %i of %i done." % (
            n_done, tot_worms)
        print_flush(
            base_name + progress_msg + ' Total time:'
            + progress_timer.get_time_str())

    # get the valid worm indexes
    good_traj_index, worm_index_type = getGoodTrajIndexes(
        skeletons_file,
        use_skel_filter,
        use_manual_join,
        is_single_worm,
        feat_filt_param)

    # the fps could be non integer, so the number of frames is rounded
    fps = read_fps(skeletons_file)
    split_traj_frames = int(np.round(split_traj_time * fps))

    # file name without directory, extension and the suffix after the last '_'
    without_ext = skeletons_file.rpartition('.')[0]
    file_name = without_ext.rpartition(os.sep)[-1]
    base_name = file_name.rpartition('_')[0]

    with tables.File(features_file, 'w') as features_fid:
        # copy the experiment information if the skeletons file has it
        with tables.File(skeletons_file, 'r') as skel_fid:
            if '/experiment_info' in skel_fid:
                exp_info = skel_fid.get_node('/experiment_info').read()
                features_fid.create_array('/', 'experiment_info', obj=exp_info)

        # total number of worms
        tot_worms = len(good_traj_index)
        if tot_worms == 0:
            print_flush(base_name + ' No valid worms found to calculate features. Creating empty file.')
            return

        # specs used to subdivide the features
        wStats = WormStats()
        all_splitted_feats = {stat: [] for stat in FUNC_FOR_DIV}

        # initialize the output file
        (table_timeseries, group_events,
         worm_coords_array, stats_features_df) = _create_output_nodes()
        _report_progress(0)

        # calculate the features of each worm trajectory
        for ind_N, worm_index in enumerate(good_traj_index):
            # worm object with the data extracted from the skeletons file
            worm = WormFromTable(
                skeletons_file,
                worm_index,
                use_skel_filter=use_skel_filter,
                worm_index_type=worm_index_type,
                smooth_window=5)

            if is_single_worm:
                # worm with the stage correction applied
                worm.correct_schafer_worm()
                if np.all(np.isnan(worm.skeleton[:, 0, 0])):
                    # NOTE(review): the message says the worm is skipped, but
                    # the whole function returns here -- confirm the intent.
                    print_flush('{} Not valid skeletons found after stage correction. Skiping worm index {}'.format(base_name, worm_index))
                    return

            # features of the whole trajectory
            timeseries_data, events_data, worm_stats = getOpenWormData(
                worm, wStats)

            # features of the sub-trajectories that pass the filters
            splitted_worms = [
                segment for segment in worm.split(split_traj_frames)
                if _has_enough_skeletons(segment)]
            segment_stats = [
                getFeatStats(segment, wStats)[1] for segment in splitted_worms]
            splitted_feats = {
                stat: [seg_stat[stat] for seg_stat in segment_stats]
                for stat in FUNC_FOR_DIV}

            # save the timeseries data
            table_timeseries.append(timeseries_data)
            table_timeseries.flush()

            # save the coordinates
            worm_coords_array['skeletons'].append(worm.skeleton)
            worm_coords_array['dorsal_contours'].append(worm.dorsal_contour)
            worm_coords_array['ventral_contours'].append(worm.ventral_contour)

            # save the event data as a subgroup per worm
            worm_node = features_fid.create_group(
                group_events, 'worm_%i' % worm_index)
            worm_node._v_attrs['worm_index'] = worm_index
            worm_node._v_attrs['frame_range'] = np.array(
                (worm.first_frame, worm.last_frame))

            for feat in events_data:
                event_values = events_data[feat]
                # a single number is wrapped in an array; None or an empty
                # output is replaced by a single NaN
                if isinstance(event_values, (float, int)):
                    event_values = np.array([event_values])
                if event_values is None or event_values.size == 0:
                    event_values = np.array([np.nan])
                features_fid.create_carray(
                    worm_node, feat, obj=event_values, filters=TABLE_FILTERS)

            for stat in FUNC_FOR_DIV:
                # statistic of the whole trajectory of this worm
                stats_features_df[stat][ind_N] = worm_stats[stat]
                # statistics of its sub-trajectories
                all_splitted_feats[stat] += splitted_feats[stat]

            _report_progress(ind_N + 1)

        # save one table per statistic with one row per worm, and another
        # with one row per sub-trajectory
        f_node = features_fid.create_group('/', 'features_summary')
        for stat, stats_df in stats_features_df.items():
            split_rows = all_splitted_feats[stat]
            # a single row full of NaN indicates that there were no valid
            # sub-trajectories
            splitted_feats_arr = (
                np.array(split_rows) if len(split_rows) > 0
                else np.full(1, np.nan, dtype=wStats.feat_avg_dtype))

            features_fid.create_table(
                f_node, stat, obj=stats_df, filters=TABLE_FILTERS)

            feat_stat_split = features_fid.create_table(
                f_node, stat + '_split', obj=splitted_feats_arr,
                filters=TABLE_FILTERS)
            feat_stat_split._v_attrs['split_traj_frames'] = split_traj_frames

            if stat == 'means':
                # FUTURE: these fields are duplicated for backward
                # compatibility and should be removed later on.
                features_fid.create_table(
                    '/', 'features_means', obj=stats_df,
                    filters=TABLE_FILTERS)
                features_fid.create_table(
                    '/', 'features_means_split', obj=splitted_feats_arr,
                    filters=TABLE_FILTERS)

    print_flush(
        base_name + ' Feature extraction finished: '
        + progress_timer.get_time_str())
# Scratch script: load the trajectories table of `skeletons_file` and build a
# SmoothedWorm for every valid worm trajectory.
import pandas as pd

# read the whole /trajectories_data table
with pd.HDFStore(skeletons_file, 'r') as fid:
    trajectories_data = fid['/trajectories_data']

# NOTE(review): the result of this selection is discarded; it looks like a
# leftover from inspecting the rows with worm_index_joined == 2 interactively.
trajectories_data[trajectories_data['worm_index_joined'] == 2]

#%%
# smoothing window for the coordinates: a third of the fps, rounded
fps = read_fps(skeletons_file)
coords_smooth_window = int(np.round(fps / 3))
if coords_smooth_window <= 3:
    # presumably None disables the smoothing of the coordinates in
    # SmoothedWorm -- TODO confirm
    coords_smooth_window = None

good_traj_index, worm_index_type = getGoodTrajIndexes(skeletons_file)
for iw, worm_index in enumerate(good_traj_index):
    # progress: position of the current worm and total number of worms
    print(iw, len(good_traj_index))
    worm = WormFromTable(skeletons_file, worm_index, worm_index_type=worm_index_type)
    if is_WT2:
        worm.correct_schafer_worm()

    wormN = SmoothedWorm(worm.skeleton, worm.widths, worm.ventral_contour, worm.dorsal_contour, skel_smooth_window=5, coords_smooth_window=coords_smooth_window, gap_to_interp=5)

    # (disabled) save the smoothed worm in a compressed .npz file
    # save_file = os.path.join(save_dir, save_prefix.format(worm_index))
    # np.savez_compressed(save_file,
    # skeleton=wormN.skeleton,
    # ventral_contour=wormN.ventral_contour,
# Scratch script: calculate the feature statistics of the sub-trajectories of
# the first valid worm of `skeletons_file`.
is_single_worm = False
use_manual_join = False
use_skel_filter = True
# hard-coded frame rate (it is not read from the file here)
fps = 25

good_traj_index, worm_index_type = getGoodTrajIndexes(skeletons_file, use_skel_filter, use_manual_join, is_single_worm, param.feat_filt_param)

# only the first valid trajectory is used
worm_index = good_traj_index[0]
worm = WormFromTable(
    skeletons_file, worm_index, use_skel_filter=use_skel_filter, worm_index_type=worm_index_type, smooth_window=5)

# length of each sub-trajectory in frames (presumably 300 seconds at `fps`
# frames per second -- TODO confirm the units)
split_traj_frames = 300*fps
# keep only the sub-trajectories with more than 100 valid skeletons
splitted_worms = [x for x in worm.splitWormTraj(split_traj_frames) if x.n_valid_skel > 100]

wStats = WormStats()
# second element returned by getFeatStats for each sub-trajectory
dd = [getFeatStats(x, wStats)[1] for x in splitted_worms]
# for each statistic, the list of its values over the sub-trajectories
splitted_feats = {stat:[x[stat] for x in dd] for stat in FUNC_FOR_DIV}

# (disabled) calculation of the features with a copy of the worm
# worm_openworm = copy.copy(worm)
# worm_openworm.changeAxis()
# assert worm_openworm.skeleton.shape[1] == 2
# worm_features = mv.WormFeatures(worm_openworm)
#
def smooth_skeletons_table(skeletons_file,
                           features_file,
                           is_WT2=False,
                           skel_smooth_window=5,
                           coords_smooth_window_s=0.25,
                           gap_to_interp_s=0.25):
    """Smooth the skeletons of every worm and save them in ``features_file``.

    ``features_file`` is opened in write mode (any previous content is
    replaced) and filled with:

    * ``/food_cnt_coord``: food contour as float32, only if
      ``read_food_contour`` returns one.
    * ``/coordinates/{skeletons,dorsal_contours,ventral_contours,widths}``:
      data of all the worms appended one after the other.
    * ``/trajectories_data``: table returned by
      ``_r_fill_trajectories_data`` with its ``skeleton_id`` column updated
      to point to the rows of the ``/coordinates`` arrays.
    * ``/blob_features``: only if ``_r_fill_blob_features`` returns a table.

    Parameters
    ----------
    skeletons_file : str
        Path of the input HDF5 file; its ``/skeleton`` node is read.
    features_file : str
        Path of the output HDF5 file.
    is_WT2 : bool
        When true ``worm.correct_schafer_worm()`` is applied to each worm.
        The value is also stored as an attribute of ``/trajectories_data``.
    skel_smooth_window : int
        Forwarded to ``SmoothedWorm``.
    coords_smooth_window_s : float
        Multiplied by the fps and rounded to get the ``coords_smooth_window``
        given to ``SmoothedWorm``. ``None`` is given instead if the result
        is 3 or less.
    gap_to_interp_s : float
        Multiplied by the fps and rounded to get the ``gap_to_interp`` given
        to ``SmoothedWorm``.

    Raises
    ------
    ValueError
        If the timestamps of a worm cannot be flagged in the
        ``timestamp_raw`` values of its rows in the trajectories table.
    """
    fps = read_fps(skeletons_file)
    coords_smooth_window = int(np.round(fps * coords_smooth_window_s))
    gap_to_interp = int(np.round(fps * gap_to_interp_s))

    if coords_smooth_window <= 3:
        # do not interpolate
        coords_smooth_window = None

    trajectories_data = _r_fill_trajectories_data(skeletons_file)
    trajectories_data_g = trajectories_data.groupby('worm_index_joined')

    progress_timer = TimeCounter('')
    base_name = get_base_name(skeletons_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        """Print how many worms were processed and the elapsed time."""
        dd = " Smoothing skeletons. Worm %i of %i done." % (n, tot_worms)
        print_flush(
            base_name + dd + ' Total time:' + progress_timer.get_time_str())

    _display_progress(0)

    food_cnt = read_food_contour(skeletons_file)
    with tables.File(skeletons_file, 'r') as fid:
        n_segments = fid.get_node('/skeleton').shape[1]

    with tables.File(features_file, 'w') as fid_features:
        if food_cnt is not None:
            fid_features.create_array(
                '/', 'food_cnt_coord', obj=food_cnt.astype(np.float32))

        # initialize the extendable arrays with the coordinates
        worm_coords_array = {}
        w_node = fid_features.create_group('/', 'coordinates')
        for array_name in ['skeletons',
                           'dorsal_contours',
                           'ventral_contours',
                           'widths']:
            # the widths have one value per segment, the rest have two
            if array_name != 'widths':
                a_shape = (0, n_segments, 2)
            else:
                a_shape = (0, n_segments)

            worm_coords_array[array_name] = fid_features.create_earray(
                w_node,
                array_name,
                shape=a_shape,
                atom=tables.Float32Atom(shape=()),
                filters=TABLE_FILTERS)

        # number of rows already appended to the coordinates arrays
        tot_skeletons = 0
        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            if worm_data['was_skeletonized'].sum() < 2:
                continue

            worm = WormFromTable(skeletons_file,
                                 worm_index,
                                 worm_index_type='worm_index_joined')

            if is_WT2:
                worm.correct_schafer_worm()

            if np.sum(~np.isnan(worm.skeleton[:, 0, 0])) <= 2:
                # too few valid skeletons: save the worm without smoothing
                warnings.warn('Not enough data to smooth. Empty file?')
                wormN = worm
            else:
                wormN = SmoothedWorm(
                    worm.skeleton,
                    worm.widths,
                    worm.ventral_contour,
                    worm.dorsal_contour,
                    skel_smooth_window=skel_smooth_window,
                    coords_smooth_window=coords_smooth_window,
                    gap_to_interp=gap_to_interp)

            # flag the rows of this worm whose timestamp_raw is one of the
            # timestamps of the worm object
            dat_index = pd.Series(
                False, index=worm_data['timestamp_raw'].values)
            try:
                dat_index[worm.timestamp] = True
            except ValueError as err:
                # fail with context instead of dropping into a debugger,
                # which would block a non-interactive run
                raise ValueError(
                    "Could not flag the timestamps of worm {} in the "
                    "'timestamp_raw' values of its trajectories_data "
                    "rows.".format(worm_index)) from err

            # ids of the rows that are about to be appended; the counter is
            # increased by the number of rows so an empty worm is harmless
            n_skeletons = wormN.skeleton.shape[0]
            skeleton_id = np.arange(n_skeletons) + tot_skeletons
            tot_skeletons += n_skeletons

            row_ind = worm_data.index[dat_index.values]
            trajectories_data.loc[row_ind, 'skeleton_id'] = skeleton_id

            # add data
            worm_coords_array['skeletons'].append(wormN.skeleton)
            worm_coords_array['dorsal_contours'].append(wormN.dorsal_contour)
            worm_coords_array['ventral_contours'].append(wormN.ventral_contour)
            worm_coords_array['widths'].append(wormN.widths)

            # display progress
            _display_progress(ind_n + 1)

        # save trajectories data
        newT = fid_features.create_table(
            '/',
            'trajectories_data',
            obj=trajectories_data.to_records(index=False),
            filters=TABLE_FILTERS)
        copy_unit_conversions(newT, skeletons_file)
        newT._v_attrs['is_WT2'] = is_WT2
        newT._v_attrs['ventral_side'] = read_ventral_side(skeletons_file)

        # save blob features interpolating in dropped frames and stage
        # movement (WT2)
        blob_features = _r_fill_blob_features(
            skeletons_file, trajectories_data, is_WT2)
        if blob_features is not None:
            fid_features.create_table(
                '/',
                'blob_features',
                obj=blob_features.to_records(index=False),
                filters=TABLE_FILTERS)