def calculate_feat_stats_cnt(fname):
    """Compute feature statistics for a worm stored as contours in `fname`.

    The base name of the file is printed, the skeleton and contour nodes are
    read from the HDF5 file, a normalized worm is built from the ventral and
    dorsal contours, and one set of statistics is computed for every entry of
    FUNC_FOR_DIV.

    Returns the output of `concat_dataframe(fname, worm_stat)`, where
    `worm_stat` maps each FUNC_FOR_DIV key to its statistics.
    """
    print(os.path.basename(fname))

    node_paths = {
        'skeletons': '/all_skeletons',
        'cnt_dorsal': '/all_non_vulva_contours',
        'cnt_ventral': '/all_vulva_contours',
    }

    def _read_points(fid, field):
        # keep only the first element of each entry stored in the node
        return [entry[0] for entry in fid.get_node(field)[:]]

    data = {}
    with tables.File(fname, 'r') as fid:
        for key, field in node_paths.items():
            data[key] = _read_points(fid, field)

    # only the contours are used to build the worm; 'skeletons' is read but
    # not used afterwards
    bw = mv.BasicWorm.from_contour_factory(data['cnt_ventral'],
                                           data['cnt_dorsal'])
    nw = mv.NormalizedWorm.from_BasicWorm_factory(bw)

    # it seems that the skeletons are in micrometers
    # the simulated data is corrected in time
    bw.video_info.fps = 10

    wf = mv.WormFeatures(nw)
    wStats = WormStats()
    worm_stat = {stat: wStats.getWormStats(wf, func)
                 for stat, func in FUNC_FOR_DIV.items()}

    return concat_dataframe(fname, worm_stat)
def getOpenWormData(worm, wStats=None):
    """Calculate the features of `worm` and pack them for storage.

    Parameters
    ----------
    worm : object exposing `timestamp`, `worm_index` and `skeleton_id`; it is
        also handed to `getFeatStats`.
    wStats : WormStats, optional
        Helper describing the features. Any value that is not a `WormStats`
        instance (including the default `None`) is replaced by a new
        `WormStats()`.

    Returns
    -------
    timeseries_data : numpy array with dtype `wStats.feat_timeseries_dtype`
        and one entry per element of `worm.timestamp`. Fields of features
        that were not computed keep the NaN fill value.
    events_data : dict mapping each event feature name to its value.
    worm_stats : the statistics returned by `getFeatStats`.
    """
    # `None` is used as default instead of a mutable list; the check below
    # treats both the same way.
    if not isinstance(wStats, WormStats):
        wStats = WormStats()

    # get the worm features and its stats
    worm_features, worm_stats = getFeatStats(worm, wStats)
    computed_feats = worm_features._features

    # convert the timeseries features into a recarray
    tot_frames = worm.timestamp.size
    timeseries_data = np.full(tot_frames, np.nan, wStats.feat_timeseries_dtype)

    timeseries_data['timestamp'] = worm.timestamp
    timeseries_data['worm_index'] = worm.worm_index
    timeseries_data['skeleton_id'] = worm.skeleton_id
    timeseries_data['motion_modes'] = \
        computed_feats['locomotion.motion_mode'].value

    for feat in wStats.feat_timeseries:
        # name used by the features object for this feature
        feat_obj = wStats.features_info.loc[feat, 'feat_name_obj']
        if feat_obj in computed_feats:
            timeseries_data[feat] = computed_feats[feat_obj].value

    # convert the events features into a dictionary
    events_data = {}
    for feat in wStats.feat_events:
        feat_obj = wStats.features_info.loc[feat, 'feat_name_obj']
        if feat_obj in computed_feats:
            events_data[feat] = computed_feats[feat_obj].value

    return timeseries_data, events_data, worm_stats
def convertUnits(df, microns_per_pixel):
    """Rescale, in place, every column of `df` according to its units.

    The units of a column are looked up in `WormStats().features_info` after
    removing the subdivision substrings from the column name. Columns whose
    name is unknown, or whose units are not listed below, are multiplied by 1.

    Returns the same `df` object that was passed in.
    """
    subdivision_tags = ('_pos', '_neg', '_abs', '_paused', '_foward', '_backward')
    dict_units = WormStats().features_info['units'].to_dict()

    # the factors are wrapped in callables so that only the one that is
    # actually selected gets evaluated
    factor_by_units = {
        'microns': lambda: microns_per_pixel,
        'microns/seconds': lambda: microns_per_pixel,
        'microns^2': lambda: microns_per_pixel**2,
        'radians/microns': lambda: 1 / microns_per_pixel,
    }

    def _removeExtra(x):
        for tag in subdivision_tags:
            x = x.replace(tag, '')
        return x

    def _getFactor(x):
        units = dict_units.get(_removeExtra(x), '1')
        make_factor = factor_by_units.get(units)
        if make_factor is None:
            return 1
        return make_factor()

    for feat in df:
        df[feat] *= _getFactor(feat)
    return df
def ow_plate_summary(fname):
    """Summarize the features stored in `fname` using `np.nanmean`.

    Event features (from `read_feat_events`) and every column of the
    '/features_timeseries' table are pooled together and passed to
    `WormStats.getWormStats`.

    Returns a DataFrame without the columns listed in
    `WormStats.extra_fields`.
    """
    all_feats = read_feat_events(fname)

    with pd.HDFStore(fname, 'r') as fid:
        features_timeseries = fid['/features_timeseries']

    for col_name in features_timeseries:
        all_feats[col_name] = features_timeseries[col_name].values

    wStats = WormStats()
    plate_stats = pd.DataFrame(wStats.getWormStats(all_feats, np.nanmean))

    keep_cols = [col for col in plate_stats.columns
                 if col not in wStats.extra_fields]
    return plate_stats.loc[:, keep_cols]
def process_ow_file(fname):
    """Return the `np.nanmean` feature summary of `fname` as a single row.

    The '/features_timeseries' columns and the event features read by
    `read_feat_events` are pooled and summarized with
    `WormStats.getWormStats`. The first row of the resulting table is
    returned as a Series, without the entries named in
    `WormStats.extra_fields`.
    """
    with pd.HDFStore(fname, 'r') as fid:
        features_timeseries = fid['/features_timeseries']

    all_feats = read_feat_events(fname)
    for col_name in features_timeseries:
        all_feats[col_name] = features_timeseries[col_name].values

    wStats = WormStats()
    stats_table = pd.DataFrame(wStats.getWormStats(all_feats, np.nanmean))
    # transpose so that features become the index, then pick the first row
    first_row = stats_table.T[0]

    keep_names = [name for name in first_row.index
                  if name not in wStats.extra_fields]
    return first_row[keep_names]
def ow_plate_summary_augmented(fname, **fold_args):
    """Compute one `np.nanmean` feature summary per fold of `fname`.

    NOTE: only the timeseries features are augmented. It is not trivial to
    include the event features when sampling over time.

    `fold_args` are forwarded to `augment_data` together with the
    timeseries table and the fps read from the file. Each element returned
    by `augment_data` is used to index the timeseries table.

    Returns a DataFrame with all the fold summaries concatenated; its first
    column, 'i_fold', holds the fold number.
    """
    fps = read_fps(fname)
    with pd.HDFStore(fname, 'r') as fid:
        features_timeseries = fid['/features_timeseries']

    fold_index = augment_data(features_timeseries, fps=fps, **fold_args)

    wStats = WormStats()
    valid_order = None
    fold_summaries = []
    for i_fold, ind_fold in enumerate(fold_index):
        fold_ts = features_timeseries[ind_fold].reset_index(drop=True)
        fold_feats = {col: fold_ts[col].values for col in fold_ts}

        fold_stats = pd.DataFrame(wStats.getWormStats(fold_feats, np.nanmean))

        if valid_order is None:
            # only calculate this the first time...
            valid_order = [col for col in fold_stats.columns
                           if col not in wStats.extra_fields]

        fold_stats = fold_stats.loc[:, valid_order]
        fold_stats.insert(0, 'i_fold', i_fold)
        fold_summaries.append(fold_stats)

    return pd.concat(fold_summaries, ignore_index=True)
def ow_trajectories_summary(fname):
    """Compute one `np.nanmean` feature summary per worm trajectory.

    The '/features_timeseries' table is grouped by 'worm_index'. For each
    group the matching event features are read with `read_feat_events`,
    pooled with the timeseries columns and summarized with
    `WormStats.getWormStats`. `add_trajectory_info` is then applied to each
    summary.

    Returns a DataFrame with the summaries of all trajectories concatenated.
    """
    fps = read_fps(fname)
    with pd.HDFStore(fname, 'r') as fid:
        features_timeseries = fid['/features_timeseries']

    wStats = WormStats()
    valid_order = None
    per_worm = []
    for w_ind, w_ts_data in features_timeseries.groupby('worm_index'):
        event_groups = ['worm_{}'.format(int(w_ind))]
        worm_feats = read_feat_events(fname, event_groups)
        for col in w_ts_data:
            worm_feats[col] = w_ts_data[col].values

        worm_summary = pd.DataFrame(
            wStats.getWormStats(worm_feats, np.nanmean))

        if valid_order is None:
            # only calculate this the first time...
            valid_order = [col for col in worm_summary.columns
                           if col not in wStats.extra_fields]

        # remove uncalculated indexes from wStats
        worm_summary = worm_summary.loc[:, valid_order]
        assert 'worm_index' not in worm_summary

        worm_summary = add_trajectory_info(worm_summary, w_ind, w_ts_data, fps)
        per_worm.append(worm_summary)

    return pd.concat(per_worm, ignore_index=True)
def getFeatStats(worm, wStats):
    """Calculate the features of `worm` and one set of statistics per
    entry of FUNC_FOR_DIV.

    `wStats` is replaced by a new `WormStats()` when it is not an instance
    of that class. Every set of statistics is completed with the attributes
    of `worm` named in `wStats.extra_fields`.

    Returns the tuple `(worm_features, worm_stats)`, where `worm_stats` maps
    each FUNC_FOR_DIV key to its statistics.
    """
    if not isinstance(wStats, WormStats):
        wStats = WormStats()

    worm_openworm = worm.to_open_worm()
    assert worm_openworm.skeleton.shape[1] == 2
    worm_features = mv.WormFeatures(worm_openworm)

    worm_stats = {}
    for stat, func in FUNC_FOR_DIV.items():
        # reduce each feature with `func`
        stat_values = wStats.getWormStats(worm_features, func)
        for field in wStats.extra_fields:
            stat_values[field] = getattr(worm, field)
        worm_stats[stat] = stat_values

    return worm_features, worm_stats
def _getUnits(features_file, READ_FEATURES=False):
    """Build the ordered mapping of field names to unit labels.

    Parameters
    ----------
    features_file : passed unchanged to `read_unit_conversions`.
    READ_FEATURES : bool
        When true, one extra entry per feature listed in
        `WormStats().features_info['units']` is added, keyed as
        '@OMG ' + feature name.

    Returns
    -------
    OrderedDict with the keys 'size', 't', 'x', 'y', 'px', 'py' (in that
    order), followed by the optional feature entries.
    """
    fps_out, microns_per_pixel_out, _ = read_unit_conversions(features_file)
    xy_units = microns_per_pixel_out[1]
    time_units = fps_out[2]

    units = OrderedDict()
    units["size"] = "mm"  # size of the plate
    units['t'] = time_units  # frames or seconds

    for field in ['x', 'y', 'px', 'py']:
        units[field] = xy_units  # (pixels or micrometers)

    if READ_FEATURES:
        # TODO how to change microns to pixels when required
        ws = WormStats()
        # `Series.items()` is used because `Series.iteritems()` was removed
        # in pandas 2.0; `items()` is also available in older versions.
        for field, unit in ws.features_info['units'].items():
            units['@OMG ' + field] = unit

    return units
def getWormFeaturesFilt(
        skeletons_file,
        features_file,
        use_skel_filter,
        use_manual_join,
        is_single_worm,
        feat_filt_param,
        split_traj_time):
    """Calculate the features of every valid trajectory in `skeletons_file`
    and write them into a new HDF5 file, `features_file`.

    Parameters
    ----------
    skeletons_file : path of the HDF5 file with the skeletons (read only).
    features_file : path of the output file; it is opened in 'w' mode, so
        any previous content is overwritten.
    use_skel_filter, use_manual_join : forwarded to `getGoodTrajIndexes`
        (`use_skel_filter` is also forwarded to `WormFromTable`).
    is_single_worm : forwarded to `getGoodTrajIndexes`; when true
        `worm.correct_schafer_worm()` is applied to every worm.
    feat_filt_param : mapping expanded as keyword arguments of
        `min_num_skel_defaults`; the result must provide the keys
        'min_num_skel' and 'bad_seg_thresh'.
    split_traj_time : multiplied by the fps and rounded to obtain the number
        of frames used to split each trajectory (presumably given in
        seconds -- TODO confirm).

    Output file layout
    ------------------
    /experiment_info (copied from the skeletons file if present),
    /features_timeseries, /features_events/worm_<index>/<feature>,
    /coordinates/{skeletons, dorsal_contours, ventral_contours},
    /features_summary/<stat> and /features_summary/<stat>_split for every
    key of FUNC_FOR_DIV, plus /features_means and /features_means_split as
    duplicates of the 'means' tables.

    Returns None. If no valid trajectory is found the function returns after
    creating the file (and copying /experiment_info when available).
    """

    feat_filt_param = min_num_skel_defaults(skeletons_file, **feat_filt_param)

    def _iniFileGroups():
        # NOTE: this helper relies on `features_fid`, `wStats`, `tot_worms`
        # and `worm_index_type` from the enclosing scope, so it can only be
        # called after they are assigned below.

        # initialize groups for the timeseries and event features
        header_timeseries = {
            feat: tables.Float32Col(
                pos=ii) for ii, (feat, _) in enumerate(
                wStats.feat_timeseries_dtype)}

        table_timeseries = features_fid.create_table(
            '/', 'features_timeseries', header_timeseries, filters=TABLE_FILTERS)

        # save some data used in the calculation as attributes
        # (the unpacked values are not used inside this helper)
        fps, microns_per_pixel, _ = copy_unit_conversions(table_timeseries, skeletons_file)
        table_timeseries._v_attrs['worm_index_type'] = worm_index_type

        # node to save features events
        group_events = features_fid.create_group('/', 'features_events')

        # save the skeletons
        # the shape of '/skeleton' is used to size the coordinates arrays
        with tables.File(skeletons_file, 'r') as ske_file_id:
            skel_shape = ske_file_id.get_node('/skeleton').shape

        # extendable arrays (first dimension is 0) where the coordinates of
        # each worm are appended
        worm_coords_array = {}
        w_node = features_fid.create_group('/', 'coordinates')
        for array_name in ['skeletons', 'dorsal_contours', 'ventral_contours']:
            worm_coords_array[array_name] = features_fid.create_earray(
                w_node, array_name, shape=(
                    0, skel_shape[1], skel_shape[2]), atom=tables.Float32Atom(
                    shape=()), filters=TABLE_FILTERS)

        # initialize rec array with the averaged features of each worm
        # (one NaN-filled array per statistic, with one row per worm)
        stats_features_df = {stat:np.full(tot_worms, np.nan, dtype=wStats.feat_avg_dtype) for stat in FUNC_FOR_DIV}

        return header_timeseries, table_timeseries, group_events, worm_coords_array, stats_features_df

    # timer used to report the elapsed time in the progress messages
    progress_timer = TimeCounter('')

    def _displayProgress(n):
        # display progress
        dd = " Extracting features. Worm %i of %i done." % (n, tot_worms)
        print_flush(
            base_name + dd + ' Total time:' + progress_timer.get_time_str())

    #get the valid number of worms
    good_traj_index, worm_index_type = getGoodTrajIndexes(skeletons_file, use_skel_filter, use_manual_join, is_single_worm, feat_filt_param)

    fps = read_fps(skeletons_file)
    split_traj_frames = int(np.round(split_traj_time*fps)) #the fps could be non integer

    # prefix for the progress messages: the file name without directory and
    # extension, truncated at its last '_' (empty if the name has no '_')
    base_name = skeletons_file.rpartition('.')[0].rpartition(os.sep)[-1].rpartition('_')[0]

    with tables.File(features_file, 'w') as features_fid:
        # copy the experiment information, if available, into the new file
        with tables.File(skeletons_file, 'r') as skel_fid:
            if '/experiment_info' in skel_fid:
                dd = skel_fid.get_node('/experiment_info').read()
                features_fid.create_array(
                    '/', 'experiment_info', obj=dd)

        #total number of worms
        tot_worms = len(good_traj_index)
        if tot_worms == 0:
            print_flush(base_name + ' No valid worms found to calculate features. Creating empty file.')
            return

        # initialize by getting the specs data subdivision
        wStats = WormStats()
        # per statistic, the features of the trajectory segments of all worms
        all_splitted_feats = {stat:[] for stat in FUNC_FOR_DIV}

        #initialize file
        header_timeseries, table_timeseries, group_events, \
        worm_coords_array, stats_features_df = _iniFileGroups()

        _displayProgress(0)
        # start to calculate features for each worm trajectory
        for ind_N, worm_index in enumerate(good_traj_index):
            # initialize worm object, and extract data from skeletons file
            worm = WormFromTable(
                skeletons_file, worm_index, use_skel_filter=use_skel_filter, worm_index_type=worm_index_type, smooth_window=5)

            if is_single_worm:
                #worm with the stage correction applied
                worm.correct_schafer_worm()
                if np.all(np.isnan(worm.skeleton[:, 0, 0])):
                    # NOTE(review): the message says the worm is skipped, but
                    # `return` ends the whole function, so no summary tables
                    # are written -- confirm this is intended.
                    print_flush('{} Not valid skeletons found after stage correction. Skiping worm index {}'.format(base_name, worm_index))
                    return

            # calculate features
            timeseries_data, events_data, worm_stats = getOpenWormData(worm, wStats)

            #get splitted features
            # only segments with enough valid skeletons, both in absolute
            # number and as a fraction of their frames, are kept
            splitted_worms = [x for x in worm.split(split_traj_frames) if x.n_valid_skel > feat_filt_param['min_num_skel'] and x.n_valid_skel/x.n_frames >= feat_filt_param['bad_seg_thresh']]

            dd = [getFeatStats(x, wStats)[1] for x in splitted_worms]
            splitted_feats = {stat:[x[stat] for x in dd] for stat in FUNC_FOR_DIV}

            #% add data to save
            # save timeseries data
            table_timeseries.append(timeseries_data)
            table_timeseries.flush()

            # save skeletons
            worm_coords_array['skeletons'].append(worm.skeleton)
            worm_coords_array['dorsal_contours'].append(worm.dorsal_contour)
            worm_coords_array['ventral_contours'].append(worm.ventral_contour)

            # save event data as a subgroup per worm
            worm_node = features_fid.create_group(
                group_events, 'worm_%i' % worm_index)
            worm_node._v_attrs['worm_index'] = worm_index
            worm_node._v_attrs['frame_range'] = np.array(
                (worm.first_frame, worm.last_frame))

            for feat in events_data:
                tmp_data = events_data[feat]
                # consider the cases where the output is a single number, empty
                # or None
                if isinstance(tmp_data, (float, int)):
                    tmp_data = np.array([tmp_data])

                # `None` is tested first so `.size` is never read from it
                if tmp_data is None or tmp_data.size == 0:
                    tmp_data = np.array([np.nan])

                features_fid.create_carray(
                    worm_node, feat, obj=tmp_data, filters=TABLE_FILTERS)

            # store the average for each worm feature
            for stat in FUNC_FOR_DIV:
                stats_features_df[stat][ind_N] = worm_stats[stat]

                #append the splitted traj features
                all_splitted_feats[stat] += splitted_feats[stat]

            # report progress
            _displayProgress(ind_N + 1)

        # create and save a table containing the averaged worm feature for each
        # worm
        f_node = features_fid.create_group('/', 'features_summary')
        for stat, stats_df in stats_features_df.items():
            splitted_feats = all_splitted_feats[stat]

            #check that the array is not empty
            if len(splitted_feats) > 0:
                splitted_feats_arr = np.array(splitted_feats)
            else:
                #return a row full of nan to indicate a fail
                splitted_feats_arr = np.full(1, np.nan, dtype=wStats.feat_avg_dtype)

            features_fid.create_table(
                f_node, stat, obj = stats_df, filters = TABLE_FILTERS )

            feat_stat_split = features_fid.create_table(
                f_node, stat + '_split', obj=splitted_feats_arr, filters=TABLE_FILTERS )
            # keep the segment length (in frames) together with the table
            feat_stat_split._v_attrs['split_traj_frames'] = split_traj_frames

            if stat == 'means':
                #FUTURE: I am duplicating this field for backward compatibility, I should remove it later on.
                features_fid.create_table(
                    '/', 'features_means', obj = stats_df, filters = TABLE_FILTERS )

                features_fid.create_table(
                    '/', 'features_means_split', obj=splitted_feats_arr, filters=TABLE_FILTERS )

    print_flush(
        base_name + ' Feature extraction finished: ' + progress_timer.get_time_str())
param.feat_filt_param) worm_index = good_traj_index[0] worm = WormFromTable( skeletons_file, worm_index, use_skel_filter=use_skel_filter, worm_index_type=worm_index_type, smooth_window=5) split_traj_frames = 300*fps splitted_worms = [x for x in worm.splitWormTraj(split_traj_frames) if x.n_valid_skel > 100] wStats = WormStats() dd = [getFeatStats(x, wStats)[1] for x in splitted_worms] splitted_feats = {stat:[x[stat] for x in dd] for stat in FUNC_FOR_DIV} # worm_openworm = copy.copy(worm) # worm_openworm.changeAxis() # assert worm_openworm.skeleton.shape[1] == 2 # worm_features = mv.WormFeatures(worm_openworm) # # wStats = WormStatsClass() # worm_stats = wStats.getWormStats(worm_features, np.mean) #%% # getWormFeaturesFilt( # skeletons_file,
def __init__(self):
    """Create a `WormStats` helper and keep it in `self.ws`."""
    stats_helper = WormStats()
    self.ws = stats_helper