def read_file_data(mask_file, feat_file, min_pulse_size_s=3, _is_debug=False):
    """Read the features of a video and label each frame with its light region.

    Parameters
    ----------
    mask_file : str
        File passed to `read_fps` and `read_light_data`.
    feat_file : str
        HDF5 file holding the `/timeseries_data` and `/blob_features` tables.
    min_pulse_size_s : float
        Minimum pulse duration in seconds; multiplied by the fps to get the
        threshold handed to `get_pulses_indexes`.
    _is_debug : bool
        If True, plot the light signal and the detected switch points.

    Returns
    -------
    tuple or None
        `(timeseries_data, blob_features, fps, region_size)`, or None when the
        summed light signal is smaller than the minimum pulse size.
    """
    fps = read_fps(mask_file)
    min_pulse_size = fps*min_pulse_size_s

    light_on = read_light_data(mask_file)
    if np.nansum(light_on) < min_pulse_size:
        # the light was not on long enough to contain a single valid pulse
        return None

    turn_on, turn_off = get_pulses_indexes(light_on, min_pulse_size)
    region_lab = define_regions(light_on.size, turn_on, turn_off)
    # the count of label 0 is dropped; the remaining counts go from frames to seconds
    region_size = np.bincount(region_lab)[1:]/fps

    if _is_debug:
        plt.figure()
        #plt.plot(region_lab)
        plt.plot(light_on)
        plt.plot(turn_on, light_on[turn_on], 'o')
        plt.plot(turn_off, light_on[turn_off], 'x')
        plt.title(os.path.basename(mask_file))

    # read features
    with pd.HDFStore(feat_file, 'r') as fid:
        timeseries_data = fid['/timeseries_data']
        blob_features = fid['/blob_features']

    # builtin int replaces the np.int alias that was removed in NumPy 1.24
    timeseries_data['timestamp'] = timeseries_data['timestamp'].astype(int)
    # label each frame with the corresponding region
    timeseries_data['region_lab'] = region_lab[timeseries_data['timestamp']]

    return timeseries_data, blob_features, fps, region_size
def find_rev_frac(results_dir, base_name, expected_pulse_size_s, check_window_s):
    """Compute the reversal fraction before, at the start of, and after each light pulse.

    Parameters
    ----------
    results_dir, base_name : str
        Passed to `get_names` to resolve the features and mask files.
    expected_pulse_size_s : float
        Expected pulse duration in seconds. Pulses shorter than half of it
        are ignored.
    check_window_s : float
        Window size in seconds handed (in frames) to `find_pulse_rev_frac`.

    Returns
    -------
    list of tuple
        One `(before, centre, after)` tuple per accepted pulse.
    """
    feat_file, mask_file = get_names(results_dir, base_name)

    fps = read_fps(feat_file)
    expected_pulse_size = fps*expected_pulse_size_s
    check_window = fps*check_window_s

    light_on = read_light_data(mask_file)

    with pd.HDFStore(feat_file, 'r') as fid:
        feat_timeseries = fid['/features_timeseries']
    # builtin int replaces the np.int alias that was removed in NumPy 1.24
    feat_timeseries['timestamp'] = feat_timeseries['timestamp'].astype(int)

    # find the indexes where the pulses start and end; the cast to int makes
    # the difference signed (+1 on switch-on, -1 on switch-off)
    switches = np.diff(light_on.astype(int))
    turn_on, = np.where(switches==1)
    turn_off, = np.where(switches==-1)

    assert turn_on.size == turn_off.size

    # find the reversal fraction for each pulse
    rev_fracs = []
    for ini, fin in zip(turn_on, turn_off):
        if fin-ini < expected_pulse_size/2:
            # the pulse is too short, let's ignore it
            continue

        before = find_pulse_rev_frac(feat_timeseries, ini-expected_pulse_size, check_window)
        centre = find_pulse_rev_frac(feat_timeseries, ini, check_window)
        after = find_pulse_rev_frac(feat_timeseries, fin+expected_pulse_size, check_window)
        rev_fracs.append((before, centre, after))

    return rev_fracs
def read_input(results_dir, base_name, base_window_s, expected_pulse_size_s):
    """Read the light regions and the features timeseries of one experiment.

    Parameters
    ----------
    results_dir, base_name : str
        Passed to `get_names` to resolve the features and mask files.
    base_window_s : float
        Window size in seconds, converted to frames for `get_regions`.
    expected_pulse_size_s : float
        Expected pulse duration in seconds, converted to frames for `get_regions`.

    Returns
    -------
    tuple
        `(feat_ranges, feat_timeseries, fps)`. In `feat_timeseries`, every
        column listed in `signed_ventral_feats` is replaced by its absolute value.
    """
    feat_file, mask_file = get_names(results_dir, base_name)

    fps = read_fps(feat_file)
    expected_pulse_size = fps*expected_pulse_size_s
    base_window = fps*base_window_s

    light_on = read_light_data(mask_file)
    feat_ranges = get_regions(light_on, expected_pulse_size, base_window)

    # read features
    with pd.HDFStore(feat_file, 'r') as fid:
        feat_timeseries = fid['/features_timeseries']
    # builtin int replaces the np.int alias that was removed in NumPy 1.24
    feat_timeseries['timestamp'] = feat_timeseries['timestamp'].astype(int)

    for feat in signed_ventral_feats:
        feat_timeseries[feat] = feat_timeseries[feat].abs()

    return feat_ranges, feat_timeseries, fps
def __init__(self, file_name, worm_index, use_skel_filter=True, worm_index_type='worm_index_joined', smooth_window=-1, POL_DEGREE_DFLT=3):
    """Populate a normalized worm from `file_name` and optionally smooth it.

    Parameters
    ----------
    file_name : str
        File the worm data and the unit conversions are read from.
    worm_index : int
        Index of the worm to load.
    use_skel_filter : bool
        Stored as-is; consumed elsewhere in the class.
    worm_index_type : str
        Stored as-is; consumed elsewhere in the class.
    smooth_window : int
        Smoothing window. Values not larger than `POL_DEGREE_DFLT` disable
        smoothing; even values at or above it are bumped to the next odd number.
    POL_DEGREE_DFLT : int
        Default polynomial order of the Savitzky-Golay filter.
    """
    # Units fall back to 1 when missing, so features can still be calculated
    # in pixels and frames instead of micrometers and seconds.
    self.microns_per_pixel = read_microns_per_pixel(file_name, dflt=1)
    self.fps = read_fps(file_name, dflt=1)

    # keep a record of the input parameters
    self.POL_DEGREE_DFLT = POL_DEGREE_DFLT
    self.file_name = file_name
    self.worm_index = worm_index
    self.use_skel_filter = use_skel_filter
    self.worm_index_type = worm_index_type

    # The Savitzky-Golay filter needs an odd window larger than the
    # polynomial degree; a window below the degree switches smoothing off.
    window = smooth_window
    if window >= POL_DEGREE_DFLT and window % 2 == 0:
        window += 1
    self.smooth_window = window

    self.ventral_side = 'unknown'
    self._h_read_data()

    # smooth data if required
    if self.smooth_window > self.POL_DEGREE_DFLT:
        for attr_name in ('skeleton', 'widths'):
            smoothed = _h_smooth_curve_all(
                getattr(self, attr_name), window=self.smooth_window)
            setattr(self, attr_name, smoothed)

    # check that the dimensions of the read data are correct
    self._h_assert_data_dim()
def tierpsy_plate_summary_augmented(fname, time_windows, time_units, is_manual_index=False, delta_time=1/3, **fold_args):
    """Calculate augmented (per-fold) plate summaries for every time window.

    `augment_data` is called with `fold_args` on each non-empty window and
    each fold it returns is summarised with `get_summary_stats`.

    Returns
    -------
    list of pandas.DataFrame
        One dataframe per time window with one row per fold (column `i_fold`
        first). Empty dataframes are returned for empty windows, or for all
        windows when `read_data` returns None.
    """
    fps = read_fps(fname)
    windows_data = read_data(fname, time_windows, time_units, fps, is_manual_index)
    if windows_data is None:
        return [pd.DataFrame() for _ in time_windows]

    timeseries_data, blob_features = windows_data

    summaries_per_window = []
    for iwin in range(len(time_windows)):
        if timeseries_data[iwin].empty:
            summaries_per_window.append(pd.DataFrame([]))
            continue

        folds = augment_data(timeseries_data[iwin], fps=fps, **fold_args)

        fold_summaries = []
        for i_fold, ind_fold in enumerate(folds):
            ts_fold = timeseries_data[iwin][ind_fold].reset_index(drop=True)
            blobs_fold = blob_features[iwin][ind_fold].reset_index(drop=True)

            # summary stats are transposed into a single-row dataframe
            fold_row = pd.DataFrame(
                get_summary_stats(ts_fold, fps, blobs_fold, delta_time)).T
            fold_row.insert(0, 'i_fold', i_fold)
            fold_summaries.append(fold_row)

        # all folds of this window stacked into one dataframe
        summaries_per_window.append(
            pd.concat(fold_summaries, ignore_index=True, sort=False))

    return summaries_per_window
def tierpsy_trajectories_summary(fname, time_windows, time_units, is_manual_index=False, delta_time=1/3):
    """
    Calculate the trajectory summaries for a given file fname, within each of
    the given time windows (units of start time and end time are in frame
    numbers).

    Returns a list with one dataframe per time window, holding one row per
    `worm_index`. Empty dataframes are returned for empty windows, or for all
    windows when `read_data` returns None.
    """
    fps = read_fps(fname)
    windows_data = read_data(fname, time_windows, time_units, fps, is_manual_index)
    if windows_data is None:
        return [pd.DataFrame() for _ in time_windows]

    timeseries_data, blob_features = windows_data

    summaries_per_window = []
    for iwin in range(len(time_windows)):
        if timeseries_data[iwin].empty:
            summaries_per_window.append(pd.DataFrame([]))
            continue

        # one single-row dataframe per individual trajectory
        worm_rows = []
        for w_ind, w_ts_data in timeseries_data[iwin].groupby('worm_index'):
            # the blobs are selected with the original index before it is reset
            w_blobs = blob_features[iwin].loc[w_ts_data.index].reset_index(drop=True)
            w_ts_data = w_ts_data.reset_index(drop=True)

            stats = get_summary_stats(w_ts_data, fps, w_blobs, delta_time)
            row = add_trajectory_info(pd.DataFrame(stats).T, w_ind, w_ts_data, fps)
            worm_rows.append(row)

        summaries_per_window.append(
            pd.concat(worm_rows, ignore_index=True, sort=False))

    return summaries_per_window
def tierpsy_plate_summary(fname, time_windows, time_units, is_manual_index=False, delta_time=1/3):
    """
    Calculate the plate summaries for a given file fname, within each of the
    given time windows (units of start time and end time are in frame numbers).

    Returns a list with one single-row dataframe per time window, or a list
    of empty dataframes when `read_data` returns None.
    """
    fps = read_fps(fname)
    windows_data = read_data(fname, time_windows, time_units, fps, is_manual_index)

    # read_data returns None if manual annotation was chosen and the
    # trajectories_data does not contain worm_index_manual, or if the
    # time_windows are in seconds and fps is not defined (fps=-1)
    if windows_data is None:
        return [pd.DataFrame() for _ in time_windows]

    timeseries_data, blob_features = windows_data

    return [
        pd.DataFrame(
            get_summary_stats(
                timeseries_data[iwin], fps, blob_features[iwin], delta_time)
        ).T
        for iwin in range(len(time_windows))
    ]
def tierpsy_trajectories_summary(fname, is_manual_index=False, delta_time=1 / 3):
    """Calculate one summary row per trajectory (`worm_index`) in `fname`.

    Parameters
    ----------
    fname : str
        File passed to `read_fps` and `read_data`.
    is_manual_index : bool
        Forwarded to `read_data`.
    delta_time : float
        Forwarded to `get_summary_stats`.

    Returns
    -------
    pandas.DataFrame or None
        One row per trajectory; an empty dataframe when the file holds no
        trajectories; None when `read_data` returns None.
    """
    fps = read_fps(fname)
    data_in = read_data(fname, is_manual_index)
    if data_in is None:
        return None
    timeseries_data, blob_features = data_in

    all_summary = []
    for w_ind, w_ts_data in timeseries_data.groupby('worm_index'):
        # the blobs are selected with the original index before it is reset
        w_blobs = blob_features.loc[w_ts_data.index]

        w_ts_data = w_ts_data.reset_index(drop=True)
        w_blobs = w_blobs.reset_index(drop=True)

        worm_feats = get_summary_stats(w_ts_data, fps, w_blobs, delta_time)
        worm_feats = pd.DataFrame(worm_feats).T
        worm_feats = add_trajectory_info(worm_feats, w_ind, w_ts_data, fps)

        all_summary.append(worm_feats)

    if not all_summary:
        # pd.concat raises ValueError on an empty list
        return pd.DataFrame()

    return pd.concat(all_summary, ignore_index=True)
def ow_plate_summary_augmented(fname, **fold_args):
    """Calculate augmented (per-fold) OpenWorm plate summaries for `fname`.

    NOTE: only the timeseries features are augmented. It is not trivial to
    include the event features when sampling over time.

    Returns
    -------
    pandas.DataFrame
        One row per fold returned by `augment_data`, with the column `i_fold`
        first and without the columns listed in `WormStats.extra_fields`.
    """
    fps = read_fps(fname)
    with pd.HDFStore(fname, 'r') as fid:
        features_timeseries = fid['/features_timeseries']

    folds = augment_data(features_timeseries, fps=fps, **fold_args)

    wStats = WormStats()
    kept_columns = None  # resolved once, from the first fold
    fold_rows = []
    for i_fold, ind_fold in enumerate(folds):
        fold_ts = features_timeseries[ind_fold].reset_index(drop=True)
        fold_arrays = {col: fold_ts[col].values for col in fold_ts}

        fold_stats = pd.DataFrame(wStats.getWormStats(fold_arrays, np.nanmean))

        if kept_columns is None:
            kept_columns = [
                col for col in fold_stats.columns
                if col not in wStats.extra_fields
            ]

        fold_stats = fold_stats.loc[:, kept_columns]
        fold_stats.insert(0, 'i_fold', i_fold)
        fold_rows.append(fold_stats)

    return pd.concat(fold_rows, ignore_index=True)
def read_file_data(mask_file, feat_file, min_pulse_size_s=3, _is_debug=False):
    """Read the features of a video and label each frame with its light region.

    Parameters
    ----------
    mask_file : str
        File passed to `read_fps` and `read_light_data`; its `/mask` node
        gives the total number of images.
    feat_file : str
        HDF5 file holding `/timeseries_data`, `/blob_features` and
        `/trajectories_data`.
    min_pulse_size_s : float
        Minimum pulse duration in seconds; multiplied by the fps to get the
        threshold handed to `get_pulses_indexes`.
    _is_debug : bool
        If True, plot the light signal and the detected switch points.

    Returns
    -------
    tuple or None
        `(timeseries_data, blob_features, fps, region_size, tot_images,
        len(light_on))`, or None when the summed light signal is smaller than
        the minimum pulse size.
    """
    fps = read_fps(mask_file)
    min_pulse_size = fps * min_pulse_size_s

    # read features
    with pd.HDFStore(feat_file, 'r') as fid:
        timeseries_data = fid['/timeseries_data']
        blob_features = fid['/blob_features']
        trajectories_data = fid['/trajectories_data']

    light_on = read_light_data(mask_file, trajectories_data)
    if np.nansum(light_on) < min_pulse_size:
        # the light was not on long enough to contain a single valid pulse
        return None

    turn_on, turn_off = get_pulses_indexes(light_on, min_pulse_size)
    region_lab = define_regions(light_on.size, turn_on, turn_off)
    # the count of label 0 is dropped; the remaining counts go from frames to seconds
    region_size = np.bincount(region_lab)[1:] / fps

    if _is_debug:
        plt.figure()
        #plt.plot(region_lab)
        plt.plot(light_on)
        plt.plot(turn_on, light_on[turn_on], 'o')
        plt.plot(turn_off, light_on[turn_off], 'x')
        plt.title(os.path.basename(mask_file))

    # builtin int replaces the np.int alias that was removed in NumPy 1.24
    timeseries_data['timestamp'] = timeseries_data['timestamp'].astype(int)
    # label each frame with the corresponding region
    timeseries_data['region_lab'] = region_lab[timeseries_data['timestamp']]

    with tables.File(mask_file, 'r') as fid:
        tot_images = fid.get_node('/mask').shape[0]

    return timeseries_data, blob_features, fps, region_size, tot_images, len(
        light_on)
def ow_trajectories_summary(fname):
    """Calculate one OpenWorm summary row per trajectory (`worm_index`) in `fname`.

    For each worm the event features (read with `read_feat_events`) are
    combined with its timeseries columns and averaged with `np.nanmean`.

    Returns
    -------
    pandas.DataFrame
        One row per trajectory, without the columns listed in
        `WormStats.extra_fields` and extended by `add_trajectory_info`.
    """
    fps = read_fps(fname)
    with pd.HDFStore(fname, 'r') as fid:
        features_timeseries = fid['/features_timeseries']

    wStats = WormStats()
    kept_columns = None  # resolved once, from the first worm
    worm_rows = []
    for w_ind, w_ts_data in features_timeseries.groupby('worm_index'):
        event_nodes = ['worm_{}'.format(int(w_ind))]
        worm_arrays = read_feat_events(fname, event_nodes)
        for col in w_ts_data:
            worm_arrays[col] = w_ts_data[col].values

        worm_stats = pd.DataFrame(wStats.getWormStats(worm_arrays, np.nanmean))

        if kept_columns is None:
            kept_columns = [
                col for col in worm_stats.columns
                if col not in wStats.extra_fields
            ]

        # remove uncalculated indexes from wStats
        worm_stats = worm_stats.loc[:, kept_columns]
        assert 'worm_index' not in worm_stats

        worm_rows.append(add_trajectory_info(worm_stats, w_ind, w_ts_data, fps))

    return pd.concat(worm_rows, ignore_index=True)
def getWormFeaturesFilt(
        skeletons_file,
        features_file,
        use_skel_filter,
        use_manual_join,
        is_single_worm,
        feat_filt_param,
        split_traj_time):
    """Calculate the features of every valid worm trajectory and save them.

    `features_file` is opened in 'w' mode, so any previous content is lost.
    The nodes created here are `/features_timeseries`, `/features_events`,
    `/coordinates`, `/features_summary`, the duplicated `/features_means` and
    `/features_means_split` tables, and `/experiment_info` when the skeletons
    file has it.

    Parameters
    ----------
    skeletons_file : str
        Input file; the skeletons, fps and unit conversions are read from it.
    features_file : str
        Output HDF5 file.
    use_skel_filter, use_manual_join : bool
        Forwarded to `getGoodTrajIndexes` (and `use_skel_filter` to
        `WormFromTable`).
    is_single_worm : bool
        If True, `correct_schafer_worm` is applied to each worm and the
        function returns early if no valid skeleton remains.
    feat_filt_param : dict
        Expanded into `min_num_skel_defaults`; the keys 'min_num_skel' and
        'bad_seg_thresh' of the result filter the split trajectories.
    split_traj_time : float
        Multiplied by the fps to get the split length in frames
        (presumably seconds -- confirm against the caller).

    Returns
    -------
    None
    """

    feat_filt_param = min_num_skel_defaults(skeletons_file, **feat_filt_param)

    def _iniFileGroups():
        # NOTE: this closure reads wStats, features_fid, tot_worms and
        # worm_index_type from the enclosing scope, so it can only be called
        # after they are defined below.

        # initialize groups for the timeseries and event features
        header_timeseries = {
            feat: tables.Float32Col(
                pos=ii) for ii, (feat, _) in enumerate(
                wStats.feat_timeseries_dtype)}

        table_timeseries = features_fid.create_table(
            '/', 'features_timeseries', header_timeseries, filters=TABLE_FILTERS)

        # save some data used in the calculation as attributes
        fps, microns_per_pixel, _ = copy_unit_conversions(table_timeseries, skeletons_file)
        table_timeseries._v_attrs['worm_index_type'] = worm_index_type

        # node to save features events
        group_events = features_fid.create_group('/', 'features_events')

        # save the skeletons
        with tables.File(skeletons_file, 'r') as ske_file_id:
            skel_shape = ske_file_id.get_node('/skeleton').shape

        # extendable arrays (first dimension 0) that grow as worms are appended
        worm_coords_array = {}
        w_node = features_fid.create_group('/', 'coordinates')
        for array_name in ['skeletons', 'dorsal_contours', 'ventral_contours']:
            worm_coords_array[array_name] = features_fid.create_earray(
                w_node, array_name, shape=(
                    0, skel_shape[1], skel_shape[2]), atom=tables.Float32Atom(
                    shape=()), filters=TABLE_FILTERS)

        # initialize rec array with the averaged features of each worm
        stats_features_df = {stat:np.full(tot_worms, np.nan, dtype=wStats.feat_avg_dtype) for stat in FUNC_FOR_DIV}

        return header_timeseries, table_timeseries, group_events, worm_coords_array, stats_features_df

    # timer used to report the elapsed time together with the progress
    progress_timer = TimeCounter('')

    def _displayProgress(n):
        # display progress (reads tot_worms and base_name from the enclosing scope)
        dd = " Extracting features. Worm %i of %i done." % (n, tot_worms)
        print_flush(
            base_name +
            dd +
            ' Total time:' +
            progress_timer.get_time_str())

    # get the valid number of worms
    good_traj_index, worm_index_type = getGoodTrajIndexes(skeletons_file,
                                                          use_skel_filter,
                                                          use_manual_join,
                                                          is_single_worm,
                                                          feat_filt_param)

    fps = read_fps(skeletons_file)
    split_traj_frames = int(np.round(split_traj_time*fps)) #the fps could be non integer

    # file name without directory, extension and last '_' suffix; used as
    # prefix of the progress messages
    base_name = skeletons_file.rpartition('.')[0].rpartition(os.sep)[-1].rpartition('_')[0]

    with tables.File(features_file, 'w') as features_fid:
        # copy the experiment info, if any, into the features file
        with tables.File(skeletons_file, 'r') as skel_fid:
            if '/experiment_info' in skel_fid:
                dd = skel_fid.get_node('/experiment_info').read()
                features_fid.create_array(
                    '/', 'experiment_info', obj=dd)

        # total number of worms
        tot_worms = len(good_traj_index)
        if tot_worms == 0:
            print_flush(base_name + ' No valid worms found to calculate features. Creating empty file.')
            return

        # initialize by getting the specs data subdivision
        wStats = WormStats()
        all_splitted_feats = {stat:[] for stat in FUNC_FOR_DIV}

        # initialize file
        header_timeseries, table_timeseries, group_events, \
            worm_coords_array, stats_features_df = _iniFileGroups()

        _displayProgress(0)
        # start to calculate features for each worm trajectory
        for ind_N, worm_index in enumerate(good_traj_index):
            # initialize worm object, and extract data from skeletons file
            worm = WormFromTable(
                skeletons_file,
                worm_index,
                use_skel_filter=use_skel_filter,
                worm_index_type=worm_index_type,
                smooth_window=5)

            if is_single_worm:
                # worm with the stage correction applied
                worm.correct_schafer_worm()
                if np.all(np.isnan(worm.skeleton[:, 0, 0])):
                    # NOTE: this leaves the whole function, not only this worm
                    print_flush('{} Not valid skeletons found after stage correction. Skiping worm index {}'.format(base_name, worm_index))
                    return

            # calculate features
            timeseries_data, events_data, worm_stats = getOpenWormData(worm, wStats)

            # get splitted features: keep only the chunks with enough valid
            # skeletons, both in absolute number and as fraction of frames
            splitted_worms = [x for x in worm.split(split_traj_frames)
                              if x.n_valid_skel > feat_filt_param['min_num_skel'] and
                              x.n_valid_skel/x.n_frames >= feat_filt_param['bad_seg_thresh']]

            dd = [getFeatStats(x, wStats)[1] for x in splitted_worms]
            splitted_feats = {stat:[x[stat] for x in dd] for stat in FUNC_FOR_DIV}

            #% add data to save
            # save timeseries data
            table_timeseries.append(timeseries_data)
            table_timeseries.flush()

            # save skeletons
            worm_coords_array['skeletons'].append(worm.skeleton)
            worm_coords_array['dorsal_contours'].append(worm.dorsal_contour)
            worm_coords_array['ventral_contours'].append(worm.ventral_contour)

            # save event data as a subgroup per worm
            worm_node = features_fid.create_group(
                group_events, 'worm_%i' % worm_index)
            worm_node._v_attrs['worm_index'] = worm_index
            worm_node._v_attrs['frame_range'] = np.array(
                (worm.first_frame, worm.last_frame))

            for feat in events_data:
                tmp_data = events_data[feat]
                # consider the cases where the output is a single number, empty
                # or None
                if isinstance(tmp_data, (float, int)):
                    tmp_data = np.array([tmp_data])
                if tmp_data is None or tmp_data.size == 0:
                    tmp_data = np.array([np.nan])
                features_fid.create_carray(
                    worm_node, feat, obj=tmp_data, filters=TABLE_FILTERS)

            # store the average for each worm feature
            for stat in FUNC_FOR_DIV:
                stats_features_df[stat][ind_N] = worm_stats[stat]

                # append the splitted traj features
                all_splitted_feats[stat] += splitted_feats[stat]

            # report progress
            _displayProgress(ind_N + 1)

        # create and save a table containing the averaged worm feature for each
        # worm
        f_node = features_fid.create_group('/', 'features_summary')
        for stat, stats_df in stats_features_df.items():
            splitted_feats = all_splitted_feats[stat]

            # check that the array is not empty
            if len(splitted_feats) > 0:
                splitted_feats_arr = np.array(splitted_feats)
            else:
                # return a row full of nan to indicate a fail
                splitted_feats_arr = np.full(1, np.nan, dtype=wStats.feat_avg_dtype)

            features_fid.create_table(
                f_node,
                stat,
                obj = stats_df,
                filters = TABLE_FILTERS
                )

            feat_stat_split = features_fid.create_table(
                f_node,
                stat + '_split',
                obj=splitted_feats_arr,
                filters=TABLE_FILTERS
                )
            feat_stat_split._v_attrs['split_traj_frames'] = split_traj_frames

            if stat == 'means':
                #FUTURE: I am duplicating this field for backward compatibility, I should remove it later on.
                features_fid.create_table(
                    '/',
                    'features_means',
                    obj = stats_df,
                    filters = TABLE_FILTERS
                    )

                features_fid.create_table(
                    '/',
                    'features_means_split',
                    obj=splitted_feats_arr,
                    filters=TABLE_FILTERS
                    )

    print_flush(
        base_name +
        ' Feature extraction finished: ' +
        progress_timer.get_time_str())
def alignStageMotion(masked_file, skeletons_file):
    """Align the recorded stage movements with the video frames.

    The frame differences of the masked video are matched against the media
    times in the stage log (`findStageMovement`) and the result is stored in
    the `/stage_movement` group of `skeletons_file`. Any previous
    `/stage_movement` group is deleted first. The group attribute
    `has_finished` is 0 while running and 1 on success.

    Parameters
    ----------
    masked_file : str
        HDF5 file with `/xml_info`, `/mask` (with the `pixels2microns_x/y`
        attributes) and `/stage_log`.
    skeletons_file : str
        HDF5 file the fps is read from and the results are written to.

    Raises
    ------
    ValueError
        If the number of timestamps does not match the number of frame
        differences.
    """
    base_name = get_base_name(masked_file)
    print_flush(base_name + ' Aligning Stage Motion...')

    fps = read_fps(skeletons_file)

    # Open the information file and read the tracking delay time.
    # (help from segworm findStageMovement)
    # 2. The info file contains the tracking delay. This delay represents the
    # minimum time between stage movements and, conversely, the maximum time it
    # takes for a stage movement to complete. If the delay is too small, the
    # stage movements become chaotic. We load the value for the delay.
    with tables.File(masked_file, 'r') as fid:
        xml_info = fid.get_node('/xml_info').read().decode()
        g_mask = fid.get_node('/mask')

        tot_frames = g_mask.shape[0]
        # Read the scale conversions, needed to convert pixels into microns
        pixelPerMicronX = 1 / g_mask._v_attrs['pixels2microns_x']
        pixelPerMicronY = 1 / g_mask._v_attrs['pixels2microns_y']

    with pd.HDFStore(masked_file, 'r') as fid:
        stage_log = fid['/stage_log']

    # the delay is extracted by plain string partition (no xml parser), as in
    # the original matlab code; it is divided by 1000 (presumably ms -> s)
    delay_str = xml_info.partition('<delay>')[-1].partition('</delay>')[0]
    delay_time = float(delay_str) / 1000
    delay_frames = np.ceil(delay_time * fps)

    # Root mean square of both axis scales. The previous code squared the X
    # scale twice and ignored the Y scale.
    normScale = np.sqrt((pixelPerMicronX**2 + pixelPerMicronY**2) / 2)
    pixelPerMicronScale = normScale * np.array(
        (np.sign(pixelPerMicronX), np.sign(pixelPerMicronY)))

    # Compute the rotation matrix.
    angle = np.arctan(pixelPerMicronY / pixelPerMicronX)
    if angle > 0:
        angle = np.pi / 4 - angle
    else:
        angle = np.pi / 4 + angle

    cosAngle = np.cos(angle)
    sinAngle = np.sin(angle)
    rotation_matrix = np.array(((cosAngle, -sinAngle), (sinAngle, cosAngle)))

    # Ev's code uses the full vectors without dropping frames
    # 1. video2Diff differentiates a video frame by frame and outputs the
    # differential variance. We load these frame differences.
    frame_diffs_d = getFrameDiffVar(masked_file)

    print_flush(base_name + ' Aligning Stage Motion...')
    # Read the media times and locations from the log file.
    # (help from segworm findStageMovement)
    # 3. The log file contains the initial stage location at media time 0 as
    # well as the subsequent media times and locations per stage movement. Our
    # algorithm attempts to match the frame differences in the video (see step
    # 1) to the media times in this log file. Therefore, we load these media
    # times and stage locations.
    # from the .log.csv file
    mediaTimes = stage_log['stage_time'].values
    locations = stage_log[['stage_x', 'stage_y']].values

    # ini stage movement fields
    with tables.File(skeletons_file, 'r+') as fid:
        # delete data from previous analysis if any
        if '/stage_movement' in fid:
            fid.remove_node('/stage_movement', recursive=True)
        g_stage_movement = fid.create_group('/', 'stage_movement')
        g_stage_movement._v_attrs['has_finished'] = 0

        # read and prepare timestamp
        try:
            video_timestamp_ind = fid.get_node('/timestamp/raw')[:]
            if np.any(np.isnan(video_timestamp_ind)):
                raise ValueError()
            else:
                # builtin int replaces the np.int alias removed in NumPy 1.24
                video_timestamp_ind = video_timestamp_ind.astype(int)
        except (tables.exceptions.NoSuchNodeError, ValueError):
            warnings.warn(
                'It is corrupt or do not exist. I will assume no dropped frames and deduce it from the number of frames.'
            )
            video_timestamp_ind = np.arange(tot_frames, dtype=int)

    # The shift makes everything a bit more complicated. I have to remove the
    # first frame, before resizing the array considering the dropping frames.
    if video_timestamp_ind.size > frame_diffs_d.size + 1:
        # I can tolerate one extra frame (two with respect to the frame_diff)
        # at the end of the timestamp
        video_timestamp_ind = video_timestamp_ind[:frame_diffs_d.size + 1]

    dd = video_timestamp_ind - np.min(video_timestamp_ind) - 1  # shift data
    dd = dd[dd >= 0]

    if frame_diffs_d.size != dd.size:
        raise ValueError(
            'Number of timestamps do not match the number of frames in the movie.'
        )

    # positions of dropped frames stay as nan
    frame_diffs = np.full(int(np.max(video_timestamp_ind)), np.nan)
    frame_diffs[dd] = frame_diffs_d

    # save stage data into the skeletons.hdf5
    with tables.File(skeletons_file, 'r+') as fid:
        # I am saving this data before the alignment for debugging purposes
        g_stage_movement = fid.get_node('/stage_movement')
        fid.create_carray(g_stage_movement, 'frame_diffs', obj=frame_diffs_d)

        g_stage_movement._v_attrs['fps'] = fps
        g_stage_movement._v_attrs['delay_frames'] = delay_frames
        g_stage_movement._v_attrs[
            'microns_per_pixel_scale'] = pixelPerMicronScale
        g_stage_movement._v_attrs['rotation_matrix'] = rotation_matrix

    # run the alignment
    is_stage_move, movesI, stage_locations = \
        findStageMovement(frame_diffs, mediaTimes, locations, delay_frames, fps)
    stage_vec_d, is_stage_move_d = shift2video_ref(is_stage_move, movesI, stage_locations, video_timestamp_ind)

    # save stage data into the skeletons.hdf5
    with tables.File(skeletons_file, 'r+') as fid:
        g_stage_movement = fid.get_node('/stage_movement')
        fid.create_carray(g_stage_movement, 'stage_vec', obj=stage_vec_d)
        fid.create_carray(g_stage_movement, 'is_stage_move', obj=is_stage_move_d)
        g_stage_movement._v_attrs['has_finished'] = 1

    _h_add_stage_position_pix(masked_file, skeletons_file)
    print_flush(base_name + ' Aligning Stage Motion. Finished.')
# Script fragment: for every video, read the single light pulse and express
# the frame numbers of the labelled trajectories relative to its start.
base_names = set(map(remove_ext, fnames))
vid_type = ['control', 'herm', 'male']

# one (initially empty) container per video type
data = OrderedDict()
time_ranges = OrderedDict()
for x in vid_type:
    data[x] = pd.DataFrame()
    time_ranges[x] = []

for base_name in base_names:
    print(base_name)
    skel_file, mask_file = get_names(results_dir, base_name)

    # convert the durations given in seconds into frames
    fps = read_fps(skel_file)
    expected_pulse_size = fps*expected_pulse_size_s
    check_window = fps*check_window_s

    light_on = read_light_data(mask_file)
    turn_on, turn_off = get_pulses_indexes(light_on, expected_pulse_size)
    # the rest of the loop assumes exactly one pulse per video
    assert turn_on.size == 1

    with pd.HDFStore(skel_file, 'r') as fid:
        #blob_features = fid['/blob_features']
        trajectories_data = fid['/trajectories_data']

    # keep only the rows with a non-zero worm_label
    dat = trajectories_data.loc[trajectories_data['worm_label']!=0, ['worm_label', 'frame_number']]
    # frame numbers relative to the frame where the light turns on
    dat['frame_number'] = dat['frame_number'] - turn_on[0]
def tierpsy_trajectories_summary(fname, filter_params, time_windows, time_units, only_abs_ventral=False, selected_feat=None, is_manual_index=False, delta_time=1 / 3):
    """
    Calculate the trajectory summaries for a given file fname, within each of
    the given time windows (units of start time and end time are in frame
    numbers).

    Returns a list with one dataframe per time window, holding one row per
    `worm_index` (with an `n_skeletons` entry). When the FOV was split in
    wells, the `is_good_well` flag is merged in on `well_name`. Empty
    dataframes are returned for empty windows, or for all windows when
    `read_data` returns None.
    """
    fps = read_fps(fname)
    windows_data = read_data(fname, filter_params, time_windows, time_units,
                             fps, is_manual_index)
    if windows_data is None:
        return [pd.DataFrame() for _ in time_windows]

    timeseries_data, blob_features = windows_data

    is_fov_tosplit = was_fov_split(fname)
    if is_fov_tosplit:
        fovsplitter = FOVMultiWellsSplitter(fname)
        good_wells_df = fovsplitter.wells[['well_name', 'is_good_well']].copy()

    summaries_per_window = []
    for iwin in range(len(time_windows)):
        if timeseries_data[iwin].empty:
            summaries_per_window.append(pd.DataFrame([]))
            continue

        # one single-row dataframe per individual trajectory
        worm_rows = []
        for w_ind, w_ts_data in timeseries_data[iwin].groupby('worm_index'):
            # the blobs are selected with the original index before it is reset
            w_blobs = blob_features[iwin].loc[w_ts_data.index].reset_index(drop=True)
            w_ts_data = w_ts_data.reset_index(drop=True)

            stats = get_summary_stats(
                w_ts_data, fps, w_blobs, delta_time,
                only_abs_ventral=only_abs_ventral,
                selected_feat=selected_feat)
            stats['n_skeletons'] = count_skeletons(w_ts_data)

            row = add_trajectory_info(
                pd.DataFrame(stats).T, w_ind, w_ts_data, fps,
                is_fov_tosplit=is_fov_tosplit)
            worm_rows.append(row)

        window_summary = pd.concat(worm_rows, ignore_index=True, sort=False)

        # attach whether each well was good or bad, only if there are wells
        if is_fov_tosplit:
            window_summary = window_summary.merge(
                good_wells_df, on='well_name', how='left')

        summaries_per_window.append(window_summary)

    return summaries_per_window
def tierpsy_plate_summary(fname, filter_params, time_windows, time_units, only_abs_ventral=False, selected_feat=None, is_manual_index=False, delta_time=1 / 3):
    """
    Calculate the plate summaries for a given file fname, within each of the
    given time windows (units of start time and end time are in frame numbers).

    Returns a list with one dataframe per time window. Without wells it is a
    single row per window; when the FOV was split in wells it is one row per
    well (with `well_name` and the merged `is_good_well`), or an empty
    dataframe when no well was found in the window. A list of empty
    dataframes is returned when `read_data` returns None.
    """
    fps = read_fps(fname)
    windows_data = read_data(fname, filter_params, time_windows, time_units,
                             fps, is_manual_index)

    # read_data returns None if manual annotation was chosen and the
    # trajectories_data does not contain worm_index_manual, or if the
    # time_windows are in seconds and fps is not defined (fps=-1)
    if windows_data is None:
        return [pd.DataFrame() for _ in time_windows]

    timeseries_data, blob_features = windows_data

    # was the fov split in wells?
    is_fov_tosplit = was_fov_split(fname)
    if is_fov_tosplit:
        fovsplitter = FOVMultiWellsSplitter(fname)
        good_wells_df = fovsplitter.wells[['well_name', 'is_good_well']].copy()

    stats_kwargs = dict(only_abs_ventral=only_abs_ventral,
                        selected_feat=selected_feat)

    plate_feats_list = []
    for iwin in range(len(time_windows)):
        # comparison kept exactly as in the previous implementation
        if is_fov_tosplit == False:
            plate_feats = get_summary_stats(
                timeseries_data[iwin], fps, blob_features[iwin], delta_time,
                **stats_kwargs)
            plate_feats['n_skeletons'] = count_skeletons(timeseries_data[iwin])
            plate_feats_list.append(pd.DataFrame(plate_feats).T)
            continue

        # wells seen in this time window (some wells may look empty during a
        # whole window); the 'n/a' placeholder is not a well
        wells_in_window = set(timeseries_data[iwin]['well_name']) - {'n/a'}

        # one single-row dataframe per well
        well_rows = []
        for well_name in wells_in_window:
            in_well = timeseries_data[iwin]['well_name'] == well_name
            well_feats = get_summary_stats(
                timeseries_data[iwin][in_well].reset_index(),
                fps,
                blob_features[iwin][in_well].reset_index(),
                delta_time,
                **stats_kwargs)
            well_feats['n_skeletons'] = count_skeletons(
                timeseries_data[iwin][in_well])

            # prepend the well name to the series and transpose the result
            # into a single-row dataframe
            name_entry = pd.Series({'well_name': well_name})
            well_rows.append(
                pd.DataFrame(pd.concat([name_entry, well_feats])).T)

        if not well_rows:
            # no well found in this window
            plate_feats_list.append(pd.DataFrame())
            continue

        plate_feats = pd.concat(well_rows, ignore_index=True, sort=False)
        plate_feats = plate_feats.merge(good_wells_df, on='well_name',
                                        how='left')
        plate_feats_list.append(plate_feats)

    return plate_feats_list
def _r_fill_trajectories_data(skeletons_file):
    '''
    Read the /trajectories_data, interpolate any dropped frames, change some
    fields and make sure the data format is 32bit (less space).

    Parameters
    ----------
    skeletons_file : str
        HDF5 file with the `/trajectories_data` table (and `/skeleton`, used
        when the table has no `is_good_skel` column).

    Returns
    -------
    pandas.DataFrame
        The selected columns plus `was_skeletonized`, `skeleton_id` (-1) and
        `old_trajectory_data_index`, with rows for dropped frames added by
        `_fill_dropped_frames`.
    '''
    valid_fields = [
        'timestamp_raw', 'timestamp_time', 'worm_index_joined', 'coord_x',
        'coord_y', 'threshold', 'roi_size', 'area', 'frame_number'
    ]

    with pd.HDFStore(skeletons_file, 'r') as fid:
        trajectories_data_ori = fid['/trajectories_data']

    if 'worm_index_manual' in trajectories_data_ori:
        valid_fields += ['worm_index_manual', 'worm_label']

    trajectories_data = trajectories_data_ori[valid_fields].copy()

    if 'is_good_skel' in trajectories_data_ori:
        trajectories_data['was_skeletonized'] = trajectories_data_ori[
            'is_good_skel']
    else:
        with tables.File(skeletons_file, 'r') as fid:
            skels = fid.get_node('/skeleton')
            # a skeleton was skeletonized if it is not nan
            was_skeletonized = ~np.isnan(skels[:, 0, 0])
        trajectories_data['was_skeletonized'] = was_skeletonized.astype(
            np.uint8)

    trajectories_data['skeleton_id'] = np.int32(-1)
    trajectories_data[
        'old_trajectory_data_index'] = trajectories_data.index.values.astype(
            np.int32)

    # change table to 32bits if necessary
    for col in trajectories_data:
        col_dtype = trajectories_data[col].dtype
        if col_dtype == np.int64:
            trajectories_data[col] = trajectories_data[col].astype(np.int32)
        elif col_dtype == np.float64:
            trajectories_data[col] = trajectories_data[col].astype(np.float32)
        elif col_dtype == bool:
            # builtin bool replaces the np.bool alias removed in NumPy 1.24
            trajectories_data[col] = trajectories_data[col].astype(np.uint8)

    assert set(x for _, x in trajectories_data.dtypes.items()) == \
        {np.dtype('uint8'), np.dtype('int32'), np.dtype('float32')}

    nan_timestamps = trajectories_data['timestamp_raw'].isnull()
    if nan_timestamps.all():
        # no timestamps at all: deduce them from the frame number and the fps
        fps = read_fps(skeletons_file)
        trajectories_data['timestamp_raw'] = trajectories_data['frame_number']
        trajectories_data[
            'timestamp_time'] = trajectories_data['frame_number'] / fps
    else:
        if nan_timestamps.any():
            warnings.warn(
                'There are still some frames with nan timestamps. I am getting read of them.'
            )
        # copy so the column assignment below does not act on a slice
        trajectories_data = trajectories_data[~nan_timestamps].copy()

        # if it is not nan convert this data into int
        trajectories_data['timestamp_raw'] = trajectories_data[
            'timestamp_raw'].astype(np.int32)

    # default value per dtype, in column order, handed to _fill_dropped_frames
    dflt_d = {
        np.dtype('int32'): -1,
        np.dtype(np.float32): np.nan,
        np.dtype('uint8'): 0
    }
    dflt_val = tuple(
        [dflt_d[x] for _, x in trajectories_data.dtypes.items()])

    all_worm_data = []
    for worm_index, worm_data in trajectories_data.groupby(
            'worm_index_joined'):
        worm_data = worm_data.dropna(subset=['timestamp_raw'])
        worm_data = worm_data.drop_duplicates(subset=['timestamp_raw'],
                                              keep='first')

        # frames were dropped if frame numbers and timestamps disagree
        if not (worm_data['frame_number'] == worm_data['timestamp_raw']).all():
            worm_data = _fill_dropped_frames(worm_data, dflt_val)

        all_worm_data.append(worm_data)

    trajectories_data = pd.concat(all_worm_data, ignore_index=True)

    return trajectories_data
# Script fragment: t-test of each fraction group against the 'A10-B0' group,
# then plot the head speed of one worm of the 'A10_B0' video.
for frac_type in ['A7-B3', 'A9-B1']:
    gg = df.groupby('names')

    a = gg.get_group(frac_type)[region_type].values
    b = gg.get_group('A10-B0')[region_type].values

    t, pprob = ttest_ind(a, b)
    print(frac_type, region_type, pprob)

#%%
# first video whose name contains 'A10_B0'
base_name = [x for x in base_names if 'A10_B0' in x][0]
feat_file, mask_file = get_names(results_dir, base_name)

light_on = read_light_data(mask_file)
with pd.HDFStore(feat_file, 'r') as fid:
    feat_timeseries = fid['/features_timeseries']
# builtin int replaces the np.int alias that was removed in NumPy 1.24
feat_timeseries['timestamp'] = feat_timeseries['timestamp'].astype(int)

#%%
fps = read_fps(feat_file)
for worm_index, dat in feat_timeseries.groupby('worm_index'):
    #print(worm_index)
    # only worm 15 is plotted
    if worm_index != 15:
        continue

    yy = dat['head_speed']
    xx_i = dat['timestamp']
    xx = xx_i/fps  # frames -> seconds

    # rescale the light signal to the range of the head speed
    rr = (np.min(yy), np.max(yy))
    pulse = light_on[xx_i]
    pulse = pulse*(rr[1]-rr[0]) + rr[0]

    plt.figure(figsize=(12, 5))
    plt.plot(xx, yy)
def test_aligment(masked_file, skeletons_file, is_calculate_diff=False):
    """
    Re-run the stage alignment and return it next to the stored result.

    Parameters
    ----------
    masked_file : str
        HDF5 file with the `/xml_info` node and the `/stage_log` table.
    skeletons_file : str
        HDF5 file with `/timestamp/raw` and the `/stage_movement` group.
    is_calculate_diff : bool
        If True the frame differences come from `test_var_diff`; otherwise
        the stored `/stage_movement/frame_diffs` are used.

    Returns
    -------
    tuple
        `((is_stage_move_d, is_stage_move_o), (stage_vec_o, stage_vec_d))`
        when the stored arrays exist, otherwise
        `(is_stage_move_d, stage_vec_d)`.

    Raises
    ------
    ValueError
        If the number of stored frame differences does not match the number
        of shifted timestamps.
    """
    fps = read_fps(skeletons_file)

    with tables.File(masked_file, 'r') as fid:
        xml_info = fid.get_node('/xml_info').read().decode()

    with pd.HDFStore(masked_file, 'r') as fid:
        stage_log = fid['/stage_log']

    # this is not the cleanest, but matlab does not have a xml parser from
    # a text string
    delay_str = xml_info.partition('<delay>')[-1].partition('</delay>')[0]
    delay_time = float(delay_str) / 1000
    delay_frames = np.ceil(delay_time * fps)

    mediaTimes = stage_log['stage_time'].values
    locations = stage_log[['stage_x', 'stage_y']].values

    if is_calculate_diff:
        frame_diffs, video_timestamp_ind = test_var_diff(
            masked_file, skeletons_file)
    else:
        with tables.File(skeletons_file, 'r') as fid:
            # builtin `int`: the `np.int` alias has been removed from NumPy
            video_timestamp_ind = fid.get_node('/timestamp/raw')[:].astype(
                int)
            frame_diffs_d = fid.get_node('/stage_movement/frame_diffs')[:]
            frame_diffs_d = np.squeeze(frame_diffs_d)

        # The shift makes everything a bit more complicated. I have to remove
        # the first frame, before resizing the array considering the dropping
        # frames.
        if video_timestamp_ind.size > frame_diffs_d.size + 1:
            video_timestamp_ind = video_timestamp_ind[:frame_diffs_d.size + 1]

        dd = video_timestamp_ind - np.min(video_timestamp_ind) - 1  #shift data
        dd = dd[dd >= 0]
        if frame_diffs_d.size != dd.size:
            raise ValueError(
                'the number of frames and time stamps do not match, nothing to do here'
            )

        # place each difference at its timestamp; dropped frames stay nan
        frame_diffs = np.full(np.max(dd) + 1, np.nan)
        frame_diffs[dd] = frame_diffs_d

    is_stage_move, movesI, stage_locations = \
    findStageMovement(frame_diffs, mediaTimes, locations, delay_frames, fps)
    print(locations)

    stage_vec_d, is_stage_move_d = shift2video_ref(is_stage_move, movesI,
                                                   stage_locations,
                                                   video_timestamp_ind)
    print(stage_locations)

    try:
        with tables.File(skeletons_file, 'r') as fid:
            is_stage_move_o = fid.get_node('/stage_movement/is_stage_move')[:]
            is_stage_move_o = np.squeeze(is_stage_move_o)
            stage_vec_o = fid.get_node('/stage_movement/stage_vec')[:]
            stage_vec_o = np.squeeze(stage_vec_o)
        return (is_stage_move_d, is_stage_move_o), (stage_vec_o, stage_vec_d)
    except tables.exceptions.NoSuchNodeError:
        # nothing stored to compare against
        return is_stage_move_d, stage_vec_d
# save_prefix = 'worm_example_small_W{}.npz' # is_WT2 = True mask_video = '/Volumes/behavgenom_archive$/Lidia/MaskedVideos/Optogenetics-day1/AQ3071-ATR_Set1_Ch1_18072017_191322.hdf5' is_WT2 = False skeletons_file = mask_video.replace('MaskedVideos', 'Results').replace( '.hdf5', '_skeletons.hdf5') #%% import pandas as pd with pd.HDFStore(skeletons_file, 'r') as fid: trajectories_data = fid['/trajectories_data'] trajectories_data[trajectories_data['worm_index_joined'] == 2] #%% fps = read_fps(skeletons_file) coords_smooth_window = int(np.round(fps / 3)) if coords_smooth_window <= 3: coords_smooth_window = None good_traj_index, worm_index_type = getGoodTrajIndexes(skeletons_file) for iw, worm_index in enumerate(good_traj_index): print(iw, len(good_traj_index)) worm = WormFromTable(skeletons_file, worm_index, worm_index_type=worm_index_type) if is_WT2: worm.correct_schafer_worm() wormN = SmoothedWorm(worm.skeleton, worm.widths, worm.ventral_contour,
def _get_timeseries_feats(features_file, delta_time=1 / 3):
    '''
    Get the all the time series features from the skeletons.

    Parameters
    ----------
    features_file : str
        HDF5 file with `/trajectories_data`, the `/coordinates/*` arrays and,
        optionally, `/food_cnt_coord`.
    delta_time : float
        NOTE(review): accepted but not forwarded to
        `get_timeseries_features` -- confirm whether it should be.

    Returns
    -------
    pandas.DataFrame
        Features of every worm concatenated, with `worm_index` as the first
        column; `worm_index` and `timestamp` are int32, the rest float32.
    '''
    fps = read_fps(features_file)
    with pd.HDFStore(features_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']
    #only use data that was skeletonized
    #trajectories_data = trajectories_data[trajectories_data['skeleton_id']>=0]

    trajectories_data_g = trajectories_data.groupby('worm_index_joined')
    progress_timer = TimeCounter('')
    base_name = get_base_name(features_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Calculating tierpsy features. Worm %i of %i done." % (
            n + 1, tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)

    #If i find the ventral side in the multiworm case this has to change
    ventral_side = read_ventral_side(features_file)

    timeseries_features = []
    # open the file once for all the worms instead of once per worm
    with tables.File(features_file, 'r') as fid:
        if '/food_cnt_coord' in fid:
            food_cnt = fid.get_node('/food_cnt_coord')[:]
        else:
            food_cnt = None

        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            skel_id = worm_data['skeleton_id'].values

            #deal with any nan in the skeletons
            good_id = skel_id >= 0
            skel_id_val = skel_id[good_id]
            traj_size = skel_id.size

            args = []
            for p in ('skeletons', 'widths', 'dorsal_contours',
                      'ventral_contours'):
                node = fid.get_node('/coordinates/' + p)
                # rows without a valid skeleton id stay as nan
                dat = np.full((traj_size, *node.shape[1:]), np.nan)
                if skel_id_val.size > 0:
                    if len(node.shape) == 3:
                        dd = node[skel_id_val, :, :]
                    else:
                        dd = node[skel_id_val, :]
                    dat[good_id] = dd
                args.append(dat)

            timestamp = worm_data['timestamp_raw'].values.astype(np.int32)
            feats = get_timeseries_features(*args,
                                            timestamp=timestamp,
                                            food_cnt=food_cnt,
                                            fps=fps,
                                            ventral_side=ventral_side)

            #save timeseries features data
            feats = feats.astype(np.float32)
            feats['worm_index'] = worm_index

            #move the last fields to the first columns
            cols = feats.columns.tolist()
            cols = cols[-1:] + cols[:-1]
            feats = feats[cols]

            feats['worm_index'] = feats['worm_index'].astype(np.int32)
            feats['timestamp'] = feats['timestamp'].astype(np.int32)

            timeseries_features.append(feats)
            _display_progress(ind_n)

    timeseries_features = pd.concat(timeseries_features, ignore_index=True)
    return timeseries_features
def updateSkelFile(self, skeletons_file):
    """
    Refresh the viewer state that depends on the skeletons file.

    After delegating to the parent implementation this reads the food
    contour (enabling or disabling the "show food" checkbox), makes sure
    `trajectories_data` has the `worm_index_manual` and `worm_label` columns,
    loads the skeleton filter parameters, flags the rows that belong to
    valid trajectories in `is_valid_index`, and redraws the image.

    If there is no skeletons file or no trajectories data, only
    `food_coordinates` is reset and the method returns early.
    """
    super().updateSkelFile(skeletons_file)
    if not self.skeletons_file or self.trajectories_data is None:
        self.food_coordinates = None
        return

    # the food contour is optional; the checkbox follows its availability
    with tables.File(self.skeletons_file, 'r') as fid:
        if not '/food_cnt_coord' in fid:
            self.food_coordinates = None
            self.ui.checkBox_showFood.setEnabled(False)
        else:
            self.food_coordinates = fid.get_node('/food_cnt_coord')[:]
            self.ui.checkBox_showFood.setEnabled(True)

    #correct the index in case it was given before as worm_index_N
    if 'worm_index_N' in self.trajectories_data:
        self.trajectories_data = self.trajectories_data.rename(
            columns={'worm_index_N': 'worm_index_manual'})

    # no manual index yet: start from the joined index and the label stored
    # under the key 'U' in self.wlab
    if not 'worm_index_manual' in self.trajectories_data:
        self.trajectories_data['worm_label'] = self.wlab['U']
        self.trajectories_data[
            'worm_index_manual'] = self.trajectories_data[
                'worm_index_joined']

    self.updateWormIndexTypeMenu()

    #read filter skeletons parameters
    with tables.File(self.skeletons_file, 'r') as skel_fid:
        # if any of this fields is missing load the default parameters
        self.param_default = TrackerParams()
        try:
            # the node stores a JSON document whose 'func_arguments' entry is
            # itself a JSON string
            ss = skel_fid.get_node('/provenance_tracking/ske_filt').read()
            ss = json.loads(ss.decode("utf-8"))
            saved_func_args = json.loads(ss['func_arguments'])
            self.feat_filt_param = {
                x: saved_func_args[x]
                for x in
                ['min_num_skel', 'bad_seg_thresh', 'min_displacement']
            }
        except (KeyError, tables.exceptions.NoSuchNodeError):
            self.feat_filt_param = get_feat_filt_param(
                self.param_default.p_dict)

    self.expected_fps = read_fps(self.vfilename)

    #TODO: THIS IS NOT REALLY THE INDEX I USE IN THE FEATURES FILES. I NEED A MORE CLEVER WAY TO SEE WHAT I AM REALLY FILTERING.
    dd = {
        x: self.feat_filt_param[x]
        for x in ['min_num_skel', 'bad_seg_thresh', 'min_displacement']
    }
    good_traj_index, _ = getValidIndexes(
        self.trajectories_data, **dd, worm_index_type=self.worm_index_type)
    self.trajectories_data['is_valid_index'] = self.trajectories_data[
        self.worm_index_type].isin(good_traj_index)

    # grouped by frame number and stored for later use
    self.traj_time_grouped = self.trajectories_data.groupby('frame_number')
    self.traj_for_plot = {}  #delete previous plotted trajectories
    self.updateImage()
exp_df['tot_timestamps'] = np.nan for irow, row in exp_df.iterrows(): print(irow + 1, len(exp_df)) mask_file = row['mask_file'] feat_file = mask_file.replace('MaskedVideos', 'Results').replace( '.hdf5', '_featuresN.hdf5') output = read_file_data(mask_file, feat_file, _is_debug=_is_debug) if output is None: continue else: timeseries_data, blob_features, fps, region_size, tot_images, tot_timestamps = output exp_df.loc[irow, 'has_valid_light'] = True fps = read_fps(mask_file) exp_df.loc[irow, 'video_duration'] = timeseries_data['timestamp'].max() / fps #add duration of each region for ii, val in enumerate(region_size): exp_df.loc[irow, REGION_LABELS_I[ii + 1]] = val exp_df.loc[irow, 'tot_images'] = tot_images exp_df.loc[irow, 'tot_timestamps'] = tot_timestamps r_stats_l = [] for r_lab, r_dat in timeseries_data.groupby('region_lab'): if r_lab not in REGION_LABELS_I: #likely 0 value corresponding a frames between regions continue
def save_timeseries_feats_table(features_file,
                                derivate_delta_time,
                                fovsplitter_param=None):
    """
    Calculate the time series features of every worm and store them in the
    `/timeseries_data` table of `features_file`, with a `well_name` column.

    Any existing `/timeseries_data`, `/event_durations` or
    `/timeseries_features` node is removed first.

    Parameters
    ----------
    features_file : str
        HDF5 file with `/trajectories_data` and the `/coordinates/*` arrays.
        It is opened in 'r+' mode and modified in place.
    derivate_delta_time : float
        Forwarded to `get_timeseries_features`.
    fovsplitter_param : dict, optional
        Keyword arguments for `FOVMultiWellsSplitter`. When not empty it must
        contain 'total_n_wells' (> 0), 'whichsideup' and 'well_shape', and
        each worm is assigned to a well. When empty or None the well name is
        set to 'n/a'.

    Raises
    ------
    AssertionError
        If `fovsplitter_param` is not empty but misses one of the required
        keys, or 'total_n_wells' is not positive.
    """
    # None instead of a shared mutable `{}` default
    if fovsplitter_param is None:
        fovsplitter_param = {}

    fps = read_fps(features_file)

    # initialise class for splitting fov
    if len(fovsplitter_param) > 0:
        is_fov_tosplit = True
        assert all(key in fovsplitter_param
                   for key in ['total_n_wells', 'whichsideup', 'well_shape'])
        assert fovsplitter_param['total_n_wells'] > 0
    else:
        is_fov_tosplit = False
    print('is fov to split?', is_fov_tosplit)

    if is_fov_tosplit:
        # split fov in wells
        masked_image_file = features_file.replace('Results', 'MaskedVideos')
        masked_image_file = masked_image_file.replace('_featuresN.hdf5',
                                                      '.hdf5')
        fovsplitter = FOVMultiWellsSplitter(masked_image_file,
                                            **fovsplitter_param)
        # store wells data in the features file
        fovsplitter.write_fov_wells_to_file(features_file)

    with pd.HDFStore(features_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']

    trajectories_data_g = trajectories_data.groupby('worm_index_joined')
    progress_timer = TimeCounter('')
    base_name = get_base_name(features_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Calculating tierpsy features. Worm %i of %i done." % (
            n + 1, tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)

    with tables.File(features_file, 'r+') as fid:
        # remove the results of any previous run
        for gg in [
                '/timeseries_data', '/event_durations', '/timeseries_features'
        ]:
            if gg in fid:
                fid.remove_node(gg)

        feat_dtypes = [(x, np.float32) for x in timeseries_all_columns]
        feat_dtypes = [('worm_index', np.int32), ('timestamp', np.int32),
                       ('well_name', 'S3')] + feat_dtypes
        timeseries_features = fid.create_table('/',
                                               'timeseries_data',
                                               obj=np.recarray(0, feat_dtypes),
                                               filters=TABLE_FILTERS)

        if '/food_cnt_coord' in fid:
            food_cnt = fid.get_node('/food_cnt_coord')[:]
        else:
            food_cnt = None

        #If i find the ventral side in the multiworm case this has to change
        ventral_side = read_ventral_side(features_file)

        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            skel_id = worm_data['skeleton_id'].values

            #deal with any nan in the skeletons
            good_id = skel_id >= 0
            skel_id_val = skel_id[good_id]
            traj_size = skel_id.size

            args = []
            for p in ('skeletons', 'widths', 'dorsal_contours',
                      'ventral_contours'):
                node_str = '/coordinates/' + p
                if node_str in fid:
                    node = fid.get_node(node_str)
                    # rows without a valid skeleton id stay as nan
                    dat = np.full((traj_size, *node.shape[1:]), np.nan)
                    if skel_id_val.size > 0:
                        if len(node.shape) == 3:
                            dd = node[skel_id_val, :, :]
                        else:
                            dd = node[skel_id_val, :]
                        dat[good_id] = dd
                else:
                    # a missing coordinates array is passed on as None
                    dat = None
                args.append(dat)

            timestamp = worm_data['timestamp_raw'].values.astype(np.int32)
            feats = get_timeseries_features(
                *args,
                timestamp=timestamp,
                food_cnt=food_cnt,
                fps=fps,
                ventral_side=ventral_side,
                derivate_delta_time=derivate_delta_time)

            #save timeseries features data
            feats = feats.astype(np.float32)
            feats['worm_index'] = worm_index
            if is_fov_tosplit:
                feats[
                    'well_name'] = fovsplitter.find_well_from_trajectories_data(
                        worm_data)
            else:
                feats['well_name'] = 'n/a'

            # cast well_name to the correct type
            # (before shuffling columns, so it remains the last entry)
            # needed because for some reason this does not work:
            # feats['well_name'] = feats['well_name'].astype('S3')
            feats['_well_name'] = feats['well_name'].astype('S3')
            feats.drop(columns='well_name', inplace=True)
            feats.rename(columns={'_well_name': 'well_name'}, inplace=True)

            #move the last fields to the first columns
            cols = feats.columns.tolist()
            cols = cols[-2:] + cols[:-2]
            # swap so the first three columns follow the order of the first
            # three entries of feat_dtypes
            cols[1], cols[2] = cols[2], cols[1]
            feats = feats[cols]

            feats['worm_index'] = feats['worm_index'].astype(np.int32)
            feats['timestamp'] = feats['timestamp'].astype(np.int32)
            feats = feats.to_records(index=False)

            timeseries_features.append(feats)
            _display_progress(ind_n)
#%% all_data = pd.DataFrame() for irow, row in exp_df.iterrows(): print(irow+1, len(exp_df)) mask_file = row['mask_file'] feat_file = mask_file.replace('MaskedVideos', 'Results').replace('.hdf5', '_featuresN.hdf5') output = read_file_data(mask_file, feat_file, _is_debug = _is_debug) if output is None: continue else: timeseries_data, blob_features, fps, region_size = output exp_df.loc[irow, 'has_valid_light'] = True fps = read_fps(mask_file) exp_df.loc[irow, 'video_duration'] = timeseries_data['timestamp'].max()/fps #add duration of each region for ii, val in enumerate(region_size): exp_df.loc[irow, REGION_LABELS_I[ii+1]] = val #%% r_stats_l = [] for r_lab, r_dat in timeseries_data.groupby('region_lab'): if r_lab not in REGION_LABELS_I: #likely 0 value corresponding a frames between regions continue lab = REGION_LABELS_I[r_lab] r_blob = blob_features.loc[r_dat.index] r_dat = r_dat.reset_index(drop=True)
def alignStageMotion_new(masked_file, skeletons_file):
    """
    Align the stage movements of the log with the video frames and save the
    result in the `/stage_movement` group of `skeletons_file`.

    Parameters
    ----------
    masked_file : str
        HDF5 file with `/xml_info`, `/mask` (attributes `pixels2microns_x`
        and `pixels2microns_y`) and the `/stage_log` table.
    skeletons_file : str
        HDF5 file with `/timestamp/raw`. It is opened in 'r+' mode; previous
        `stage_vec`, `is_stage_move` and `frame_diffs` arrays are deleted.

    Notes
    -----
    The `has_finished` attribute of `/stage_movement` is used as exit flag:
    0 while running, 80 if the timestamps contain nan (the function returns),
    81 if the number of timestamps and frame differences do not match (the
    function returns), 82 if `findStageMovement` failed. In the last case an
    "always moving" vector is still saved.
    NOTE(review): the final write sets `has_finished` to 1 even after a
    flag 82 failure -- confirm this is intended.
    """
    fps = read_fps(skeletons_file)

    with tables.File(skeletons_file, 'r+') as fid:
        # delete data from previous analysis if any
        # (a bare `not '/stage_movement'` is always False, so the group was
        # never created when missing)
        if '/stage_movement' not in fid:
            g_stage_movement = fid.create_group('/', 'stage_movement')
        else:
            g_stage_movement = fid.get_node('/stage_movement')

        for field in ['stage_vec', 'is_stage_move', 'frame_diffs']:
            if field in g_stage_movement:
                fid.remove_node(g_stage_movement, field)

        g_stage_movement._v_attrs['has_finished'] = 0

        video_timestamp_ind = fid.get_node('/timestamp/raw')[:]
        #I can tolerate a nan in the last position
        if np.isnan(video_timestamp_ind[-1]):
            video_timestamp_ind[-1] = video_timestamp_ind[-2]

        if np.any(np.isnan(video_timestamp_ind)):
            exit_flag = 80
            # the flag is formatted into the message; the second positional
            # argument of warnings.warn is the warning category
            warnings.warn(
                'The timestamp is corrupt or do not exist.\n No stage correction processed. Exiting with has_finished flag %i.'
                % exit_flag)
            #turn on the has_finished flag and exit
            g_stage_movement._v_attrs['has_finished'] = exit_flag
            return

        # builtin `int`: the `np.int` alias has been removed from NumPy
        video_timestamp_ind = video_timestamp_ind.astype(int)

    # Open the information file and read the tracking delay time.
    # (help from segworm findStageMovement)
    # 2. The info file contains the tracking delay. This delay represents the
    # minimum time between stage movements and, conversely, the maximum time it
    # takes for a stage movement to complete. If the delay is too small, the
    # stage movements become chaotic. We load the value for the delay.
    with tables.File(masked_file, 'r') as fid:
        xml_info = fid.get_node('/xml_info').read().decode()
        g_mask = fid.get_node('/mask')
        # Read the scale conversions, we would need this when we want to
        # convert the pixels into microns
        pixelPerMicronX = 1 / g_mask._v_attrs['pixels2microns_x']
        pixelPerMicronY = 1 / g_mask._v_attrs['pixels2microns_y']

    with pd.HDFStore(masked_file, 'r') as fid:
        stage_log = fid['/stage_log']

    # this is not the cleanest, but matlab does not have a xml parser from
    # a text string
    delay_str = xml_info.partition('<delay>')[-1].partition('</delay>')[0]
    delay_time = float(delay_str) / 1000
    delay_frames = np.ceil(delay_time * fps)

    # `**` is the power operator (`^` is a bitwise xor in Python); the norm
    # combines the X and the Y scale
    normScale = np.sqrt((pixelPerMicronX**2 + pixelPerMicronY**2) / 2)
    pixelPerMicronScale = normScale * np.array(
        (np.sign(pixelPerMicronX), np.sign(pixelPerMicronY)))

    # Compute the rotation matrix.
    angle = np.arctan(pixelPerMicronY / pixelPerMicronX)
    if angle > 0:
        angle = np.pi / 4 - angle
    else:
        angle = np.pi / 4 + angle
    cosAngle = np.cos(angle)
    sinAngle = np.sin(angle)
    rotation_matrix = np.array(((cosAngle, -sinAngle), (sinAngle, cosAngle)))

    # Ev's code uses the full vectors without dropping frames
    # 1. video2Diff differentiates a video frame by frame and outputs the
    # differential variance. We load these frame differences.
    frame_diffs_d = getFrameDiffVar(masked_file)

    # Read the media times and locations from the log file.
    # (help from segworm findStageMovement)
    # 3. The log file contains the initial stage location at media time 0 as
    # well as the subsequent media times and locations per stage movement. Our
    # algorithm attempts to match the frame differences in the video (see step
    # 1) to the media times in this log file. Therefore, we load these media
    # times and stage locations.
    # from the .log.csv file
    mediaTimes = stage_log['stage_time'].values
    locations = stage_log[['stage_x', 'stage_y']].values

    # The shift makes everything a bit more complicated. I have to remove the
    # first frame, before resizing the array considering the dropping frames.
    if video_timestamp_ind.size > frame_diffs_d.size + 1:
        # i can tolerate one frame (two with respect to the frame_diff)
        # extra at the end of the timestamp
        video_timestamp_ind = video_timestamp_ind[:frame_diffs_d.size + 1]

    frame_diffs = np.full(int(np.max(video_timestamp_ind)), np.nan)
    # shift data: the `- 1` turns the first frame into a negative index that
    # is dropped on the next line, as `test_aligment` does
    dd = video_timestamp_ind - np.min(video_timestamp_ind) - 1
    dd = dd[dd >= 0]

    if frame_diffs_d.size != dd.size:
        exit_flag = 81
        warnings.warn(
            'Number of timestamps do not match the number read movie frames.\n No stage correction processed. Exiting with has_finished flag %i.'
            % exit_flag)
        # turn on the has_finished flag and exit
        with tables.File(skeletons_file, 'r+') as fid:
            fid.get_node(
                '/stage_movement')._v_attrs['has_finished'] = exit_flag
        return

    frame_diffs[dd] = frame_diffs_d

    # try to run the aligment and return empty data if it fails
    try:
        is_stage_move, movesI, stage_locations = \
        findStageMovement(frame_diffs, mediaTimes, locations, delay_frames, fps)
    except Exception:
        # `Exception` rather than a bare except so that KeyboardInterrupt and
        # SystemExit are not swallowed
        exit_flag = 82
        warnings.warn(
            'Returning all nan stage vector. Exiting with has_finished flag {}'
            .format(exit_flag))
        with tables.File(skeletons_file, 'r+') as fid:
            fid.get_node(
                '/stage_movement')._v_attrs['has_finished'] = exit_flag

        # remove the if we want to create an empty
        is_stage_move = np.ones(frame_diffs.size + 1)
        stage_locations = []
        movesI = []

    stage_vec_d, is_stage_move_d = shift2video_ref(is_stage_move, movesI,
                                                   stage_locations,
                                                   video_timestamp_ind)

    # save stage data into the skeletons.hdf5
    with tables.File(skeletons_file, 'r+') as fid:
        g_stage_movement = fid.get_node('/stage_movement')
        # create_carray is a method of the file, not of the group
        fid.create_carray(g_stage_movement, 'frame_diffs', obj=frame_diffs_d)
        fid.create_carray(g_stage_movement, 'stage_vec', obj=stage_vec_d)
        fid.create_carray(g_stage_movement,
                          'is_stage_move',
                          obj=is_stage_move_d)

        g_stage_movement._v_attrs['fps'] = fps
        g_stage_movement._v_attrs['delay_frames'] = delay_frames
        g_stage_movement._v_attrs[
            'microns_per_pixel_scale'] = pixelPerMicronScale
        g_stage_movement._v_attrs['rotation_matrix'] = rotation_matrix
        g_stage_movement._v_attrs['has_finished'] = 1

    print_flush('Finished.')
def smooth_skeletons_table(skeletons_file,
                           features_file,
                           is_WT2=False,
                           skel_smooth_window=5,
                           coords_smooth_window_s=0.25,
                           gap_to_interp_s=0.25):
    """
    Smooth the skeletons of every worm and write them to `features_file`.

    `features_file` is created from scratch (opened in 'w' mode) with the
    `/coordinates` arrays, the `/trajectories_data` table (with `skeleton_id`
    pointing to the rows of the coordinates arrays) and, when available,
    `/food_cnt_coord` and `/blob_features`.

    Parameters
    ----------
    skeletons_file : str
        Input HDF5 file.
    features_file : str
        Output HDF5 file. Any existing content is overwritten.
    is_WT2 : bool
        If True `correct_schafer_worm` is called on every worm. The value is
        also stored as attribute of the trajectories table.
    skel_smooth_window : int
        Forwarded to `SmoothedWorm`.
    coords_smooth_window_s : float
        Smoothing window in seconds. It is converted to frames using the
        fps; a window of 3 frames or less is replaced by None.
    gap_to_interp_s : float
        Gap in seconds, converted to frames and forwarded to `SmoothedWorm`.

    Raises
    ------
    ValueError
        If the timestamps of a worm cannot be matched to its rows in the
        trajectories table.
    """
    fps = read_fps(skeletons_file)
    coords_smooth_window = int(np.round(fps * coords_smooth_window_s))
    gap_to_interp = int(np.round(fps * gap_to_interp_s))
    if coords_smooth_window <= 3:  #do not interpolate
        coords_smooth_window = None

    trajectories_data = _r_fill_trajectories_data(skeletons_file)
    trajectories_data_g = trajectories_data.groupby('worm_index_joined')
    progress_timer = TimeCounter('')
    base_name = get_base_name(skeletons_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Smoothing skeletons. Worm %i of %i done." % (n, tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)

    #initialize arrays
    food_cnt = read_food_contour(skeletons_file)
    with tables.File(skeletons_file, 'r') as fid:
        n_segments = fid.get_node('/skeleton').shape[1]

    with tables.File(features_file, 'w') as fid_features:
        if food_cnt is not None:
            fid_features.create_array('/',
                                      'food_cnt_coord',
                                      obj=food_cnt.astype(np.float32))

        worm_coords_array = {}
        w_node = fid_features.create_group('/', 'coordinates')
        for array_name in [
                'skeletons', 'dorsal_contours', 'ventral_contours', 'widths'
        ]:
            # the widths have no x/y axis
            if array_name != 'widths':
                a_shape = (0, n_segments, 2)
            else:
                a_shape = (0, n_segments)
            worm_coords_array[array_name] = fid_features.create_earray(
                w_node,
                array_name,
                shape=a_shape,
                atom=tables.Float32Atom(shape=()),
                filters=TABLE_FILTERS)

        tot_skeletons = 0
        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            if worm_data['was_skeletonized'].sum() < 2:
                continue

            worm = WormFromTable(skeletons_file,
                                 worm_index,
                                 worm_index_type='worm_index_joined')
            if is_WT2:
                worm.correct_schafer_worm()

            if np.sum(~np.isnan(worm.skeleton[:, 0, 0])) <= 2:
                warnings.warn('Not enough data to smooth. Empty file?')
                wormN = worm
            else:
                wormN = SmoothedWorm(worm.skeleton,
                                     worm.widths,
                                     worm.ventral_contour,
                                     worm.dorsal_contour,
                                     skel_smooth_window=skel_smooth_window,
                                     coords_smooth_window=coords_smooth_window,
                                     gap_to_interp=gap_to_interp)

            # mark the rows of this worm whose timestamp is in worm.timestamp
            dat_index = pd.Series(False,
                                  index=worm_data['timestamp_raw'].values)
            try:
                dat_index[worm.timestamp] = True
            except ValueError as err:
                # fail with context instead of stopping in a debugger, which
                # would block an unattended run
                raise ValueError(
                    'Cannot match the timestamps of worm {} to the trajectories data.'
                    .format(worm_index)) from err

            # rows of the coordinates arrays where this worm will be stored
            skeleton_id = np.arange(wormN.skeleton.shape[0]) + tot_skeletons
            tot_skeletons = skeleton_id[-1] + 1
            row_ind = worm_data.index[dat_index.values]
            trajectories_data.loc[row_ind, 'skeleton_id'] = skeleton_id

            #add data
            worm_coords_array['skeletons'].append(wormN.skeleton)
            worm_coords_array['dorsal_contours'].append(wormN.dorsal_contour)
            worm_coords_array['ventral_contours'].append(
                wormN.ventral_contour)
            worm_coords_array['widths'].append(wormN.widths)

            #display progress
            _display_progress(ind_n + 1)

        #save trajectories data
        newT = fid_features.create_table(
            '/',
            'trajectories_data',
            obj=trajectories_data.to_records(index=False),
            filters=TABLE_FILTERS)
        copy_unit_conversions(newT, skeletons_file)
        newT._v_attrs['is_WT2'] = is_WT2
        newT._v_attrs['ventral_side'] = read_ventral_side(skeletons_file)

        #save blob features interpolating in dropped frames and stage movement (WT2)
        blob_features = _r_fill_blob_features(skeletons_file,
                                              trajectories_data, is_WT2)
        if blob_features is not None:
            fid_features.create_table(
                '/',
                'blob_features',
                obj=blob_features.to_records(index=False),
                filters=TABLE_FILTERS)
def _getData(features_file, READ_FEATURES=False, IS_FOR_WCON=True):
    """
    Collect the basic data of every worm in `features_file`.

    Returns one OrderedDict per worm with the keys 'id', 'head', 'ventral',
    'ptail', 't' (timestamp divided by the fps), 'x', 'y' (skeleton) and
    'px', 'py' (ventral contour followed by the reversed dorsal contour).
    With READ_FEATURES the features returned by `__addOMGFeat` are added,
    their names prefixed by '@OMG ' when IS_FOR_WCON is set. With IS_FOR_WCON
    every value except 'id', 'head', 'ventral' and 'ptail' is passed through
    `__reformatForJson`.

    If the file has no `/features_timeseries` table an empty dict (not a
    list) is returned.
    """
    lab_prefix = '@OMG ' if IS_FOR_WCON else ''

    with pd.HDFStore(features_file, 'r') as store:
        if '/features_timeseries' not in store:
            #empty file nothing to do here
            return {}
        features_timeseries = store['/features_timeseries']
        feat_time_group_by_worm = features_timeseries.groupby('worm_index')

    ventral_side = _get_ventral_side(features_file)

    # fields that are never reformatted for json
    plain_fields = ['id', 'head', 'ventral', 'ptail']

    with tables.File(features_file, 'r') as fid:
        #fps used to adjust timestamp to real time
        fps = read_fps(features_file)

        #pointers to the coordinates arrays
        skeletons = fid.get_node('/coordinates/skeletons')
        dorsal_contours = fid.get_node('/coordinates/dorsal_contours')
        ventral_contours = fid.get_node('/coordinates/ventral_contours')

        #one element per worm; the group by iterator returns sorted indexes
        all_worms_feats = []
        for worm_id, worm_feat_time in feat_time_group_by_worm:
            worm_id = int(worm_id)

            #read the coordinates of the rows that belong to this worm
            rows = worm_feat_time.index
            worm_skel = skeletons[rows]
            worm_dor_cnt = dorsal_contours[rows]
            worm_ven_cnt = ventral_contours[rows]

            #closed contour: ventral side followed by the reversed dorsal side
            contour = np.hstack((worm_ven_cnt, worm_dor_cnt[:, ::-1, :]))

            worm_basic = OrderedDict()
            worm_basic['id'] = str(worm_id)
            worm_basic['head'] = 'L'
            worm_basic['ventral'] = ventral_side
            #index starting with 0
            worm_basic['ptail'] = worm_ven_cnt.shape[1] - 1
            #convert from frames to seconds
            worm_basic['t'] = worm_feat_time['timestamp'].values / fps
            worm_basic['x'] = worm_skel[:, :, 0]
            worm_basic['y'] = worm_skel[:, :, 1]
            worm_basic['px'] = contour[:, :, 0]
            worm_basic['py'] = contour[:, :, 1]

            if READ_FEATURES:
                worm_features = __addOMGFeat(fid, worm_feat_time, worm_id)
                for feat in worm_features:
                    worm_basic[lab_prefix + feat] = worm_features[feat]

            if IS_FOR_WCON:
                for key in worm_basic:
                    if key not in plain_fields:
                        worm_basic[key] = __reformatForJson(worm_basic[key])

            all_worms_feats.append(worm_basic)

    return all_worms_feats
def save_timeseries_feats_table(features_file, derivate_delta_time):
    """
    Calculate the time series features of every worm and store them in the
    `/timeseries_data` table of `features_file`.

    Any existing `/timeseries_data`, `/event_durations` or
    `/timeseries_features` node is removed first.

    Parameters
    ----------
    features_file : str
        HDF5 file with `/trajectories_data` and the `/coordinates/*` arrays.
        It is opened in 'r+' mode and modified in place.
    derivate_delta_time : float
        Forwarded to `get_timeseries_features`.
    """
    fps = read_fps(features_file)

    with pd.HDFStore(features_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']
    #only use data that was skeletonized
    #trajectories_data = trajectories_data[trajectories_data['skeleton_id']>=0]

    trajectories_data_g = trajectories_data.groupby('worm_index_joined')
    progress_timer = TimeCounter('')
    base_name = get_base_name(features_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Calculating tierpsy features. Worm %i of %i done." % (
            n + 1, tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)

    with tables.File(features_file, 'r+') as fid:
        # remove the results of any previous run
        for gg in [
                '/timeseries_data', '/event_durations', '/timeseries_features'
        ]:
            if gg in fid:
                fid.remove_node(gg)

        feat_dtypes = [(x, np.float32) for x in timeseries_all_columns]
        feat_dtypes = [('worm_index', np.int32),
                       ('timestamp', np.int32)] + feat_dtypes
        timeseries_features = fid.create_table('/',
                                               'timeseries_data',
                                               obj=np.recarray(0, feat_dtypes),
                                               filters=TABLE_FILTERS)

        if '/food_cnt_coord' in fid:
            food_cnt = fid.get_node('/food_cnt_coord')[:]
        else:
            food_cnt = None

        #If i find the ventral side in the multiworm case this has to change
        ventral_side = read_ventral_side(features_file)

        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            # the coordinates are read from the handle that is already open;
            # reopening the file here would also rebind `fid` to a handle
            # that is closed after the first worm
            skel_id = worm_data['skeleton_id'].values

            #deal with any nan in the skeletons
            good_id = skel_id >= 0
            skel_id_val = skel_id[good_id]
            traj_size = skel_id.size

            args = []
            for p in ('skeletons', 'widths', 'dorsal_contours',
                      'ventral_contours'):
                node_str = '/coordinates/' + p
                if node_str in fid:
                    node = fid.get_node(node_str)
                    # rows without a valid skeleton id stay as nan
                    dat = np.full((traj_size, *node.shape[1:]), np.nan)
                    if skel_id_val.size > 0:
                        if len(node.shape) == 3:
                            dd = node[skel_id_val, :, :]
                        else:
                            dd = node[skel_id_val, :]
                        dat[good_id] = dd
                else:
                    # a missing coordinates array is passed on as None
                    dat = None
                args.append(dat)

            timestamp = worm_data['timestamp_raw'].values.astype(np.int32)
            feats = get_timeseries_features(
                *args,
                timestamp=timestamp,
                food_cnt=food_cnt,
                fps=fps,
                ventral_side=ventral_side,
                derivate_delta_time=derivate_delta_time)

            #save timeseries features data
            feats = feats.astype(np.float32)
            feats['worm_index'] = worm_index

            #move the last fields to the first columns
            cols = feats.columns.tolist()
            cols = cols[-1:] + cols[:-1]
            feats = feats[cols]

            feats['worm_index'] = feats['worm_index'].astype(np.int32)
            feats['timestamp'] = feats['timestamp'].astype(np.int32)
            feats = feats.to_records(index=False)

            timeseries_features.append(feats)
            _display_progress(ind_n)
assert not np.any(exp_feats_df.index.duplicated( )) #If there are duplicated indexes there might be an error here return exp_feats_df #%% if __name__ == '__main__': from tierpsy.helper.params import read_fps #fname = '/Users/ajaver/OneDrive - Imperial College London/aggregation/N2_1_Ch1_29062017_182108_comp3_featuresN.hdf5' #%% fname = '/Volumes/behavgenom_archive$/Avelino/screening/CeNDR/Results/CeNDR_Set1_020617/WN2002_worms10_food1-10_Set1_Pos4_Ch4_02062017_115723_featuresN.hdf5' with pd.HDFStore(fname, 'r') as fid: timeseries_data = fid['/timeseries_data'] blob_features = fid['/blob_features'] fps = read_fps(fname) key_in = None #['motion'] key_ex = None #['fraction'] feat_set = None feat_stats = get_summary_stats(timeseries_data, fps, blob_features, 1 / 3, only_abs_ventral=True, feat_selection=(key_in, key_ex, feat_set)) print(feat_stats)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 24 12:40:17 2017

@author: ajaver

Print the fps of every masked video found under `dname`, or 'bad' for the
files where `read_fps` fails.
"""
import glob

from tierpsy.helper.params import read_fps

dname = '/Volumes/behavgenom_archive$/Ida/**/MaskedVideos/**/*.hdf5'
fnames = glob.glob(dname, recursive=True)

for f in fnames:
    try:
        fps = read_fps(f)
        print(fps)
    except Exception:
        # `Exception` rather than a bare except, so Ctrl-C still stops the scan
        print('bad')