def add_trajectory_info(df_stats, worm_index, timeseries_data, fps):
    """
    Add trajectory-level information (worm index, start time, duration,
    fraction of valid skeletons and, if the FOV was split in wells, the well
    name) to a single-trajectory summary dataframe, and move these columns
    to the front.
    """
    df_stats['worm_index'] = worm_index
    df_stats['ini_time'] = timeseries_data['timestamp'].min() / fps
    df_stats['tot_time'] = timeseries_data['timestamp'].size / fps
    df_stats['frac_valid_skels'] = (~timeseries_data['length'].isnull()).mean()

    is_fov_tosplit = was_fov_split(timeseries_data)
    if is_fov_tosplit:
        well_names = set(timeseries_data['well_name']) - set(['n/a'])
        assert len(well_names) == 1, \
            "A single trajectory is spanning more than one well!"
        df_stats['well_name'] = list(well_names)[0]

    # move the newly added columns to the front of the dataframe
    cols = df_stats.columns.tolist()
    if not is_fov_tosplit:
        cols = cols[-4:] + cols[:-4]
    else:
        # there's one extra column (well_name)
        cols = cols[-5:] + cols[:-5]
    df_stats = df_stats[cols]

    return df_stats
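
# A minimal usage sketch for add_trajectory_info (hypothetical toy data, not
# part of the pipeline). It shows the per-trajectory columns being prepended
# to a one-row summary; whether well_name is also added depends on what
# was_fov_split returns for the input:
#
#   import pandas as pd
#   toy_ts = pd.DataFrame({'timestamp': [0, 25, 50, 75],
#                          'length': [1000.0, None, 1010.0, 1005.0],
#                          'well_name': ['A1'] * 4})
#   toy_stats = pd.DataFrame({'speed_50th': [123.4]})
#   out = add_trajectory_info(toy_stats, 1, toy_ts, fps=25)
#   # worm_index, ini_time, tot_time, frac_valid_skels (and well_name, if the
#   # FOV was split) now lead the columns, followed by speed_50th
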
def tierpsy_trajectories_summary(
        fname, time_windows, time_units,
        only_abs_ventral=False, selected_feat=None,
        is_manual_index=False, delta_time=1/3):
    """
    Calculate the trajectory summaries for a given file fname, within the
    given time windows (start and end times are interpreted according to
    time_units).
    """
    fps = read_fps(fname)
    data_in = read_data(fname, time_windows, time_units, fps, is_manual_index)
    if data_in is None:
        return [pd.DataFrame() for iwin in range(len(time_windows))]
    timeseries_data, blob_features = data_in

    # was the fov split in wells? only use the first window to detect that
    is_fov_tosplit = was_fov_split(timeseries_data[0])
    if is_fov_tosplit:
        fovsplitter = FOVMultiWellsSplitter(fname)
        good_wells_df = fovsplitter.wells[['well_name', 'is_good_well']].copy()

    # initialize list of summaries for all time windows
    all_summaries_list = []
    # loop over time windows
    for iwin, window in enumerate(time_windows):
        if timeseries_data[iwin].empty:
            all_summary = pd.DataFrame([])
        else:
            # initialize list of trajectory summaries for given time window
            all_summary = []
            # loop over worm indexes (individual trajectories)
            for w_ind, w_ts_data in timeseries_data[iwin].groupby('worm_index'):
                w_blobs = blob_features[iwin].loc[w_ts_data.index]

                w_ts_data = w_ts_data.reset_index(drop=True)
                w_blobs = w_blobs.reset_index(drop=True)

                worm_feats = get_summary_stats(
                    w_ts_data, fps, w_blobs, delta_time,
                    only_abs_ventral=only_abs_ventral,
                    selected_feat=selected_feat
                    )  # returns empty dataframe when w_ts_data is empty
                worm_feats = pd.DataFrame(worm_feats).T
                worm_feats = add_trajectory_info(worm_feats, w_ind, w_ts_data, fps)

                all_summary.append(worm_feats)

            # concatenate all trajectories in given time window into one dataframe
            all_summary = pd.concat(all_summary, ignore_index=True, sort=False)

            # attach whether each well was good or bad
            # (but only if we have wells)
            if is_fov_tosplit:
                all_summary = all_summary.merge(
                    good_wells_df, on='well_name', how='left')

        # add dataframe to the list of summaries for all time windows
        all_summaries_list.append(all_summary)

    return all_summaries_list
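
# A usage sketch for tierpsy_trajectories_summary; the file path, time window
# spec and time_units value below are hypothetical placeholders (the exact
# window format is whatever read_data expects):
#
#   fname = '/path/to/video_featuresN.hdf5'   # hypothetical
#   time_windows = [[0, -1]]                  # hypothetical: whole video
#   trajectories_summaries = tierpsy_trajectories_summary(
#       fname, time_windows, 'frame_numbers',
#       only_abs_ventral=False, selected_feat=None,
#       is_manual_index=False, delta_time=1/3)
#   # one dataframe per time window, one row per worm trajectory
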
def tierpsy_plate_summary(fname, time_windows, time_units,
                          is_manual_index=False, delta_time=1/3):
    """
    Calculate the plate summaries for a given file fname, within the given
    time windows (start and end times are interpreted according to
    time_units).
    """
    fps = read_fps(fname)
    data_in = read_data(fname, time_windows, time_units, fps, is_manual_index)

    # if manual annotation was chosen and trajectories_data does not contain
    # worm_index_manual, then data_in is None.
    # if time_windows are in seconds and fps is not defined (fps=-1),
    # then data_in is None.
    if data_in is None:
        return [pd.DataFrame() for iwin in range(len(time_windows))]

    timeseries_data, blob_features = data_in

    # was the fov split in wells? only use the first window to detect that,
    # and to extract the list of well names
    is_fov_tosplit = was_fov_split(timeseries_data[0])

    # initialize list of plate summaries for all time windows
    plate_feats_list = []
    for iwin, window in enumerate(time_windows):
        if not is_fov_tosplit:
            plate_feats = get_summary_stats(
                timeseries_data[iwin], fps, blob_features[iwin], delta_time)
            plate_feats_list.append(pd.DataFrame(plate_feats).T)
        else:
            # get list of well names in this time window
            # (maybe some wells looked empty during a whole window,
            # this prevents errors later on)
            well_names_list = list(
                set(timeseries_data[iwin]['well_name']) - set(['n/a']))
            # create a list of well-specific, one-row dataframes
            well_feats_list = []
            for well_name in well_names_list:
                # find entries in timeseries_data[iwin] belonging to the right well
                idx_well = timeseries_data[iwin]['well_name'] == well_name
                well_feats = get_summary_stats(
                    timeseries_data[iwin][idx_well].reset_index(), fps,
                    blob_features[iwin][idx_well].reset_index(), delta_time)
                # first prepend well_name_s to the well_feats series,
                # then transpose it so it is a single-row dataframe,
                # and append it to well_feats_list
                well_name_s = pd.Series({'well_name': well_name})
                well_feats_list.append(
                    pd.DataFrame(pd.concat([well_name_s, well_feats])).T)

            # check: did we find any well?
            if len(well_feats_list) == 0:
                plate_feats_list.append(pd.DataFrame())
            else:
                # now concatenate all the single-row dataframes in well_feats_list
                # into a single dataframe and append it to the growing list
                # (1 entry = 1 window)
                plate_feats = pd.concat(
                    well_feats_list, ignore_index=True, sort=False)
                plate_feats_list.append(plate_feats)

    return plate_feats_list
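
# A matching sketch for tierpsy_plate_summary (same hypothetical inputs as in
# the sketch above); each returned dataframe has a single row for the whole
# plate, or one row per well when the FOV was split:
#
#   plate_summaries = tierpsy_plate_summary(
#       fname, time_windows, 'frame_numbers',
#       is_manual_index=False, delta_time=1/3)
#   # plate_summaries[0] -> summary stats for the first time window
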
def save_feats_stats(features_file, derivate_delta_time):
    """
    Calculate the summary statistics of the whole video (or of each well, if
    the FOV was split in wells) and save them in the /features_stats table of
    the features file.
    """
    with pd.HDFStore(features_file, 'r') as fid:
        fps = fid.get_storer('/trajectories_data').attrs['fps']
        timeseries_data = fid['/timeseries_data']
        blob_features = fid['/blob_features'] if '/blob_features' in fid else None

    # do we need split-FOV summaries?
    is_fov_tosplit = was_fov_split(features_file)

    # check
    if is_fov_tosplit:
        assert 'well_name' in timeseries_data.columns, (
            'fov_wells in features file but no well_name in timeseries_data')

    # Now I want to calculate the stats of the video
    if is_fov_tosplit:
        # get summary stats per well and then concatenate them all
        well_name_list = list(set(timeseries_data['well_name']) - set(['n/a']))
        exp_feats = []
        for wc, well in enumerate(well_name_list):
            print('Processing well {} out of {}'.format(
                wc + 1, len(well_name_list)))
            idx = timeseries_data['well_name'] == well
            # blob_features can be None if /blob_features is missing
            well_blobs = (blob_features[idx].reset_index()
                          if blob_features is not None else None)
            # calculate stats per well
            tmp = get_summary_stats(timeseries_data[idx].reset_index(), fps,
                                    well_blobs, derivate_delta_time)
            tmp = pd.DataFrame(zip(tmp.index, tmp), columns=['name', 'value'])
            tmp['well_name'] = well
            exp_feats.append(tmp)
        # now concat all
        exp_feats = pd.concat(exp_feats, ignore_index=True)
    else:
        # we don't need to split the FOV
        exp_feats = get_summary_stats(timeseries_data, fps,
                                      blob_features, derivate_delta_time)

    # save on disk:
    # if is_fov_tosplit, exp_feats is a dataframe, otherwise a series
    if len(exp_feats) > 0:
        # different syntax according to dataframe or series
        if is_fov_tosplit:
            tot = max(len(x) for x in exp_feats['name'])
            dtypes = {
                'name': 'S{}'.format(tot),
                'value': np.float32,
                'well_name': 'S3'
                }
            exp_feats_rec = exp_feats.to_records(index=False, column_dtypes=dtypes)
        else:
            tot = max(len(x) for x in exp_feats.index)
            dtypes = [('name', 'S{}'.format(tot)), ('value', np.float32)]
            exp_feats_rec = np.array(list(zip(exp_feats.index, exp_feats)),
                                     dtype=dtypes)

        # write on hdf5 file
        with tables.File(features_file, 'r+') as fid:
            if '/features_stats' in fid:
                fid.remove_node('/features_stats')
            fid.create_table(
                '/', 'features_stats',
                obj=exp_feats_rec,
                filters=TABLE_FILTERS)
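
# A usage sketch for save_feats_stats, with a hypothetical file path; once
# written, the /features_stats table can be read back with pytables:
#
#   features_file = '/path/to/video_featuresN.hdf5'   # hypothetical
#   save_feats_stats(features_file, derivate_delta_time=1/3)
#   with tables.File(features_file, 'r') as fid:
#       stats = fid.get_node('/features_stats').read()   # numpy record array
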