Beispiel #1
0
def tierpsy_plate_summary_augmented(fname,
                                    filter_params,
                                    time_windows,
                                    time_units,
                                    only_abs_ventral=False,
                                    selected_feat=None,
                                    is_manual_index=False,
                                    delta_time=1 / 3,
                                    **fold_args):
    """
    Compute augmented (resampled) plate-level summaries for one file.

    For every time window the timeseries is split into folds via
    augment_data; each fold is summarised independently and tagged with
    its fold index in a leading 'i_fold' column.

    Returns a list with one dataframe per time window (empty dataframes
    for windows with no data, or for every window when the file cannot
    be read).
    """
    fps = read_fps(fname)
    data_in = read_data(fname, filter_params, time_windows, time_units, fps,
                        is_manual_index)
    # read_data signals an unusable file by returning None
    if data_in is None:
        return [pd.DataFrame() for _ in range(len(time_windows))]
    timeseries_data, blob_features = data_in

    # one summary dataframe per time window
    summaries_per_window = []
    for win_id, _window in enumerate(time_windows):
        win_ts = timeseries_data[win_id]
        if win_ts.empty:
            summaries_per_window.append(pd.DataFrame([]))
            continue

        # sample the fold indices for this window
        fold_index = augment_data(win_ts, fps=fps, **fold_args)

        fold_summaries = []
        for fold_id, fold_mask in enumerate(fold_index):
            ts_fold = win_ts[fold_mask].reset_index(drop=True)
            blobs_fold = blob_features[win_id][fold_mask].reset_index(
                drop=True)

            stats = get_summary_stats(
                ts_fold,
                fps,
                blobs_fold,
                delta_time,
                only_abs_ventral=only_abs_ventral,
                selected_feat=selected_feat)

            # single-row dataframe, tagged with the fold it came from
            stats = pd.DataFrame(stats).T
            stats.insert(0, 'i_fold', fold_id)
            fold_summaries.append(stats)

        # all folds of this window stacked into one dataframe
        summaries_per_window.append(
            pd.concat(fold_summaries, ignore_index=True, sort=False))

    return summaries_per_window
Beispiel #2
0
def tierpsy_trajectories_summary(
        fname, time_windows, time_units, only_abs_ventral=False,
        selected_feat=None, is_manual_index=False, delta_time=1/3):
    """
    Calculate the trajectory summaries for a given file fname, within a given time window
    (units of start time and end time are in frame numbers).

    One summary row is produced per worm trajectory in each time window;
    when the field of view was split into wells, each row is also merged
    with the good/bad flag of its well.
    """
    fps = read_fps(fname)
    data_in = read_data(fname, time_windows, time_units, fps, is_manual_index)
    if data_in is None:
        return [pd.DataFrame() for _ in range(len(time_windows))]
    timeseries_data, blob_features = data_in

    # does the FOV need splitting into wells? detect from the first window
    is_fov_tosplit = was_fov_split(timeseries_data[0])
    if is_fov_tosplit:
        fovsplitter = FOVMultiWellsSplitter(fname)
        good_wells_df = fovsplitter.wells[['well_name','is_good_well']].copy()

    # one summary dataframe per time window
    all_summaries_list = []
    for win_id, _window in enumerate(time_windows):
        win_ts = timeseries_data[win_id]
        if win_ts.empty:
            all_summaries_list.append(pd.DataFrame([]))
            continue

        per_worm = []
        # one summary per individual trajectory
        for worm_id, worm_ts in win_ts.groupby('worm_index'):
            worm_blobs = blob_features[win_id].loc[
                worm_ts.index].reset_index(drop=True)
            worm_ts = worm_ts.reset_index(drop=True)

            # get_summary_stats returns an empty dataframe when worm_ts is empty
            feats = get_summary_stats(
                worm_ts, fps, worm_blobs, delta_time,
                only_abs_ventral=only_abs_ventral,
                selected_feat=selected_feat)
            feats = pd.DataFrame(feats).T
            feats = add_trajectory_info(feats, worm_id, worm_ts, fps)
            per_worm.append(feats)

        window_summary = pd.concat(per_worm, ignore_index=True, sort=False)
        # attach whether the well was good or bad, but only if we have wells
        if is_fov_tosplit:
            window_summary = window_summary.merge(good_wells_df,
                                                  on='well_name',
                                                  how='left')
        all_summaries_list.append(window_summary)

    return all_summaries_list
Beispiel #3
0
def tierpsy_trajectories_summary(fname,
                                 time_windows,
                                 time_units,
                                 is_manual_index=False,
                                 delta_time=1 / 3):
    """
    Calculate the trajectory summaries for a given file fname, within a given time window
    (units of start time and end time are in frame numbers).

    One summary row is produced per worm trajectory in each time window.
    Returns a list with one dataframe per time window.
    """
    fps = read_fps(fname)
    data_in = read_data(fname, time_windows, time_units, fps, is_manual_index)
    if data_in is None:
        return [pd.DataFrame() for _ in range(len(time_windows))]
    timeseries_data, blob_features = data_in

    # one summary dataframe per time window
    summaries = []
    for win_id, _window in enumerate(time_windows):
        win_ts = timeseries_data[win_id]
        if win_ts.empty:
            summaries.append(pd.DataFrame([]))
            continue

        per_worm = []
        # one summary per individual trajectory (worm index)
        for worm_id, worm_ts in win_ts.groupby('worm_index'):
            worm_blobs = blob_features[win_id].loc[
                worm_ts.index].reset_index(drop=True)
            worm_ts = worm_ts.reset_index(drop=True)

            # get_summary_stats returns an empty dataframe when worm_ts is empty
            feats = get_summary_stats(worm_ts, fps, worm_blobs, delta_time)
            feats = pd.DataFrame(feats).T
            feats = add_trajectory_info(feats, worm_id, worm_ts, fps)
            per_worm.append(feats)

        # all trajectories of this window stacked into one dataframe
        summaries.append(pd.concat(per_worm, ignore_index=True, sort=False))

    return summaries
def tierpsy_plate_summary(fname, time_windows, time_units, is_manual_index=False, delta_time=1/3):
    """
    Calculate the plate summaries for a given file fname, within a given time window
    (units of start time and end time are in frame numbers).

    Returns a list with one single-row dataframe per time window.
    """
    fps = read_fps(fname)
    data_in = read_data(fname, time_windows, time_units, fps, is_manual_index)

    # data_in is None when manual annotation was chosen but trajectories_data
    # has no worm_index_manual, or when time_windows are in seconds and fps
    # is not defined (fps=-1)
    if data_in is None:
        return [pd.DataFrame() for _ in range(len(time_windows))]

    timeseries_data, blob_features = data_in

    # one plate-level summary (transposed to a single-row df) per time window
    return [
        pd.DataFrame(
            get_summary_stats(timeseries_data[iwin], fps,
                              blob_features[iwin], delta_time)).T
        for iwin in range(len(time_windows))
    ]
def save_feats_stats(features_file, derivate_delta_time):
    """
    Compute whole-video summary statistics and store them in the
    '/features_stats' table of the features HDF5 file.

    Parameters
    ----------
    features_file : path to a features HDF5 file; it is read first, then
        updated in place.
    derivate_delta_time : time step forwarded to get_summary_stats.
    """
    with pd.HDFStore(features_file, 'r') as fid:
        fps = fid.get_storer('/trajectories_data').attrs['fps']
        timeseries_data = fid['/timeseries_data']
        # the blob features table may be absent; pass None in that case
        blob_features = fid[
            '/blob_features'] if '/blob_features' in fid else None

    #Now I want to calculate the stats of the video
    exp_feats = get_summary_stats(timeseries_data, fps, blob_features,
                                  derivate_delta_time)

    # nothing to save when no stats were produced
    if len(exp_feats) > 0:
        # build a structured (name, value) record array; feature names are
        # fixed-width byte strings sized to the longest name
        tot = max(len(x) for x in exp_feats.index)
        dtypes = [('name', 'S{}'.format(tot)), ('value', np.float32)]
        exp_feats_rec = np.array(list(zip(exp_feats.index, exp_feats)),
                                 dtype=dtypes)
        with tables.File(features_file, 'r+') as fid:
            # remove a pre-existing stats table before re-creating it
            for gg in ['/features_stats']:
                if gg in fid:
                    fid.remove_node(gg)
            fid.create_table('/',
                             'features_stats',
                             obj=exp_feats_rec,
                             filters=TABLE_FILTERS)
def tierpsy_plate_summary(fname,
                          filter_params,
                          time_windows,
                          time_units,
                          only_abs_ventral=False,
                          selected_feat=None,
                          is_manual_index=False,
                          delta_time=1 / 3):
    """
    Calculate the plate summaries for a given file fname, within a given time window
    (units of start time and end time are in frame numbers).

    When the field of view was split into wells, one summary row per well
    is produced for each time window (tagged with the well name and merged
    with the well's good/bad flag); otherwise a single plate-wide row is
    produced per window. Returns a list with one dataframe per time window.
    """
    fps = read_fps(fname)
    data_in = read_data(fname, filter_params, time_windows, time_units, fps,
                        is_manual_index)

    # if manual annotation was chosen and the trajectories_data does not contain
    # worm_index_manual, then data_in is None
    # if time_windows in seconds and fps is not defined (fps=-1), then data_in is None
    if data_in is None:
        return [pd.DataFrame() for iwin in range(len(time_windows))]

    timeseries_data, blob_features = data_in

    # was the fov split in wells? only use the first window to detect that,
    # and to extract the list of well names
    is_fov_tosplit = was_fov_split(fname)

    if is_fov_tosplit:
        fovsplitter = FOVMultiWellsSplitter(fname)
        good_wells_df = fovsplitter.wells[['well_name', 'is_good_well']].copy()

    # initialize list of plate summaries for all time windows
    plate_feats_list = []
    for iwin, window in enumerate(time_windows):
        if not is_fov_tosplit:
            # single plate-wide summary for this window
            plate_feats = get_summary_stats(timeseries_data[iwin],
                                            fps,
                                            blob_features[iwin],
                                            delta_time,
                                            only_abs_ventral=only_abs_ventral,
                                            selected_feat=selected_feat)
            plate_feats['n_skeletons'] = count_skeletons(timeseries_data[iwin])
            plate_feats_list.append(pd.DataFrame(plate_feats).T)
        else:
            # get list of well names in this time window
            # (maybe some wells looked empty during a whole window,
            # this prevents errors later on)
            well_names_list = list(
                set(timeseries_data[iwin]['well_name']) - {'n/a'})
            # create a list of well-specific, one-line long dataframes
            well_feats_list = []
            for well_name in well_names_list:
                # find entries in timeseries_data[iwin] belonging to the right well
                idx_well = timeseries_data[iwin]['well_name'] == well_name
                well_feats = get_summary_stats(
                    timeseries_data[iwin][idx_well].reset_index(),
                    fps,
                    blob_features[iwin][idx_well].reset_index(),
                    delta_time,
                    only_abs_ventral=only_abs_ventral,
                    selected_feat=selected_feat)
                well_feats['n_skeletons'] = count_skeletons(
                    timeseries_data[iwin][idx_well])
                # first prepend the well_name_s to the well_feats series,
                # then transpose it so it is a single-row dataframe,
                # and append it to the well_feats_list
                well_name_s = pd.Series({'well_name': well_name})
                well_feats_list.append(
                    pd.DataFrame(pd.concat([well_name_s, well_feats])).T)
            # check: did we find any well?
            if not well_feats_list:
                plate_feats_list.append(pd.DataFrame())
            else:
                # now concatenate all the single-row df in well_feats_list in a single df
                # and append it to the growing list (1 entry = 1 window)
                plate_feats = pd.concat(well_feats_list,
                                        ignore_index=True,
                                        sort=False)
                # attach the good/bad flag of each well
                plate_feats = plate_feats.merge(good_wells_df,
                                                on='well_name',
                                                how='left')
                plate_feats_list.append(plate_feats)

    return plate_feats_list
def save_feats_stats(features_file, derivate_delta_time):
    """
    Compute summary statistics for a features file and write them to its
    '/features_stats' table, splitting the statistics per well when the
    field of view was divided into multiple wells.

    Parameters
    ----------
    features_file : path to a features HDF5 file; it is read first, then
        updated in place.
    derivate_delta_time : time step forwarded to get_summary_stats.
    """
    with pd.HDFStore(features_file, 'r') as fid:
        fps = fid.get_storer('/trajectories_data').attrs['fps']
        timeseries_data = fid['/timeseries_data']
        # the blob features table may be absent; pass None in that case
        blob_features = fid[
            '/blob_features'] if '/blob_features' in fid else None
        is_fov_tosplit = was_fov_split(
            features_file)  # do we need split-FOV sumaries?

    # check
    if is_fov_tosplit:
        assert 'well_name' in timeseries_data.columns, (
            'fov_wells in features file but no well_name in timeseries_data')

    #Now I want to calculate the stats of the video
    if is_fov_tosplit:
        # get summary stats per well and then concatenate them all
        # ('n/a' marks entries not assigned to any well)
        well_name_list = list(set(timeseries_data['well_name']) - set(['n/a']))
        exp_feats = []
        for wc, well in enumerate(well_name_list):
            print('Processing well {} out of {}'.format(
                wc, len(well_name_list)))
            idx = timeseries_data['well_name'] == well
            # calculate stats per well
            tmp = get_summary_stats(timeseries_data[idx].reset_index(), fps,
                                    blob_features[idx].reset_index(),
                                    derivate_delta_time)
            # long-format (name, value) rows tagged with the well name
            tmp = pd.DataFrame(zip(tmp.index, tmp), columns=['name', 'value'])
            tmp['well_name'] = well
            exp_feats.append(tmp)

        # now concat all
        exp_feats = pd.concat(exp_feats, ignore_index=True)

    else:  # we don't need to split the FOV

        exp_feats = get_summary_stats(timeseries_data, fps, blob_features,
                                      derivate_delta_time)

    # save on disk
    # now if is_fov_tosplit exp_feats is a dataframe, otherwise a series
    if len(exp_feats) > 0:

        # different syntax according to df or series
        if is_fov_tosplit:
            # feature names stored as fixed-width byte strings sized to the
            # longest name; NOTE(review): 'S3' truncates well names longer
            # than 3 bytes -- confirm the well naming scheme fits
            tot = max(len(x) for x in exp_feats['name'])
            dtypes = {
                'name': 'S{}'.format(tot),
                'value': np.float32,
                'well_name': 'S3'
            }
            exp_feats_rec = exp_feats.to_records(index=False,
                                                 column_dtypes=dtypes)
        else:
            tot = max(len(x) for x in exp_feats.index)
            dtypes = [('name', 'S{}'.format(tot)), ('value', np.float32)]
            exp_feats_rec = np.array(list(zip(exp_feats.index, exp_feats)),
                                     dtype=dtypes)

        # write on hdf5 file
        with tables.File(features_file, 'r+') as fid:
            # remove a pre-existing stats table before re-creating it
            for gg in ['/features_stats']:
                if gg in fid:
                    fid.remove_node(gg)
            fid.create_table('/',
                             'features_stats',
                             obj=exp_feats_rec,
                             filters=TABLE_FILTERS)