def read_file_data(mask_file, feat_file, min_pulse_size_s=3, _is_debug=False):
    
    fps = read_fps(mask_file)
    min_pulse_size = fps*min_pulse_size_s
     
    light_on = read_light_data(mask_file)
    if np.nansum(light_on) < min_pulse_size:
        return
        
    turn_on, turn_off = get_pulses_indexes(light_on, min_pulse_size)
    region_lab = define_regions(light_on.size, turn_on, turn_off)
    region_size = np.bincount(region_lab)[1:]/fps
    
    if _is_debug:
        plt.figure()
        #plt.plot(region_lab)
        plt.plot(light_on)
        plt.plot(turn_on, light_on[turn_on], 'o')
        plt.plot(turn_off, light_on[turn_off], 'x')
        plt.title(os.path.basename(mask_file))
    #read features
    with pd.HDFStore(feat_file, 'r') as fid:
        timeseries_data = fid['/timeseries_data']
        blob_features = fid['/blob_features']
        
    timeseries_data['timestamp'] = timeseries_data['timestamp'].astype(int)
    #label each frame with the corresponding region
    timeseries_data['region_lab'] = region_lab[timeseries_data['timestamp']]
    
    return timeseries_data, blob_features, fps, region_size
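# A minimal, self-contained sketch (not from the original repo) of the lookup
# used above: each table row carries an integer frame timestamp, which is used
# to index directly into the per-frame label array.
import numpy as np
import pandas as pd

_region_lab = np.array([0, 1, 1, 2, 2, 0])         # one label per frame
_rows = pd.DataFrame({'timestamp': [0, 2, 3, 5]})  # table rows with frame ids
_rows['region_lab'] = _region_lab[_rows['timestamp']]
print(_rows['region_lab'].tolist())                # -> [0, 1, 2, 0]
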
def find_rev_frac(results_dir, base_name, expected_pulse_size_s, check_window_s):
    feat_file, mask_file = get_names(results_dir, base_name)
    
    fps = read_fps(feat_file)
    expected_pulse_size = fps*expected_pulse_size_s
    check_window = fps*check_window_s
    
    light_on = read_light_data(mask_file)
    with pd.HDFStore(feat_file, 'r') as fid:
        feat_timeseries = fid['/features_timeseries']
        
    feat_timeseries['timestamp'] = feat_timeseries['timestamp'].astype(int)
    #%%
    #find the indexes where the pulses start and end
    switches = np.diff(light_on.astype(int))
    turn_on, = np.where(switches==1)
    turn_off, = np.where(switches==-1)
    assert turn_on.size == turn_off.size
    #%%
    #find the reversal fraction for each pulse
    rev_fracs = []
    for ini, fin in zip(turn_on, turn_off):
        if fin-ini < expected_pulse_size/2:
            #the pulse is too short, let's ignore it
            continue
        
        before = find_pulse_rev_frac(feat_timeseries, ini-expected_pulse_size, check_window)
        centre = find_pulse_rev_frac(feat_timeseries, ini, check_window)
        after = find_pulse_rev_frac(feat_timeseries, fin+expected_pulse_size, check_window)

        
        rev_fracs.append((before, centre, after))
    return rev_fracs
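# A self-contained sketch of the edge detection above (assumes light_on is a
# 0/1 per-frame trace): differentiating the trace marks where a pulse switches
# on (+1) and off (-1).
import numpy as np

_light_on = np.array([0, 0, 1, 1, 1, 0, 0, 1, 1, 0])
_switches = np.diff(_light_on.astype(int))
_turn_on, = np.where(_switches == 1)    # index just before each pulse starts
_turn_off, = np.where(_switches == -1)  # index of the last frame of each pulse
print(_turn_on, _turn_off)              # -> [1 6] [4 8]
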
Example #3
def read_input(results_dir, base_name, base_window_s, expected_pulse_size_s):
    feat_file, mask_file = get_names(results_dir, base_name)
    fps = read_fps(feat_file)
    expected_pulse_size = fps*expected_pulse_size_s
    base_window = fps*base_window_s
    
    
    
    light_on = read_light_data(mask_file)
    feat_ranges = get_regions(light_on, expected_pulse_size, base_window)
    
    #read features
    with pd.HDFStore(feat_file, 'r') as fid:
        feat_timeseries = fid['/features_timeseries']
    feat_timeseries['timestamp'] = feat_timeseries['timestamp'].astype(int)
    
    for feat in signed_ventral_feats:
        feat_timeseries[feat] = feat_timeseries[feat].abs()
    
    
    return feat_ranges, feat_timeseries, fps
Example #4
    def __init__(self,
                 file_name,
                 worm_index,
                 use_skel_filter=True,
                 worm_index_type='worm_index_joined',
                 smooth_window=-1,
                 POL_DEGREE_DFLT=3):
        # Populates an empty normalized worm.
        # if it does not exist, return 1 as a default; that way we can still
        # calculate the features in pixels and frames instead of micrometers
        # and seconds.
        self.microns_per_pixel = read_microns_per_pixel(file_name, dflt=1)
        self.fps = read_fps(file_name, dflt=1)

        # savitzky-golay filter polynomial order default
        self.POL_DEGREE_DFLT = POL_DEGREE_DFLT
        # save the input parameters
        self.file_name = file_name
        self.worm_index = worm_index
        self.use_skel_filter = use_skel_filter
        self.worm_index_type = worm_index_type
        # set to less than POL_DEGREE_DFLT to eliminate smoothing
        self.smooth_window = smooth_window

        # smooth window must be an odd number larger than the polynomial degree
        # (savitzky-golay filter requirement)
        if self.smooth_window >= self.POL_DEGREE_DFLT and self.smooth_window % 2 == 0:
            self.smooth_window += 1

        self.ventral_side = 'unknown'
        self._h_read_data()

        # smooth data if required
        if self.smooth_window > self.POL_DEGREE_DFLT:
            # print('Smoothing...')
            self.skeleton = _h_smooth_curve_all(self.skeleton,
                                                window=self.smooth_window)
            self.widths = _h_smooth_curve_all(self.widths,
                                              window=self.smooth_window)

        # assert the dimensions of the read data are correct
        self._h_assert_data_dim()
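# A minimal sketch (assumes scipy is installed) of the Savitzky-Golay
# constraint enforced above: the window length must be odd and larger than the
# polynomial degree, hence the parity bump applied to smooth_window.
import numpy as np
from scipy.signal import savgol_filter

_noisy = np.sin(np.linspace(0, 2 * np.pi, 50)) + 0.1 * np.random.randn(50)
_window, _degree = 6, 3
if _window >= _degree and _window % 2 == 0:
    _window += 1   # 6 -> 7: odd and larger than the degree
_smooth = savgol_filter(_noisy, window_length=_window, polyorder=_degree)
print(_smooth.shape)   # (50,)
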
Example #5
def tierpsy_plate_summary_augmented(fname, time_windows, time_units, is_manual_index = False, delta_time = 1/3, **fold_args):
    fps = read_fps(fname)
    data_in = read_data(fname, time_windows, time_units, fps, is_manual_index)
    if data_in is None:
        return [pd.DataFrame() for iwin in range(len(time_windows))]
    timeseries_data, blob_features = data_in

    # initialize list of summaries for all time windows
    all_summaries_list = []

    # loop over time windows
    for iwin,window in enumerate(time_windows):
        if timeseries_data[iwin].empty:
            all_summary = pd.DataFrame([])
        else:
            fold_index = augment_data(timeseries_data[iwin], fps=fps, **fold_args)
            # initialize list of augmented plate summaries for given time window
            all_summary = []
            # loop over folds
            for i_fold, ind_fold in enumerate(fold_index):


                timeseries_data_r = timeseries_data[iwin][ind_fold].reset_index(drop=True)
                blob_features_r = blob_features[iwin][ind_fold].reset_index(drop=True)


                plate_feats = get_summary_stats(timeseries_data_r, fps,  blob_features_r, delta_time)
                plate_feats = pd.DataFrame(plate_feats).T
                plate_feats.insert(0, 'i_fold', i_fold)

                all_summary.append(plate_feats)

            # concatenate all folds in given time window into one dataframe
            all_summary = pd.concat(all_summary, ignore_index=True, sort=False)

        # add dataframe to the list of summaries for all time windows
        all_summaries_list.append(all_summary)

    return all_summaries_list
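# A self-contained sketch (assumption: augment_data returns one boolean mask
# per fold) of the resampling pattern above: each fold selects a row subset
# that is summarised independently and tagged with its fold index.
import numpy as np
import pandas as pd

_df = pd.DataFrame({'speed': np.arange(10, dtype=float)})
_rng = np.random.default_rng(0)
_fold_index = [_rng.random(len(_df)) < 0.5 for _ in range(3)]  # stand-in masks
_summaries = []
for _i_fold, _ind_fold in enumerate(_fold_index):
    _s = _df[_ind_fold].mean().to_frame().T
    _s.insert(0, 'i_fold', _i_fold)
    _summaries.append(_s)
print(pd.concat(_summaries, ignore_index=True))
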
Example #6
def tierpsy_trajectories_summary(fname, time_windows, time_units, is_manual_index = False, delta_time = 1/3):
    """
    Calculate the trajectory summaries for a given file fname, within a given time window
    (units of start time and end time are in frame numbers).
    """
    fps = read_fps(fname)
    data_in = read_data(fname, time_windows, time_units, fps, is_manual_index)
    if data_in is None:
        return [pd.DataFrame() for iwin in range(len(time_windows))]
    timeseries_data, blob_features = data_in

    # initialize list of summaries for all time windows
    all_summaries_list = []
    # loop over time windows
    for iwin,window in enumerate(time_windows):
        if timeseries_data[iwin].empty:
            all_summary = pd.DataFrame([])
        else:
            # initialize list of trajectory summaries for given time window
            all_summary = []
            # loop over worm indexes (individual trajectories)
            for w_ind, w_ts_data in timeseries_data[iwin].groupby('worm_index'):
                w_blobs = blob_features[iwin].loc[w_ts_data.index]

                w_ts_data = w_ts_data.reset_index(drop=True)
                w_blobs = w_blobs.reset_index(drop=True)

                worm_feats = get_summary_stats(w_ts_data, fps,  w_blobs, delta_time) # returns empty dataframe when w_ts_data is empty
                worm_feats = pd.DataFrame(worm_feats).T
                worm_feats = add_trajectory_info(worm_feats, w_ind, w_ts_data, fps)

                all_summary.append(worm_feats)
            # concatenate all trajectories in given time window into one dataframe
            all_summary = pd.concat(all_summary, ignore_index=True, sort=False)

        # add dataframe to the list of summaries for all time windows
        all_summaries_list.append(all_summary)

    return all_summaries_list
Example #7
def tierpsy_plate_summary(fname, time_windows, time_units, is_manual_index = False, delta_time = 1/3):
    """
    Calculate the plate summaries for a given file fname, within a given time window 
    (units of start time and end time are in frame numbers). 
    """
    fps = read_fps(fname)
    data_in = read_data(fname, time_windows, time_units, fps, is_manual_index)
    
    # if manual annotation was chosen and the trajectories_data does not contain 
    # worm_index_manual, then data_in is None
    # if time_windows in seconds and fps is not defined (fps=-1), then data_in is None
    if data_in is None:
        return [pd.DataFrame() for iwin in range(len(time_windows))]
    
    timeseries_data, blob_features = data_in
    
    # initialize list of plate summaries for all time windows
    plate_feats_list = []
    for iwin,window in enumerate(time_windows):
        plate_feats = get_summary_stats(timeseries_data[iwin], fps,  blob_features[iwin], delta_time)
        plate_feats_list.append(pd.DataFrame(plate_feats).T)
    
    return plate_feats_list
Example #8
def tierpsy_trajectories_summary(fname,
                                 is_manual_index=False,
                                 delta_time=1 / 3):
    fps = read_fps(fname)
    data_in = read_data(fname, is_manual_index)
    if data_in is None:
        return
    timeseries_data, blob_features = data_in

    all_summary = []
    for w_ind, w_ts_data in timeseries_data.groupby('worm_index'):
        w_blobs = blob_features.loc[w_ts_data.index]

        w_ts_data = w_ts_data.reset_index(drop=True)
        w_blobs = w_blobs.reset_index(drop=True)

        worm_feats = get_summary_stats(w_ts_data, fps, w_blobs, delta_time)
        worm_feats = pd.DataFrame(worm_feats).T
        worm_feats = add_trajectory_info(worm_feats, w_ind, w_ts_data, fps)

        all_summary.append(worm_feats)
    all_summary = pd.concat(all_summary, ignore_index=True)
    return all_summary
Example #9
def ow_plate_summary_augmented(fname, **fold_args):
    #NOTE: I will only augment the timeseries features. 
    #It is not trivial to include the event features sampling over time.
    
    fps = read_fps(fname)
    with pd.HDFStore(fname, 'r') as fid:
        features_timeseries = fid['/features_timeseries']
    
    fold_index = augment_data(features_timeseries, fps=fps, **fold_args)
    
    valid_order = None
    wStats = WormStats()
    
    all_summary = []
    for i_fold, ind_fold in enumerate(fold_index):
        timeseries_data_r = features_timeseries[ind_fold].reset_index(drop=True)
        
        
        all_feats = {}
        for cc in timeseries_data_r:
            all_feats[cc] = timeseries_data_r[cc].values
        exp_feats = wStats.getWormStats(all_feats, np.nanmean)
        exp_feats = pd.DataFrame(exp_feats)
        
        if valid_order is None:
            #only calculate this the first time...
            valid_order = [x for x in exp_feats.columns if x not in wStats.extra_fields]
        exp_feats = exp_feats.loc[:, valid_order]
        
        exp_feats.insert(0, 'i_fold', i_fold)
        
        
        all_summary.append(exp_feats)
    
    all_summary = pd.concat(all_summary, ignore_index=True)
   
    return all_summary
Example #10
def read_file_data(mask_file, feat_file, min_pulse_size_s=3, _is_debug=False):

    fps = read_fps(mask_file)
    min_pulse_size = fps * min_pulse_size_s

    #read features
    with pd.HDFStore(feat_file, 'r') as fid:
        timeseries_data = fid['/timeseries_data']
        blob_features = fid['/blob_features']
        trajectories_data = fid['/trajectories_data']

    light_on = read_light_data(mask_file, trajectories_data)
    if np.nansum(light_on) < min_pulse_size:
        return

    turn_on, turn_off = get_pulses_indexes(light_on, min_pulse_size)
    region_lab = define_regions(light_on.size, turn_on, turn_off)
    region_size = np.bincount(region_lab)[1:] / fps

    if _is_debug:
        plt.figure()
        #plt.plot(region_lab)
        plt.plot(light_on)
        plt.plot(turn_on, light_on[turn_on], 'o')
        plt.plot(turn_off, light_on[turn_off], 'x')
        plt.title(os.path.basename(mask_file))

    timeseries_data['timestamp'] = timeseries_data['timestamp'].astype(int)
    #label each frame with the corresponding region
    timeseries_data['region_lab'] = region_lab[timeseries_data['timestamp']]

    with tables.File(mask_file, 'r') as fid:
        tot_images = fid.get_node('/mask').shape[0]

    return (timeseries_data, blob_features, fps, region_size, tot_images,
            len(light_on))
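# A short sketch of the np.bincount step above: count how many frames carry
# each region label, drop label 0 (frames outside any region), and convert the
# counts to seconds by dividing by fps.
import numpy as np

_region_lab = np.array([0, 1, 1, 1, 0, 2, 2, 0])
_fps = 2.0
_region_size = np.bincount(_region_lab)[1:] / _fps
print(_region_size)   # -> [1.5 1. ]
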
Example #11
def ow_trajectories_summary(fname):
    
    fps = read_fps(fname)
    with pd.HDFStore(fname, 'r') as fid:
        features_timeseries = fid['/features_timeseries']
    
    all_summary = []
    
    valid_order = None
    wStats = WormStats()
    for w_ind, w_ts_data in features_timeseries.groupby('worm_index'):
        
        ll = ['worm_{}'.format(int(w_ind))]
        all_feats = read_feat_events(fname, ll)
        for cc in w_ts_data:
            all_feats[cc] = w_ts_data[cc].values
        
        
        exp_feats = wStats.getWormStats(all_feats, np.nanmean)
        exp_feats = pd.DataFrame(exp_feats)
        
        if valid_order is None:
            #only calculate this the first time...
            valid_order = [x for x in exp_feats.columns if x not in wStats.extra_fields]
        
        #remove uncalculated indexes from wStats
        exp_feats = exp_feats.loc[:, valid_order]
        assert 'worm_index' not in exp_feats
        
        exp_feats = add_trajectory_info(exp_feats, w_ind, w_ts_data, fps)
        
        
        all_summary.append(exp_feats)
    all_summary = pd.concat(all_summary, ignore_index=True)

    return all_summary
Example #12
def getWormFeaturesFilt(
        skeletons_file,
        features_file,
        use_skel_filter,
        use_manual_join,
        is_single_worm,
        feat_filt_param,
        split_traj_time):
    
    feat_filt_param = min_num_skel_defaults(skeletons_file, **feat_filt_param)


    def _iniFileGroups():
        # initialize groups for the timeseries and event features
        header_timeseries = {
            feat: tables.Float32Col(
                pos=ii) for ii, (feat, _) in enumerate(
                wStats.feat_timeseries_dtype)}
                
        table_timeseries = features_fid.create_table(
            '/', 'features_timeseries', header_timeseries, filters=TABLE_FILTERS)

        # save some data used in the calculation as attributes
        fps, microns_per_pixel, _ = copy_unit_conversions(table_timeseries, skeletons_file)
        table_timeseries._v_attrs['worm_index_type'] = worm_index_type

        # node to save features events
        group_events = features_fid.create_group('/', 'features_events')

        # save the skeletons
        with tables.File(skeletons_file, 'r') as ske_file_id:
            skel_shape = ske_file_id.get_node('/skeleton').shape

        

        worm_coords_array = {}
        w_node = features_fid.create_group('/', 'coordinates')
        for  array_name in ['skeletons', 'dorsal_contours', 'ventral_contours']:
            worm_coords_array[array_name] = features_fid.create_earray(
                w_node,
                array_name,
                shape=(
                    0,
                    skel_shape[1],
                    skel_shape[2]),
                atom=tables.Float32Atom(
                    shape=()),
                filters=TABLE_FILTERS)
        
        # initialize rec array with the averaged features of each worm
        stats_features_df = {stat:np.full(tot_worms, np.nan, dtype=wStats.feat_avg_dtype) for stat in FUNC_FOR_DIV}
    
        return header_timeseries, table_timeseries, group_events, worm_coords_array, stats_features_df
    
    progress_timer = TimeCounter('')
    def _displayProgress(n):
        # display progress
        dd = " Extracting features. Worm %i of %i done." % (n, tot_worms)
        print_flush(
            base_name +
            dd +
            ' Total time:' +
            progress_timer.get_time_str())

    #get the valid number of worms
    good_traj_index, worm_index_type = getGoodTrajIndexes(skeletons_file,
        use_skel_filter,
        use_manual_join,
        is_single_worm, 
        feat_filt_param)
    
    fps = read_fps(skeletons_file)
    split_traj_frames = int(np.round(split_traj_time*fps)) #the fps could be non integer
    
    # function to calculate the progress time. Useful to display progress
    base_name = skeletons_file.rpartition('.')[0].rpartition(os.sep)[-1].rpartition('_')[0]
    
    with tables.File(features_file, 'w') as features_fid:
        #check if the stage was not aligned correctly. Return empty features file otherwise.
        with tables.File(skeletons_file, 'r') as skel_fid:
            if '/experiment_info' in skel_fid:
                dd = skel_fid.get_node('/experiment_info').read()
                features_fid.create_array(
                    '/', 'experiment_info', obj=dd)

        #total number of worms
        tot_worms = len(good_traj_index)
        if tot_worms == 0:
            print_flush(base_name + ' No valid worms found to calculate features. Creating empty file.')
            return

        # initialize by getting the specs data subdivision
        wStats = WormStats()
        all_splitted_feats = {stat:[] for stat in FUNC_FOR_DIV}
    

        #initialize file
        header_timeseries, table_timeseries, group_events, \
        worm_coords_array, stats_features_df = _iniFileGroups()



        _displayProgress(0)
        # start to calculate features for each worm trajectory
        for ind_N, worm_index in enumerate(good_traj_index):
            # initialize worm object, and extract data from skeletons file
            worm = WormFromTable(
                skeletons_file,
                worm_index,
                use_skel_filter=use_skel_filter,
                worm_index_type=worm_index_type,
                smooth_window=5)
            
            if is_single_worm:
                #worm with the stage correction applied
                worm.correct_schafer_worm()
                if np.all(np.isnan(worm.skeleton[:, 0, 0])):
                    print_flush('{} No valid skeletons found after stage correction. Skipping worm index {}'.format(base_name, worm_index))
                    return
            # calculate features
            timeseries_data, events_data, worm_stats = getOpenWormData(worm, wStats)
            
            #get splitted features
            splitted_worms = [x for x in worm.split(split_traj_frames)
                              if x.n_valid_skel > feat_filt_param['min_num_skel'] and
                              x.n_valid_skel/x.n_frames >= feat_filt_param['bad_seg_thresh']]
            
            dd = [getFeatStats(x, wStats)[1] for x in splitted_worms]
            splitted_feats = {stat:[x[stat] for x in dd] for stat in FUNC_FOR_DIV}

            #% add data to save
            # save timeseries data
            table_timeseries.append(timeseries_data)
            table_timeseries.flush()


            # save skeletons
            worm_coords_array['skeletons'].append(worm.skeleton)
            worm_coords_array['dorsal_contours'].append(worm.dorsal_contour)
            worm_coords_array['ventral_contours'].append(worm.ventral_contour)
            
            # save event data as a subgroup per worm
            worm_node = features_fid.create_group(
                group_events, 'worm_%i' % worm_index)
            worm_node._v_attrs['worm_index'] = worm_index
            worm_node._v_attrs['frame_range'] = np.array(
                (worm.first_frame, worm.last_frame))

            for feat in events_data:
                tmp_data = events_data[feat]
                # consider the cases where the output is a single number, empty
                # or None
                if isinstance(tmp_data, (float, int)):
                    tmp_data = np.array([tmp_data])
                if tmp_data is None or tmp_data.size == 0:
                    tmp_data = np.array([np.nan])
                features_fid.create_carray(
                    worm_node, feat, obj=tmp_data, filters=TABLE_FILTERS)

            # store the average for each worm feature
            for stat in FUNC_FOR_DIV:
                stats_features_df[stat][ind_N] = worm_stats[stat]
                
                #append the splitted traj features
                all_splitted_feats[stat] += splitted_feats[stat]
            # report progress
            _displayProgress(ind_N + 1)
        # create and save a table containing the averaged worm feature for each
        # worm
       
        f_node = features_fid.create_group('/', 'features_summary')
        for stat, stats_df in stats_features_df.items():
            splitted_feats = all_splitted_feats[stat]

            #check that the array is not empty
            if len(splitted_feats) > 0:
                splitted_feats_arr = np.array(splitted_feats)
            else:
                #return a row full of nan to indicate a fail
                splitted_feats_arr = np.full(1, np.nan, dtype=wStats.feat_avg_dtype)

            features_fid.create_table(
                f_node, 
                stat, 
                obj = stats_df, 
                filters = TABLE_FILTERS
                )
            
            feat_stat_split = features_fid.create_table(
                f_node, 
                stat + '_split', 
                obj=splitted_feats_arr, 
                filters=TABLE_FILTERS
                )
            feat_stat_split._v_attrs['split_traj_frames'] = split_traj_frames
        
            

            if stat == 'means':
                #FUTURE: I am duplicating this field for backward compatibility, I should remove it later on.
                features_fid.create_table(
                    '/', 
                    'features_means', 
                    obj = stats_df, 
                    filters = TABLE_FILTERS
                    )
                
                features_fid.create_table(
                    '/', 
                    'features_means_split', 
                    obj=splitted_feats_arr, 
                    filters=TABLE_FILTERS
                    )
        
        
    print_flush(
        base_name +
        ' Feature extraction finished: ' +
        progress_timer.get_time_str())
Example #13
def alignStageMotion(masked_file, skeletons_file):

    base_name = get_base_name(masked_file)
    print_flush(base_name + ' Aligning Stage Motion...')
    #%%
    fps = read_fps(skeletons_file)

    #%%
    # Open the information file and read the tracking delay time.
    # (help from segworm findStageMovement)
    # 2. The info file contains the tracking delay. This delay represents the
    # minimum time between stage movements and, conversely, the maximum time it
    # takes for a stage movement to complete. If the delay is too small, the
    # stage movements become chaotic. We load the value for the delay.
    with tables.File(masked_file, 'r') as fid:
        xml_info = fid.get_node('/xml_info').read().decode()
        g_mask = fid.get_node('/mask')

        tot_frames = g_mask.shape[0]
        # Read the scale conversions; we need these to convert pixels into microns
        pixelPerMicronX = 1 / g_mask._v_attrs['pixels2microns_x']
        pixelPerMicronY = 1 / g_mask._v_attrs['pixels2microns_y']

    with pd.HDFStore(masked_file, 'r') as fid:
        stage_log = fid['/stage_log']

    #% this is not the cleanest way, but matlab does not have an xml parser
    #% for text strings
    delay_str = xml_info.partition('<delay>')[-1].partition('</delay>')[0]
    delay_time = float(delay_str) / 1000
    delay_frames = np.ceil(delay_time * fps)

    normScale = np.sqrt((pixelPerMicronX**2 + pixelPerMicronY**2) / 2)
    pixelPerMicronScale = normScale * np.array(
        (np.sign(pixelPerMicronX), np.sign(pixelPerMicronY)))

    #% Compute the rotation matrix.
    #%rotation = 1;
    angle = np.arctan(pixelPerMicronY / pixelPerMicronX)
    if angle > 0:
        angle = np.pi / 4 - angle
    else:
        angle = np.pi / 4 + angle

    cosAngle = np.cos(angle)
    sinAngle = np.sin(angle)
    rotation_matrix = np.array(((cosAngle, -sinAngle), (sinAngle, cosAngle)))
    #%%
    #% Ev's code uses the full vectors without dropping frames
    #% 1. video2Diff differentiates a video frame by frame and outputs the
    #% differential variance. We load these frame differences.
    frame_diffs_d = getFrameDiffVar(masked_file)

    print_flush(base_name + ' Aligning Stage Motion...')
    #%% Read the media times and locations from the log file.
    #% (help from segworm findStageMovement)
    #% 3. The log file contains the initial stage location at media time 0 as
    #% well as the subsequent media times and locations per stage movement. Our
    #% algorithm attempts to match the frame differences in the video (see step
    #% 1) to the media times in this log file. Therefore, we load these media
    #% times and stage locations.
    #%from the .log.csv file
    mediaTimes = stage_log['stage_time'].values
    locations = stage_log[['stage_x', 'stage_y']].values

    #ini stage movement fields
    with tables.File(skeletons_file, 'r+') as fid:
        # delete data from previous analysis if any
        if '/stage_movement' in fid:
            fid.remove_node('/stage_movement', recursive=True)
        g_stage_movement = fid.create_group('/', 'stage_movement')
        g_stage_movement._v_attrs['has_finished'] = 0

        #read and prepare timestamp
        try:
            video_timestamp_ind = fid.get_node('/timestamp/raw')[:]
            if np.any(np.isnan(video_timestamp_ind)):
                raise ValueError()
            else:
                video_timestamp_ind = video_timestamp_ind.astype(int)
        except (tables.exceptions.NoSuchNodeError, ValueError):
            warnings.warn(
                'The timestamp is corrupt or does not exist. I will assume no dropped frames and deduce it from the number of frames.'
            )
            video_timestamp_ind = np.arange(tot_frames, dtype=int)

    #%% The shift makes everything a bit more complicated. I have to remove the first frame before resizing the array, considering the dropped frames.
    if video_timestamp_ind.size > frame_diffs_d.size + 1:
        #% I can tolerate one extra frame (two with respect to frame_diffs)
        #% at the end of the timestamp
        video_timestamp_ind = video_timestamp_ind[:frame_diffs_d.size + 1]

    dd = video_timestamp_ind - np.min(video_timestamp_ind) - 1
    #shift data
    dd = dd[dd >= 0]
    #%%
    if frame_diffs_d.size != dd.size:
        raise ValueError(
            'The number of timestamps does not match the number of frames in the movie.'
        )

    frame_diffs = np.full(int(np.max(video_timestamp_ind)), np.nan)
    frame_diffs[dd] = frame_diffs_d

    #%% save stage data into the skeletons.hdf5
    with tables.File(skeletons_file, 'r+') as fid:
        # I am saving this data before for debugging purposes
        g_stage_movement = fid.get_node('/stage_movement')
        fid.create_carray(g_stage_movement, 'frame_diffs', obj=frame_diffs_d)
        g_stage_movement._v_attrs['fps'] = fps
        g_stage_movement._v_attrs['delay_frames'] = delay_frames
        g_stage_movement._v_attrs[
            'microns_per_pixel_scale'] = pixelPerMicronScale
        g_stage_movement._v_attrs['rotation_matrix'] = rotation_matrix

    #%% try to run the alignment and return empty data if it fails
    is_stage_move, movesI, stage_locations = \
    findStageMovement(frame_diffs, mediaTimes, locations, delay_frames, fps)
    stage_vec_d, is_stage_move_d = shift2video_ref(is_stage_move, movesI,
                                                   stage_locations,
                                                   video_timestamp_ind)

    #%% save stage data into the skeletons.hdf5
    with tables.File(skeletons_file, 'r+') as fid:
        g_stage_movement = fid.get_node('/stage_movement')
        fid.create_carray(g_stage_movement, 'stage_vec', obj=stage_vec_d)
        fid.create_carray(g_stage_movement,
                          'is_stage_move',
                          obj=is_stage_move_d)
        g_stage_movement._v_attrs['has_finished'] = 1

    _h_add_stage_position_pix(masked_file, skeletons_file)
    print_flush(base_name + ' Aligning Stage Motion. Finished.')
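# A self-contained check (not from the original repo) of the rotation matrix
# built above: [[cos, -sin], [sin, cos]] is orthonormal for any angle, so it
# preserves vector lengths when mapping stage coordinates.
import numpy as np

_angle = np.pi / 6
_c, _s = np.cos(_angle), np.sin(_angle)
_R = np.array(((_c, -_s), (_s, _c)))
assert np.allclose(_R @ _R.T, np.eye(2))
assert np.isclose(np.linalg.norm(_R @ np.array([3.0, 4.0])), 5.0)
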
Example #14
 base_names = set(map(remove_ext, fnames))
 
 vid_type = ['control', 'herm', 'male'] 
 data = OrderedDict()
 time_ranges = OrderedDict()
 for x in vid_type:
     data[x] = pd.DataFrame()
     time_ranges[x] = []
 
 
 for base_name in base_names:
     print(base_name)
     skel_file, mask_file = get_names(results_dir, base_name)
     
     
     fps = read_fps(skel_file)
     expected_pulse_size = fps*expected_pulse_size_s
     check_window = fps*check_window_s
     
     light_on = read_light_data(mask_file)
     
     turn_on, turn_off = get_pulses_indexes(light_on, expected_pulse_size)
     
     assert turn_on.size == 1
     with pd.HDFStore(skel_file, 'r') as fid:
         #blob_features = fid['/blob_features']
         trajectories_data = fid['/trajectories_data']
         
         
     dat = trajectories_data.loc[trajectories_data['worm_label']!=0, ['worm_label', 'frame_number']]
     dat['frame_number'] = dat['frame_number'] - turn_on[0]
Example #15
def tierpsy_trajectories_summary(fname,
                                 filter_params,
                                 time_windows,
                                 time_units,
                                 only_abs_ventral=False,
                                 selected_feat=None,
                                 is_manual_index=False,
                                 delta_time=1 / 3):
    """
    Calculate the trajectory summaries for a given file fname, within a given time window
    (units of start time and end time are in frame numbers).
    """
    fps = read_fps(fname)
    data_in = read_data(fname, filter_params, time_windows, time_units, fps,
                        is_manual_index)
    if data_in is None:
        return [pd.DataFrame() for iwin in range(len(time_windows))]
    timeseries_data, blob_features = data_in

    is_fov_tosplit = was_fov_split(fname)
    #    is_fov_tosplit = False
    if is_fov_tosplit:
        fovsplitter = FOVMultiWellsSplitter(fname)
        good_wells_df = fovsplitter.wells[['well_name', 'is_good_well']].copy()
        # print(good_wells_df)

    # initialize list of summaries for all time windows
    all_summaries_list = []
    # loop over time windows
    for iwin, window in enumerate(time_windows):
        if timeseries_data[iwin].empty:
            all_summary = pd.DataFrame([])
        else:
            # initialize list of trajectory summaries for given time window
            all_summary = []
            # loop over worm indexes (individual trajectories)
            for w_ind, w_ts_data in timeseries_data[iwin].groupby(
                    'worm_index'):
                w_blobs = blob_features[iwin].loc[w_ts_data.index]

                w_ts_data = w_ts_data.reset_index(drop=True)
                w_blobs = w_blobs.reset_index(drop=True)

                worm_feats = get_summary_stats(
                    w_ts_data,
                    fps,
                    w_blobs,
                    delta_time,
                    only_abs_ventral=only_abs_ventral,
                    selected_feat=selected_feat
                )  # returns empty dataframe when w_ts_data is empty
                worm_feats['n_skeletons'] = count_skeletons(w_ts_data)
                worm_feats = pd.DataFrame(worm_feats).T
                worm_feats = add_trajectory_info(worm_feats,
                                                 w_ind,
                                                 w_ts_data,
                                                 fps,
                                                 is_fov_tosplit=is_fov_tosplit)

                all_summary.append(worm_feats)
            # concatenate all trajectories in given time window into one dataframe
            all_summary = pd.concat(all_summary, ignore_index=True, sort=False)
            # attach whether the wells was good or bad
            if is_fov_tosplit:  #  but only do this if we have wells
                all_summary = all_summary.merge(good_wells_df,
                                                on='well_name',
                                                how='left')

        # add dataframe to the list of summaries for all time windows
        all_summaries_list.append(all_summary)

    return all_summaries_list
Example #16
def tierpsy_plate_summary(fname,
                          filter_params,
                          time_windows,
                          time_units,
                          only_abs_ventral=False,
                          selected_feat=None,
                          is_manual_index=False,
                          delta_time=1 / 3):
    """
    Calculate the plate summaries for a given file fname, within a given time window
    (units of start time and end time are in frame numbers).
    """
    fps = read_fps(fname)
    data_in = read_data(fname, filter_params, time_windows, time_units, fps,
                        is_manual_index)

    # if manual annotation was chosen and the trajectories_data does not contain
    # worm_index_manual, then data_in is None
    # if time_windows in seconds and fps is not defined (fps=-1), then data_in is None
    if data_in is None:
        return [pd.DataFrame() for iwin in range(len(time_windows))]

    timeseries_data, blob_features = data_in

    # was the fov split in wells? only use the first window to detect that,
    # and to extract the list of well names
    is_fov_tosplit = was_fov_split(fname)
    #    is_fov_tosplit = False

    if is_fov_tosplit:
        fovsplitter = FOVMultiWellsSplitter(fname)
        good_wells_df = fovsplitter.wells[['well_name', 'is_good_well']].copy()
        # print(good_wells_df)

    # initialize list of plate summaries for all time windows
    plate_feats_list = []
    for iwin, window in enumerate(time_windows):
        if is_fov_tosplit == False:
            plate_feats = get_summary_stats(timeseries_data[iwin],
                                            fps,
                                            blob_features[iwin],
                                            delta_time,
                                            only_abs_ventral=only_abs_ventral,
                                            selected_feat=selected_feat)
            plate_feats['n_skeletons'] = count_skeletons(timeseries_data[iwin])
            plate_feats_list.append(pd.DataFrame(plate_feats).T)
        else:
            # get list of well names in this time window
            # (maybe some wells looked empty during a whole window,
            # this prevents errors later on)
            well_names_list = list(
                set(timeseries_data[iwin]['well_name']) - set(['n/a']))
            # create a list of well-specific, one-line long dataframes
            well_feats_list = []
            for well_name in well_names_list:
                # find entries in timeseries_data[iwin] belonging to the right well
                idx_well = timeseries_data[iwin]['well_name'] == well_name
                well_feats = get_summary_stats(
                    timeseries_data[iwin][idx_well].reset_index(),
                    fps,
                    blob_features[iwin][idx_well].reset_index(),
                    delta_time,
                    only_abs_ventral=only_abs_ventral,
                    selected_feat=selected_feat)
                well_feats['n_skeletons'] = count_skeletons(
                    timeseries_data[iwin][idx_well])
                # first prepend the well_name_s to the well_feats series,
                # then transpose it so it is a single-row dataframe,
                # and append it to the well_feats_list
                well_name_s = pd.Series({'well_name': well_name})
                well_feats_list.append(
                    pd.DataFrame(pd.concat([well_name_s, well_feats])).T)
            # check: did we find any well?
            if len(well_feats_list) == 0:
                plate_feats_list.append(pd.DataFrame())
            else:
                # now concatenate all the single-row df in well_feats_list in a single df
                # and append it to the growing list (1 entry = 1 window)
                plate_feats = pd.concat(well_feats_list,
                                        ignore_index=True,
                                        sort=False)
                #                import pdb; pdb.set_trace()
                plate_feats = plate_feats.merge(good_wells_df,
                                                on='well_name',
                                                how='left')
                plate_feats_list.append(plate_feats)

    return plate_feats_list
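# A minimal sketch of the merge above: per-well feature rows are joined with
# the well quality table on 'well_name'; how='left' keeps rows whose well is
# missing from the table (is_good_well becomes NaN there).
import pandas as pd

_feats = pd.DataFrame({'well_name': ['A1', 'B2'], 'speed': [1.0, 2.0]})
_wells = pd.DataFrame({'well_name': ['A1'], 'is_good_well': [True]})
print(_feats.merge(_wells, on='well_name', how='left'))
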
Example #17
def _r_fill_trajectories_data(skeletons_file):
    '''
    Read the /trajectories_data, interpolate any dropped frames,
    change some fields and make sure the data format is 32bit (less space)
    '''
    #%%
    valid_fields = [
        'timestamp_raw', 'timestamp_time', 'worm_index_joined', 'coord_x',
        'coord_y', 'threshold', 'roi_size', 'area', 'frame_number'
    ]
    #%%
    with pd.HDFStore(skeletons_file, 'r') as fid:
        trajectories_data_ori = fid['/trajectories_data']
        if 'worm_index_manual' in trajectories_data_ori:
            valid_fields += ['worm_index_manual', 'worm_label']

    trajectories_data = trajectories_data_ori[valid_fields].copy()
    if 'is_good_skel' in trajectories_data_ori:
        trajectories_data['was_skeletonized'] = trajectories_data_ori[
            'is_good_skel']
    else:
        with tables.File(skeletons_file, 'r') as fid:
            skels = fid.get_node('/skeleton')
            #a skeleton was skeletonized if it is not nan
            was_skeletonized = ~np.isnan(skels[:, 0, 0])
            trajectories_data['was_skeletonized'] = was_skeletonized.astype(
                np.uint8)

    trajectories_data['skeleton_id'] = np.int32(-1)
    trajectories_data[
        'old_trajectory_data_index'] = trajectories_data.index.values.astype(
            np.int32)
    #%%
    #change table to 32bits if necessary
    for col in trajectories_data:
        if trajectories_data[col].dtype == np.int64:
            trajectories_data[col] = trajectories_data[col].astype(np.int32)
        elif trajectories_data[col].dtype == np.float64:
            trajectories_data[col] = trajectories_data[col].astype(np.float32)
        elif trajectories_data[col].dtype == bool:
            trajectories_data[col] = trajectories_data[col].astype(np.uint8)

    assert set(x for _,x in trajectories_data.dtypes.items()) == \
    {np.dtype('uint8'), np.dtype('int32'), np.dtype('float32')}
    #%%
    nan_timestamps = trajectories_data['timestamp_raw'].isnull()
    if nan_timestamps.all():
        fps = read_fps(skeletons_file)
        trajectories_data['timestamp_raw'] = trajectories_data['frame_number']
        trajectories_data[
            'timestamp_time'] = trajectories_data['frame_number'] / fps

    else:
        if nan_timestamps.any():
            warnings.warn(
                'There are still some frames with nan timestamps. I am getting rid of them.'
            )
            trajectories_data = trajectories_data[~nan_timestamps]
        #if it is not nan convert this data into int
        trajectories_data['timestamp_raw'] = trajectories_data[
            'timestamp_raw'].astype(np.int32)

        dflt_d = {
            np.dtype('int32'): -1,
            np.dtype(np.float32): np.nan,
            np.dtype('uint8'): 0
        }
        dflt_val = tuple(
            [dflt_d[x] for _, x in trajectories_data.dtypes.items()])

        all_worm_data = []
        #%%
        for worm_index, worm_data in trajectories_data.groupby(
                'worm_index_joined'):

            worm_data = worm_data.dropna(subset=['timestamp_raw'])
            worm_data = worm_data.drop_duplicates(subset=['timestamp_raw'],
                                                  keep='first')

            if not (worm_data['frame_number']
                    == worm_data['timestamp_raw']).all():
                worm_data = _fill_dropped_frames(worm_data, dflt_val)

            all_worm_data.append(worm_data)

        trajectories_data = pd.concat(all_worm_data, ignore_index=True)
        #%%
    return trajectories_data
Example #18
     for frac_type in ['A7-B3', 'A9-B1']:
         gg = df.groupby('names')
         a = gg.get_group(frac_type)[region_type].values
         b = gg.get_group('A10-B0')[region_type].values    
         t, pprob = ttest_ind(a, b)
         print(frac_type, region_type, pprob)
 #%%
 base_name = [x for x in base_names if 'A10_B0' in x][0]
 feat_file, mask_file = get_names(results_dir, base_name)
 
 light_on = read_light_data(mask_file)
 with pd.HDFStore(feat_file, 'r') as fid:
     feat_timeseries = fid['/features_timeseries']
 feat_timeseries['timestamp'] = feat_timeseries['timestamp'].astype(int)
 #%%
 fps = read_fps(feat_file)
 
 for worm_index, dat in feat_timeseries.groupby('worm_index'):
     #print(worm_index)
     if worm_index !=15:
         continue
     yy = dat['head_speed']
     xx_i = dat['timestamp']
     xx = xx_i/fps
     
     rr = (np.min(yy), np.max(yy))
     pulse = light_on[xx_i]
     pulse = pulse*(rr[1]-rr[0]) + rr[0]
     
     plt.figure(figsize=(12, 5))
     plt.plot(xx, yy)
Example #19
def test_aligment(masked_file, skeletons_file, is_calculate_diff=False):
    #%%
    fps = read_fps(skeletons_file)
    

    with tables.File(masked_file, 'r') as fid:
        xml_info = fid.get_node('/xml_info').read().decode()
    
    with pd.HDFStore(masked_file, 'r') as fid:
        stage_log = fid['/stage_log']
    
    
    #%%
    #% this is not the cleanest way, but matlab does not have an xml parser
    #% for text strings
    delay_str = xml_info.partition('<delay>')[-1].partition('</delay>')[0]
    delay_time = float(delay_str) / 1000
    delay_frames = np.ceil(delay_time * fps)
    mediaTimes = stage_log['stage_time'].values
    locations = stage_log[['stage_x', 'stage_y']].values
    
    if is_calculate_diff:
        frame_diffs, video_timestamp_ind = test_var_diff(masked_file, skeletons_file)
    else:
        #%%
        with tables.File(skeletons_file, 'r') as fid:
            video_timestamp_ind = fid.get_node('/timestamp/raw')[:].astype(int)
            frame_diffs_d = fid.get_node('/stage_movement/frame_diffs')[:]
            frame_diffs_d = np.squeeze(frame_diffs_d)
        #%%
        
        # The shift makes everything a bit more complicated. I have to remove the first frame before resizing the array, considering the dropped frames.
        if video_timestamp_ind.size > frame_diffs_d.size + 1:
            video_timestamp_ind = video_timestamp_ind[:frame_diffs_d.size + 1]
        
        dd = video_timestamp_ind - np.min(video_timestamp_ind) - 1  #shift data
        dd = dd[dd >= 0]
        if frame_diffs_d.size != dd.size:
            raise ValueError('the number of frames and time stamps do not match, nothing to do here')

        frame_diffs = np.full(np.max(dd) + 1, np.nan)
        frame_diffs[dd] = frame_diffs_d
    
    #%%
    
    is_stage_move, movesI, stage_locations = \
        findStageMovement(frame_diffs, mediaTimes, locations, delay_frames, fps)
    
    print(locations)
    stage_vec_d, is_stage_move_d = shift2video_ref(is_stage_move, movesI, stage_locations, video_timestamp_ind)
    print(stage_locations)
    
    #dd = np.diff(video_timestamp_ind)
    #print('T2:', np.where(dd!=1), np.unique(dd))
    #for x in movesI: print(x[0] + 1, x[1], x[1]-x[0]-1)
    
    #%%
    try:
        with tables.File(skeletons_file, 'r') as fid:
            is_stage_move_o = fid.get_node('/stage_movement/is_stage_move')[:]
            is_stage_move_o = np.squeeze(is_stage_move_o)
            stage_vec_o = fid.get_node('/stage_movement/stage_vec')[:]
            stage_vec_o = np.squeeze(stage_vec_o)
            return (is_stage_move_d, is_stage_move_o), (stage_vec_o, stage_vec_d)

    except tables.exceptions.NoSuchNodeError:
        return is_stage_move_d, stage_vec_d
Example #20
#    save_prefix = 'worm_example_small_W{}.npz'
#    is_WT2 = True

    mask_video = '/Volumes/behavgenom_archive$/Lidia/MaskedVideos/Optogenetics-day1/AQ3071-ATR_Set1_Ch1_18072017_191322.hdf5'
    is_WT2 = False

    skeletons_file = mask_video.replace('MaskedVideos', 'Results').replace(
        '.hdf5', '_skeletons.hdf5')
    #%%
    import pandas as pd
    with pd.HDFStore(skeletons_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']
    trajectories_data[trajectories_data['worm_index_joined'] == 2]

    #%%
    fps = read_fps(skeletons_file)
    coords_smooth_window = int(np.round(fps / 3))
    if coords_smooth_window <= 3:
        coords_smooth_window = None

    good_traj_index, worm_index_type = getGoodTrajIndexes(skeletons_file)
    for iw, worm_index in enumerate(good_traj_index):
        print(iw, len(good_traj_index))
        worm = WormFromTable(skeletons_file,
                             worm_index,
                             worm_index_type=worm_index_type)
        if is_WT2: worm.correct_schafer_worm()

        wormN = SmoothedWorm(worm.skeleton,
                             worm.widths,
                             worm.ventral_contour,
Example #21
def _get_timeseries_feats(features_file, delta_time=1 / 3):
    '''
    Get the all the time series features from the skeletons
    '''
    timeseries_features = []
    fps = read_fps(features_file)

    with pd.HDFStore(features_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']

    #only use data that was skeletonized
    #trajectories_data = trajectories_data[trajectories_data['skeleton_id']>=0]

    trajectories_data_g = trajectories_data.groupby('worm_index_joined')
    progress_timer = TimeCounter('')
    base_name = get_base_name(features_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Calculating tierpsy features. Worm %i of %i done." % (n + 1,
                                                                     tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)

    with tables.File(features_file, 'r') as fid:
        if '/food_cnt_coord' in fid:
            food_cnt = fid.get_node('/food_cnt_coord')[:]
        else:
            food_cnt = None

        #If I find the ventral side in the multiworm case, this has to change
        ventral_side = read_ventral_side(features_file)

        timeseries_features = []
        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            with tables.File(features_file, 'r') as fid:
                skel_id = worm_data['skeleton_id'].values

                #deal with any nan in the skeletons
                good_id = skel_id >= 0
                skel_id_val = skel_id[good_id]
                traj_size = skel_id.size

                args = []
                for p in ('skeletons', 'widths', 'dorsal_contours',
                          'ventral_contours'):
                    node = fid.get_node('/coordinates/' + p)

                    dat = np.full((traj_size, *node.shape[1:]), np.nan)
                    if skel_id_val.size > 0:
                        if len(node.shape) == 3:
                            dd = node[skel_id_val, :, :]
                        else:
                            dd = node[skel_id_val, :]
                        dat[good_id] = dd

                    args.append(dat)

                timestamp = worm_data['timestamp_raw'].values.astype(np.int32)

            feats = get_timeseries_features(*args,
                                            timestamp=timestamp,
                                            food_cnt=food_cnt,
                                            fps=fps,
                                            ventral_side=ventral_side)
            #save timeseries features data
            feats = feats.astype(np.float32)
            feats['worm_index'] = worm_index
            #move the last fields to the first columns
            cols = feats.columns.tolist()
            cols = cols[-1:] + cols[:-1]
            feats = feats[cols]

            feats['worm_index'] = feats['worm_index'].astype(np.int32)
            feats['timestamp'] = feats['timestamp'].astype(np.int32)

            timeseries_features.append(feats)
            _display_progress(ind_n)

        timeseries_features = pd.concat(timeseries_features, ignore_index=True)

    return timeseries_features
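# A short sketch of the column rotation above: slicing the column list moves
# the trailing 'worm_index' column to the front without touching the data.
import pandas as pd

_feats = pd.DataFrame({'speed': [0.1], 'length': [1.2], 'worm_index': [7]})
_cols = _feats.columns.tolist()
_feats = _feats[_cols[-1:] + _cols[:-1]]
print(_feats.columns.tolist())   # -> ['worm_index', 'speed', 'length']
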
Example #22
    def updateSkelFile(self, skeletons_file):
        super().updateSkelFile(skeletons_file)

        if not self.skeletons_file or self.trajectories_data is None:
            self.food_coordinates = None
            return

        with tables.File(self.skeletons_file, 'r') as fid:
            if not '/food_cnt_coord' in fid:
                self.food_coordinates = None
                self.ui.checkBox_showFood.setEnabled(False)
            else:
                self.food_coordinates = fid.get_node('/food_cnt_coord')[:]
                self.ui.checkBox_showFood.setEnabled(True)

        #correct the index in case it was given before as worm_index_N
        if 'worm_index_N' in self.trajectories_data:
            self.trajectories_data = self.trajectories_data.rename(
                columns={'worm_index_N': 'worm_index_manual'})

        if not 'worm_index_manual' in self.trajectories_data:
            self.trajectories_data['worm_label'] = self.wlab['U']
            self.trajectories_data[
                'worm_index_manual'] = self.trajectories_data[
                    'worm_index_joined']

        self.updateWormIndexTypeMenu()

        #read filter skeletons parameters
        with tables.File(self.skeletons_file, 'r') as skel_fid:

            # if any of this fields is missing load the default parameters
            self.param_default = TrackerParams()
            try:
                ss = skel_fid.get_node('/provenance_tracking/ske_filt').read()
                ss = json.loads(ss.decode("utf-8"))
                saved_func_args = json.loads(ss['func_arguments'])

                self.feat_filt_param = {
                    x: saved_func_args[x]
                    for x in
                    ['min_num_skel', 'bad_seg_thresh', 'min_displacement']
                }
            except (KeyError, tables.exceptions.NoSuchNodeError):
                self.feat_filt_param = get_feat_filt_param(
                    self.param_default.p_dict)

        self.expected_fps = read_fps(self.vfilename)

        #TODO: THIS IS NOT REALLY THE INDEX I USE IN THE FEATURES FILES. I NEED A MORE CLEVER WAY TO SEE WHAT I AM REALLY FILTERING.
        dd = {
            x: self.feat_filt_param[x]
            for x in ['min_num_skel', 'bad_seg_thresh', 'min_displacement']
        }
        good_traj_index, _ = getValidIndexes(
            self.trajectories_data, **dd, worm_index_type=self.worm_index_type)
        self.trajectories_data['is_valid_index'] = self.trajectories_data[
            self.worm_index_type].isin(good_traj_index)

        self.traj_time_grouped = self.trajectories_data.groupby('frame_number')

        self.traj_for_plot = {}  #delete previous plotted trajectories
        self.updateImage()
Example #23
    exp_df['tot_timestamps'] = np.nan

    for irow, row in exp_df.iterrows():
        print(irow + 1, len(exp_df))
        mask_file = row['mask_file']
        feat_file = mask_file.replace('MaskedVideos', 'Results').replace(
            '.hdf5', '_featuresN.hdf5')

        output = read_file_data(mask_file, feat_file, _is_debug=_is_debug)
        if output is None:
            continue
        else:
            timeseries_data, blob_features, fps, region_size, tot_images, tot_timestamps = output

        exp_df.loc[irow, 'has_valid_light'] = True
        fps = read_fps(mask_file)
        exp_df.loc[irow,
                   'video_duration'] = timeseries_data['timestamp'].max() / fps
        #add duration of each region
        for ii, val in enumerate(region_size):
            exp_df.loc[irow, REGION_LABELS_I[ii + 1]] = val

        exp_df.loc[irow, 'tot_images'] = tot_images
        exp_df.loc[irow, 'tot_timestamps'] = tot_timestamps

        r_stats_l = []
        for r_lab, r_dat in timeseries_data.groupby('region_lab'):
            if r_lab not in REGION_LABELS_I:
                #likely a 0 value corresponding to frames between regions
                continue
Example #24
def save_timeseries_feats_table(features_file,
                                derivate_delta_time,
                                fovsplitter_param={}):
    timeseries_features = []
    fps = read_fps(features_file)

    # initialise class for splitting fov
    if len(fovsplitter_param) > 0:
        is_fov_tosplit = True
        assert all(key in fovsplitter_param
                   for key in ['total_n_wells', 'whichsideup', 'well_shape'])
        assert fovsplitter_param['total_n_wells'] > 0
    else:
        is_fov_tosplit = False
    print('is fov to split?', is_fov_tosplit)

    if is_fov_tosplit:
        # split fov in wells
        masked_image_file = features_file.replace('Results', 'MaskedVideos')
        masked_image_file = masked_image_file.replace('_featuresN.hdf5',
                                                      '.hdf5')
        #        fovsplitter = FOVMultiWellsSplitter(masked_image_file=masked_image_file,
        #                                            total_n_wells=fovsplitter_param['total_n_wells'],
        #                                            whichsideup=fovsplitter_param['whichsideup'],
        #                                            well_shape=fovsplitter_param['well_shape'])
        fovsplitter = FOVMultiWellsSplitter(masked_image_file,
                                            **fovsplitter_param)
        # store wells data in the features file
        fovsplitter.write_fov_wells_to_file(features_file)

    with pd.HDFStore(features_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']

    trajectories_data_g = trajectories_data.groupby('worm_index_joined')
    progress_timer = TimeCounter('')
    base_name = get_base_name(features_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Calculating tierpsy features. Worm %i of %i done." % (n + 1,
                                                                     tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)
    with tables.File(features_file, 'r+') as fid:

        for gg in [
                '/timeseries_data', '/event_durations', '/timeseries_features'
        ]:
            if gg in fid:
                fid.remove_node(gg)

        feat_dtypes = [(x, np.float32) for x in timeseries_all_columns]

        feat_dtypes = [('worm_index', np.int32), ('timestamp', np.int32),
                       ('well_name', 'S3')] + feat_dtypes

        timeseries_features = fid.create_table('/',
                                               'timeseries_data',
                                               obj=np.recarray(0, feat_dtypes),
                                               filters=TABLE_FILTERS)

        if '/food_cnt_coord' in fid:
            food_cnt = fid.get_node('/food_cnt_coord')[:]
        else:
            food_cnt = None

        #If I find the ventral side in the multiworm case, this has to change
        ventral_side = read_ventral_side(features_file)

        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):

            skel_id = worm_data['skeleton_id'].values

            #deal with any nan in the skeletons
            good_id = skel_id >= 0
            skel_id_val = skel_id[good_id]
            traj_size = skel_id.size

            args = []
            for p in ('skeletons', 'widths', 'dorsal_contours',
                      'ventral_contours'):

                node_str = '/coordinates/' + p
                if node_str in fid:
                    node = fid.get_node(node_str)
                    dat = np.full((traj_size, *node.shape[1:]), np.nan)
                    if skel_id_val.size > 0:
                        if len(node.shape) == 3:
                            dd = node[skel_id_val, :, :]
                        else:
                            dd = node[skel_id_val, :]
                        dat[good_id] = dd
                else:
                    dat = None

                args.append(dat)

            timestamp = worm_data['timestamp_raw'].values.astype(np.int32)

            feats = get_timeseries_features(
                *args,
                timestamp=timestamp,
                food_cnt=food_cnt,
                fps=fps,
                ventral_side=ventral_side,
                derivate_delta_time=derivate_delta_time)
            #save timeseries features data
            feats = feats.astype(np.float32)
            feats['worm_index'] = worm_index
            if is_fov_tosplit:
                feats['well_name'] = \
                    fovsplitter.find_well_from_trajectories_data(worm_data)
            else:
                feats['well_name'] = 'n/a'
            # cast well_name to the correct type
            # (before shuffling columns, so it remains the last entry)
            # needed because for some reason this does not work:
            # feats['well_name'] = feats['well_name'].astype('S3')
            feats['_well_name'] = feats['well_name'].astype('S3')
            feats.drop(columns='well_name', inplace=True)
            feats.rename(columns={'_well_name': 'well_name'}, inplace=True)

            #move the last fields to the first columns
            cols = feats.columns.tolist()
            cols = cols[-2:] + cols[:-2]
            cols[1], cols[2] = cols[2], cols[1]

            feats = feats[cols]

            feats['worm_index'] = feats['worm_index'].astype(np.int32)
            feats['timestamp'] = feats['timestamp'].astype(np.int32)
            feats = feats.to_records(index=False)

            timeseries_features.append(feats)
            _display_progress(ind_n)
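
#%% A minimal usage sketch for the function above. The file path and the
# well-plate parameters are hypothetical placeholders, not values taken from
# this page, and derivate_delta_time=1/3 is an arbitrary illustrative value.
features_file = '/path/to/Results/experiment_featuresN.hdf5'
save_timeseries_feats_table(features_file,
                            derivate_delta_time=1/3,
                            fovsplitter_param={'total_n_wells': 96,
                                               'whichsideup': 'upright',
                                               'well_shape': 'square'})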
Example #25
#%%

all_data = pd.DataFrame()
for irow, row in exp_df.iterrows():
    print(irow+1, len(exp_df))
    mask_file = row['mask_file']
    feat_file = mask_file.replace('MaskedVideos', 'Results').replace('.hdf5', '_featuresN.hdf5')

    output = read_file_data(mask_file, feat_file, _is_debug=_is_debug)
    if output is None:
        continue
    else:
        timeseries_data, blob_features, fps, region_size = output

    exp_df.loc[irow, 'has_valid_light'] = True
    exp_df.loc[irow, 'video_duration'] = timeseries_data['timestamp'].max()/fps
    #add the duration of each region
    for ii, val in enumerate(region_size):
        exp_df.loc[irow, REGION_LABELS_I[ii+1]] = val
    #%%
    r_stats_l = []
    for r_lab, r_dat in timeseries_data.groupby('region_lab'):
        if r_lab not in REGION_LABELS_I:
            #likely a 0 label corresponding to frames between regions
            continue

        lab = REGION_LABELS_I[r_lab]
        r_blob = blob_features.loc[r_dat.index]

        r_dat = r_dat.reset_index(drop=True)
Example #26
def alignStageMotion_new(masked_file, skeletons_file):
    fps = read_fps(skeletons_file)
    
    with tables.File(skeletons_file, 'r+') as fid:
        # delete data from previous analysis if any
        if '/stage_movement' not in fid:
            g_stage_movement = fid.create_group('/', 'stage_movement')
        else:
            g_stage_movement = fid.get_node('/stage_movement')

        for field in ['stage_vec', 'is_stage_move', 'frame_diffs']:
            if field in g_stage_movement:
                fid.remove_node(g_stage_movement, field)

        g_stage_movement._v_attrs['has_finished'] = 0
        
        video_timestamp_ind = fid.get_node('/timestamp/raw')[:]
        #I can tolerate a nan in the last position
        if np.isnan(video_timestamp_ind[-1]):
            video_timestamp_ind[-1] = video_timestamp_ind[-2] 
        
        if np.any(np.isnan(video_timestamp_ind)):
            exit_flag = 80
            warnings.warn('The timestamp is corrupt or does not exist.\n'
                          'No stage correction processed. Exiting with has_finished flag %i.' % exit_flag)
            #turn on the has_finished flag and exit
            g_stage_movement._v_attrs['has_finished'] = exit_flag
            return
        video_timestamp_ind = video_timestamp_ind.astype(np.int)
    
    # Open the information file and read the tracking delay time.
    # (help from segworm findStageMovement)
    # 2. The info file contains the tracking delay. This delay represents the
    # minimum time between stage movements and, conversely, the maximum time it
    # takes for a stage movement to complete. If the delay is too small, the
    # stage movements become chaotic. We load the value for the delay.
    with tables.File(masked_file, 'r') as fid:
        xml_info = fid.get_node('/xml_info').read().decode()
        g_mask = fid.get_node('/mask')
        #%% Read the scale conversions; we need them to convert pixels into microns
        pixelPerMicronX = 1/g_mask._v_attrs['pixels2microns_x']
        pixelPerMicronY = 1/g_mask._v_attrs['pixels2microns_y']

    with pd.HDFStore(masked_file, 'r') as fid:
        stage_log = fid['/stage_log']
    
    # this is not the cleanest way, but it mirrors the original matlab code,
    # which did not have an xml parser for text strings
    delay_str = xml_info.partition('<delay>')[-1].partition('</delay>')[0]
    delay_time = float(delay_str) / 1000
    delay_frames = np.ceil(delay_time * fps)

    normScale = np.sqrt((pixelPerMicronX ** 2 + pixelPerMicronY ** 2) / 2)
    pixelPerMicronScale = normScale * np.array((np.sign(pixelPerMicronX), np.sign(pixelPerMicronY)))
    
    # Compute the rotation matrix.
    angle = np.arctan(pixelPerMicronY / pixelPerMicronX)
    if angle > 0:
        angle = np.pi / 4 - angle
    else:
        angle = np.pi / 4 + angle

    cosAngle = np.cos(angle)
    sinAngle = np.sin(angle)
    rotation_matrix = np.array(((cosAngle, -sinAngle), (sinAngle, cosAngle)))
    #%%
    # Ev's code uses the full vectors without dropping frames
    # 1. video2Diff differentiates a video frame by frame and outputs the
    # differential variance. We load these frame differences.
    frame_diffs_d = getFrameDiffVar(masked_file)

    #%% Read the media times and locations from the log file.
    # (help from segworm findStageMovement)
    # 3. The log file contains the initial stage location at media time 0 as
    # well as the subsequent media times and locations per stage movement. Our
    # algorithm attempts to match the frame differences in the video (see step
    # 1) to the media times in this log file. Therefore, we load these media
    # times and stage locations.
    # from the .log.csv file
    mediaTimes = stage_log['stage_time'].values
    locations = stage_log[['stage_x', 'stage_y']].values
    
    #%% The shift makes everything a bit more complicated. I have to remove the
    # first frame before resizing the array to account for the dropped frames.
    if video_timestamp_ind.size > frame_diffs_d.size + 1:
        # I can tolerate one extra frame at the end of the timestamp
        # (two with respect to the frame_diff)
        video_timestamp_ind = video_timestamp_ind[:frame_diffs_d.size + 1]

    frame_diffs = np.full(int(np.max(video_timestamp_ind)), np.nan)
    dd = video_timestamp_ind - np.min(video_timestamp_ind) #shift data
    dd = dd[dd>=0]

    if frame_diffs_d.size != dd.size:
        exit_flag = 81
        warnings.warn('The number of timestamps does not match the number of read movie frames.\n'
                      'No stage correction processed. Exiting with has_finished flag %i.' % exit_flag)
        #turn on the has_finished flag and exit
        with tables.File(skeletons_file, 'r+') as fid:
            fid.get_node('/stage_movement')._v_attrs['has_finished'] = exit_flag
        return

    frame_diffs[dd] = frame_diffs_d
    
    
    #%% try to run the alignment and return empty data if it fails
    try:
        is_stage_move, movesI, stage_locations = \
            findStageMovement(frame_diffs, mediaTimes, locations, delay_frames, fps)
        exit_flag = 1
    except Exception:
        exit_flag = 82
        warnings.warn('Returning an all-nan stage vector. Exiting with has_finished flag {}'.format(exit_flag))

        with tables.File(skeletons_file, 'r+') as fid:
            fid.get_node('/stage_movement')._v_attrs['has_finished'] = exit_flag

        # fall back to flagging every frame as stage movement and empty locations
        is_stage_move = np.ones(frame_diffs.size+1)
        stage_locations = []
        movesI = []
    
    #%%
    stage_vec_d, is_stage_move_d = shift2video_ref(is_stage_move, movesI, stage_locations, video_timestamp_ind)
    #%% save stage data into the skeletons.hdf5
    with tables.File(skeletons_file, 'r+') as fid:
        g_stage_movement = fid.get_node('/stage_movement')

        fid.create_carray(g_stage_movement, 'frame_diffs', obj=frame_diffs_d)
        fid.create_carray(g_stage_movement, 'stage_vec', obj=stage_vec_d)
        fid.create_carray(g_stage_movement, 'is_stage_move', obj=is_stage_move_d)

        g_stage_movement._v_attrs['fps'] = fps
        g_stage_movement._v_attrs['delay_frames'] = delay_frames
        g_stage_movement._v_attrs['microns_per_pixel_scale'] = pixelPerMicronScale
        g_stage_movement._v_attrs['rotation_matrix'] = rotation_matrix
        g_stage_movement._v_attrs['has_finished'] = 1
    
    
    print_flush('Finished.')
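
#%% Hedged alternative to the <delay> string parsing in alignStageMotion_new:
# if xml_info is well-formed XML, the standard library can extract the tag
# directly. Only the <delay> tag name is taken from the code above; the rest
# of the info-file layout is not assumed.
import xml.etree.ElementTree as ET

def read_delay_time_s(xml_info):
    delay_node = ET.fromstring(xml_info).find('.//delay')  #search at any depth
    if delay_node is None:
        raise ValueError('No <delay> tag found in the info XML.')
    return float(delay_node.text) / 1000  #the file stores milliseconds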
Example #27
def smooth_skeletons_table(skeletons_file,
                           features_file,
                           is_WT2=False,
                           skel_smooth_window=5,
                           coords_smooth_window_s=0.25,
                           gap_to_interp_s=0.25):

    fps = read_fps(skeletons_file)
    coords_smooth_window = int(np.round(fps * coords_smooth_window_s))
    gap_to_interp = int(np.round(fps * gap_to_interp_s))

    if coords_smooth_window <= 3:  #window too small, do not smooth the coordinates
        coords_smooth_window = None

    trajectories_data = _r_fill_trajectories_data(skeletons_file)
    #%%
    trajectories_data_g = trajectories_data.groupby('worm_index_joined')
    progress_timer = TimeCounter('')
    base_name = get_base_name(skeletons_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Smoothing skeletons. Worm %i of %i done." % (n, tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)
    #%%

    #initialize arrays
    food_cnt = read_food_contour(skeletons_file)
    with tables.File(skeletons_file, 'r') as fid:
        n_segments = fid.get_node('/skeleton').shape[1]

    with tables.File(features_file, 'w') as fid_features:
        if food_cnt is not None:
            fid_features.create_array('/',
                                      'food_cnt_coord',
                                      obj=food_cnt.astype(np.float32))

        worm_coords_array = {}
        w_node = fid_features.create_group('/', 'coordinates')
        for array_name in [
                'skeletons', 'dorsal_contours', 'ventral_contours', 'widths'
        ]:
            if array_name != 'widths':
                a_shape = (0, n_segments, 2)
            else:
                a_shape = (0, n_segments)

            worm_coords_array[array_name] = fid_features.create_earray(
                w_node,
                array_name,
                shape=a_shape,
                atom=tables.Float32Atom(shape=()),
                filters=TABLE_FILTERS)

        tot_skeletons = 0
        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            if worm_data['was_skeletonized'].sum() < 2:
                continue

            worm = WormFromTable(skeletons_file,
                                 worm_index,
                                 worm_index_type='worm_index_joined')

            if is_WT2:
                worm.correct_schafer_worm()
            if np.sum(~np.isnan(worm.skeleton[:, 0, 0])) <= 2:
                warnings.warn('Not enough data to smooth. Empty file?')
                wormN = worm

            else:
                wormN = SmoothedWorm(worm.skeleton,
                                     worm.widths,
                                     worm.ventral_contour,
                                     worm.dorsal_contour,
                                     skel_smooth_window=skel_smooth_window,
                                     coords_smooth_window=coords_smooth_window,
                                     gap_to_interp=gap_to_interp)
            dat_index = pd.Series(False,
                                  index=worm_data['timestamp_raw'].values)

            try:
                dat_index[worm.timestamp] = True
            except ValueError:
                raise ValueError('The worm timestamps do not match the rows in trajectories_data.')

            #%%
            skeleton_id = np.arange(wormN.skeleton.shape[0]) + tot_skeletons
            tot_skeletons = skeleton_id[-1] + 1
            row_ind = worm_data.index[dat_index.values]
            trajectories_data.loc[row_ind, 'skeleton_id'] = skeleton_id
            #%%
            #add data
            worm_coords_array['skeletons'].append(wormN.skeleton)
            worm_coords_array['dorsal_contours'].append(wormN.dorsal_contour)
            worm_coords_array['ventral_contours'].append(wormN.ventral_contour)
            worm_coords_array['widths'].append(wormN.widths)

            #display progress
            _display_progress(ind_n + 1)

        #save trajectories data
        newT = fid_features.create_table(
            '/',
            'trajectories_data',
            obj=trajectories_data.to_records(index=False),
            filters=TABLE_FILTERS)
        copy_unit_conversions(newT, skeletons_file)
        newT._v_attrs['is_WT2'] = is_WT2
        newT._v_attrs['ventral_side'] = read_ventral_side(skeletons_file)

        #save blob features interpolating in dropped frames and stage movement (WT2)
        blob_features = _r_fill_blob_features(skeletons_file,
                                              trajectories_data, is_WT2)
        if blob_features is not None:
            fid_features.create_table(
                '/',
                'blob_features',
                obj=blob_features.to_records(index=False),
                filters=TABLE_FILTERS)
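
#%% Minimal usage sketch for smooth_skeletons_table; both paths are
# hypothetical placeholders and the keyword values simply restate the
# function defaults.
smooth_skeletons_table('/path/to/Results/experiment_skeletons.hdf5',
                       '/path/to/Results/experiment_featuresN.hdf5',
                       is_WT2=False,
                       skel_smooth_window=5,
                       coords_smooth_window_s=0.25,
                       gap_to_interp_s=0.25)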
Example #28
def _getData(features_file, READ_FEATURES=False, IS_FOR_WCON=True):
    if IS_FOR_WCON:
        lab_prefix = '@OMG '
    else:
        lab_prefix = ''

    with pd.HDFStore(features_file, 'r') as fid:
        if '/features_timeseries' not in fid:
            return {}  #empty file, nothing to do here

        features_timeseries = fid['/features_timeseries']
        feat_time_group_by_worm = features_timeseries.groupby('worm_index')

    ventral_side = _get_ventral_side(features_file)

    with tables.File(features_file, 'r') as fid:

        #fps used to adjust timestamp to real time
        fps = read_fps(features_file)

        #get pointers to some useful data
        skeletons = fid.get_node('/coordinates/skeletons')
        dorsal_contours = fid.get_node('/coordinates/dorsal_contours')
        ventral_contours = fid.get_node('/coordinates/ventral_contours')

        #let's append the data of each individual worm as a element in a list
        all_worms_feats = []

        #group by iterator will return sorted worm indexes
        for worm_id, worm_feat_time in feat_time_group_by_worm:

            worm_id = int(worm_id)
            #read worm skeletons data
            worm_skel = skeletons[worm_feat_time.index]
            worm_dor_cnt = dorsal_contours[worm_feat_time.index]
            worm_ven_cnt = ventral_contours[worm_feat_time.index]

            #start ordered dictionary with the basic features
            worm_basic = OrderedDict()
            worm_basic['id'] = str(worm_id)
            worm_basic['head'] = 'L'
            worm_basic['ventral'] = ventral_side
            worm_basic['ptail'] = worm_ven_cnt.shape[1] - 1  #index starting with 0

            worm_basic['t'] = worm_feat_time['timestamp'].values / fps  #convert from frames to seconds
            worm_basic['x'] = worm_skel[:, :, 0]
            worm_basic['y'] = worm_skel[:, :, 1]

            contour = np.hstack((worm_ven_cnt, worm_dor_cnt[:, ::-1, :]))
            worm_basic['px'] = contour[:, :, 0]
            worm_basic['py'] = contour[:, :, 1]

            if READ_FEATURES:
                worm_features = __addOMGFeat(fid, worm_feat_time, worm_id)
                for feat in worm_features:
                    worm_basic[lab_prefix + feat] = worm_features[feat]

            if IS_FOR_WCON:
                for x in worm_basic:
                    if x not in ['id', 'head', 'ventral', 'ptail']:
                        worm_basic[x] = __reformatForJson(worm_basic[x])

            #append features
            all_worms_feats.append(worm_basic)

    return all_worms_feats
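
#%% Sketch of the contour convention used in _getData: the closed contour is
# the ventral side followed by the dorsal side traversed in reverse, so the
# endpoints meet at the head and the tail. The shapes below are dummy values
# chosen only for illustration.
import numpy as np

n_frames, n_points = 4, 49
worm_ven_cnt = np.zeros((n_frames, n_points, 2))  #(frame, point, x/y)
worm_dor_cnt = np.ones((n_frames, n_points, 2))

contour = np.hstack((worm_ven_cnt, worm_dor_cnt[:, ::-1, :]))
assert contour.shape == (n_frames, 2 * n_points, 2)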
def save_timeseries_feats_table(features_file, derivate_delta_time):
    timeseries_features = []
    fps = read_fps(features_file)

    with pd.HDFStore(features_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']


    trajectories_data_g = trajectories_data.groupby('worm_index_joined')
    progress_timer = TimeCounter('')
    base_name = get_base_name(features_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Calculating tierpsy features. Worm %i of %i done." % (n + 1,
                                                                     tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)
    with tables.File(features_file, 'r+') as fid:

        for gg in [
                '/timeseries_data', '/event_durations', '/timeseries_features'
        ]:
            if gg in fid:
                fid.remove_node(gg)

        feat_dtypes = [(x, np.float32) for x in timeseries_all_columns]

        feat_dtypes = [('worm_index', np.int32),
                       ('timestamp', np.int32)] + feat_dtypes
        timeseries_features = fid.create_table('/',
                                               'timeseries_data',
                                               obj=np.recarray(0, feat_dtypes),
                                               filters=TABLE_FILTERS)

        if '/food_cnt_coord' in fid:
            food_cnt = fid.get_node('/food_cnt_coord')[:]
        else:
            food_cnt = None

        # If the ventral side is ever determined per worm in the multiworm case, this will have to change
        ventral_side = read_ventral_side(features_file)

        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            # use the outer r+ handle directly; re-opening the same file with a
            # different mode inside the loop makes pytables complain
            skel_id = worm_data['skeleton_id'].values

            #deal with any nan in the skeletons
            good_id = skel_id >= 0
            skel_id_val = skel_id[good_id]
            traj_size = skel_id.size

            args = []
            for p in ('skeletons', 'widths', 'dorsal_contours',
                      'ventral_contours'):

                node_str = '/coordinates/' + p
                if node_str in fid:
                    node = fid.get_node(node_str)
                    dat = np.full((traj_size, *node.shape[1:]), np.nan)
                    if skel_id_val.size > 0:
                        if len(node.shape) == 3:
                            dd = node[skel_id_val, :, :]
                        else:
                            dd = node[skel_id_val, :]
                        dat[good_id] = dd
                else:
                    dat = None

                args.append(dat)

            timestamp = worm_data['timestamp_raw'].values.astype(np.int32)

            feats = get_timeseries_features(
                *args,
                timestamp=timestamp,
                food_cnt=food_cnt,
                fps=fps,
                ventral_side=ventral_side,
                derivate_delta_time=derivate_delta_time)
            #save timeseries features data
            feats = feats.astype(np.float32)
            feats['worm_index'] = worm_index
            #move the last field to the first column
            cols = feats.columns.tolist()
            cols = cols[-1:] + cols[:-1]
            feats = feats[cols]

            feats['worm_index'] = feats['worm_index'].astype(np.int32)
            feats['timestamp'] = feats['timestamp'].astype(np.int32)
            feats = feats.to_records(index=False)

            timeseries_features.append(feats)
            _display_progress(ind_n)
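
#%% Hedged read-back sketch: once the function above has run, /timeseries_data
# can be loaded as a DataFrame, as other examples on this page do. The path is
# a placeholder.
with pd.HDFStore('/path/to/Results/experiment_featuresN.hdf5', 'r') as fid:
    timeseries_data = fid['/timeseries_data']
print(timeseries_data.groupby('worm_index').size())  #frames per worm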
Example #30
    #duplicated indexes would indicate an error upstream
    assert not np.any(exp_feats_df.index.duplicated())

    return exp_feats_df


#%%
if __name__ == '__main__':
    from tierpsy.helper.params import read_fps
    #fname = '/Users/ajaver/OneDrive - Imperial College London/aggregation/N2_1_Ch1_29062017_182108_comp3_featuresN.hdf5'
    #%%

    fname = '/Volumes/behavgenom_archive$/Avelino/screening/CeNDR/Results/CeNDR_Set1_020617/WN2002_worms10_food1-10_Set1_Pos4_Ch4_02062017_115723_featuresN.hdf5'
    with pd.HDFStore(fname, 'r') as fid:
        timeseries_data = fid['/timeseries_data']
        blob_features = fid['/blob_features']
    fps = read_fps(fname)

    key_in = None  #['motion']
    key_ex = None  #['fraction']
    feat_set = None

    feat_stats = get_summary_stats(timeseries_data,
                                   fps,
                                   blob_features,
                                   1 / 3,
                                   only_abs_ventral=True,
                                   feat_selection=(key_in, key_ex, feat_set))

    print(feat_stats)
Example #31
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 24 12:40:17 2017

@author: ajaver
"""

import glob
from tierpsy.helper.params import read_fps

dname = '/Volumes/behavgenom_archive$/Ida/**/MaskedVideos/**/*.hdf5'
fnames = glob.glob(dname, recursive=True)

for f in fnames:
    try:
        fps = read_fps(f)
        print(fps)
    except Exception:
        print('bad', f)
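
#%% Variant sketch: collect the outcome per file instead of just printing, so
# the unreadable videos can be reviewed afterwards. Only names already defined
# in this example are used.
fps_by_file = {}
bad_files = []
for f in fnames:
    try:
        fps_by_file[f] = read_fps(f)
    except Exception:
        bad_files.append(f)
print(len(fps_by_file), 'readable,', len(bad_files), 'unreadable')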