def getFilteredSkels(skeletons_file,
                     min_num_skel=100,
                     bad_seg_thresh=0.8,
                     min_displacement=5,
                     critical_alpha=0.01,
                     max_width_ratio=2.25,
                     max_area_ratio=6):
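    """Filter bad skeletons in skeletons_file (in place): discard likely
    coils using head/tail/body width and area ratios (taken from Segworm),
    then discard outliers with respect to the population morphology
    (area, width and length).
    """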

    min_num_skel = min_num_skel_defaults(skeletons_file,
                                         min_num_skel=min_num_skel)

    # check that the skeletonization finished successfully:
    # get_node raises an exception if the '/skeleton' node is missing
    with tables.File(skeletons_file, "r") as ske_file_id:
        ske_file_id.get_node('/skeleton')

    # eliminate skeletons that do not have a decent head, tail and body ratio.
    # These are likely to be coils. Taken from Segworm.
    filterPossibleCoils(skeletons_file,
                        max_width_ratio=max_width_ratio,
                        max_area_ratio=max_area_ratio)

    with pd.HDFStore(skeletons_file, 'r') as table_fid:
        trajectories_data = table_fid['/trajectories_data']

    # get valid rows using the trajectory displacement and the skeletonization
    # success. These indexes will be used to calculate statistics of what
    # represents a valid skeleton.
    good_traj_index, good_skel_row = getValidIndexes(
        trajectories_data,
        min_num_skel=min_num_skel,
        bad_seg_thresh=bad_seg_thresh,
        min_displacement=min_displacement)

    # filter skeletons depending on the population morphology (area, width and length)
    filterByPopulationMorphology(skeletons_file,
                                 good_skel_row,
                                 critical_alpha=critical_alpha)
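
# Minimal usage sketch (hypothetical file name; assumes the helper functions
# used above are importable from this module):
#
#   getFilteredSkels('video_skeletons.hdf5', min_num_skel=100, critical_alpha=0.01)
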
def getWormFeaturesFilt(
        skeletons_file,
        features_file,
        use_skel_filter,
        use_manual_join,
        is_single_worm,
        feat_filt_param,
        split_traj_time):
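    """Calculate the features of every valid worm trajectory in
    skeletons_file and save them into features_file: timeseries features,
    per-worm event features, the worm coordinates, and per-worm (and
    per-fragment) averaged statistics.
    """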
    
    feat_filt_param = min_num_skel_defaults(skeletons_file, **feat_filt_param)


    def _iniFileGroups():
        # initialize groups for the timeseries and event features
        header_timeseries = {
            feat: tables.Float32Col(pos=ii)
            for ii, (feat, _) in enumerate(wStats.feat_timeseries_dtype)}
                
        table_timeseries = features_fid.create_table(
            '/', 'features_timeseries', header_timeseries, filters=TABLE_FILTERS)

        # save some data used in the calculation as attributes
        fps, microns_per_pixel, _ = copy_unit_conversions(table_timeseries, skeletons_file)
        table_timeseries._v_attrs['worm_index_type'] = worm_index_type

        # node to save features events
        group_events = features_fid.create_group('/', 'features_events')

        # save the skeletons
        with tables.File(skeletons_file, 'r') as ske_file_id:
            skel_shape = ske_file_id.get_node('/skeleton').shape

        

        worm_coords_array = {}
        w_node = features_fid.create_group('/', 'coordinates')
        for array_name in ['skeletons', 'dorsal_contours', 'ventral_contours']:
            worm_coords_array[array_name] = features_fid.create_earray(
                w_node,
                array_name,
                shape=(0, skel_shape[1], skel_shape[2]),
                atom=tables.Float32Atom(shape=()),
                filters=TABLE_FILTERS)
        
        # initialize a rec array per statistic with the averaged features of each worm
        stats_features_df = {
            stat: np.full(tot_worms, np.nan, dtype=wStats.feat_avg_dtype)
            for stat in FUNC_FOR_DIV}
    
        return header_timeseries, table_timeseries, group_events, worm_coords_array, stats_features_df
    
    progress_timer = TimeCounter('')
    def _displayProgress(n):
        # display progress
        dd = " Extracting features. Worm %i of %i done." % (n, tot_worms)
        print_flush(
            base_name +
            dd +
            ' Total time:' +
            progress_timer.get_time_str())

    # get the indexes of the valid worm trajectories
    good_traj_index, worm_index_type = getGoodTrajIndexes(
        skeletons_file,
        use_skel_filter,
        use_manual_join,
        is_single_worm,
        feat_filt_param)
    
    fps = read_fps(skeletons_file)
    split_traj_frames = int(np.round(split_traj_time * fps))  # the fps could be non-integer
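    # e.g. (illustrative values) split_traj_time = 300 s at fps = 25.0 gives
    # split_traj_frames = 7500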
    
    # base name of the file, used to label the progress messages
    base_name = skeletons_file.rpartition('.')[0].rpartition(os.sep)[-1].rpartition('_')[0]
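    # e.g. a hypothetical '/data/video_1_skeletons.hdf5' yields base_name 'video_1'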
    
    with tables.File(features_file, 'w') as features_fid:
        # copy the experiment info from the skeletons file into the features file if it exists
        with tables.File(skeletons_file, 'r') as skel_fid:
            if '/experiment_info' in skel_fid:
                dd = skel_fid.get_node('/experiment_info').read()
                features_fid.create_array(
                    '/', 'experiment_info', obj=dd)

        #total number of worms
        tot_worms = len(good_traj_index)
        if tot_worms == 0:
            print_flush(base_name + ' No valid worms found to calculate features. Creating empty file.')
            return

        # initialize the worm statistics object that defines the feature specs and their subdivisions
        wStats = WormStats()
        all_splitted_feats = {stat:[] for stat in FUNC_FOR_DIV}
    

        #initialize file
        header_timeseries, table_timeseries, group_events, \
        worm_coords_array, stats_features_df = _iniFileGroups()



        _displayProgress(0)
        # start to calculate features for each worm trajectory
        for ind_N, worm_index in enumerate(good_traj_index):
            # initialize the worm object and extract its data from the skeletons file
            worm = WormFromTable(
                skeletons_file,
                worm_index,
                use_skel_filter=use_skel_filter,
                worm_index_type=worm_index_type,
                smooth_window=5)
            
            if is_single_worm:
                #worm with the stage correction applied
                worm.correct_schafer_worm()
                if np.all(np.isnan(worm.skeleton[:, 0, 0])):
                    print_flush('{} No valid skeletons found after stage correction. Skipping worm index {}'.format(base_name, worm_index))
                    return
            # calculate features
            timeseries_data, events_data, worm_stats = getOpenWormData(worm, wStats)
            
            # get the features of the trajectory split into fragments of at
            # most split_traj_frames frames, keeping only good fragments
            splitted_worms = [x for x in worm.split(split_traj_frames)
                              if x.n_valid_skel > feat_filt_param['min_num_skel'] and
                              x.n_valid_skel / x.n_frames >= feat_filt_param['bad_seg_thresh']]
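            # e.g. with the (illustrative) defaults min_num_skel=100 and
            # bad_seg_thresh=0.8, a fragment is kept only if it has more than
            # 100 valid skeletons and at least 80% of its frames skeletonized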
            
            dd = [getFeatStats(x, wStats)[1] for x in splitted_worms]
            splitted_feats = {stat:[x[stat] for x in dd] for stat in FUNC_FOR_DIV}

            # add data to save
            # save timeseries data
            table_timeseries.append(timeseries_data)
            table_timeseries.flush()


            # save skeletons
            worm_coords_array['skeletons'].append(worm.skeleton)
            worm_coords_array['dorsal_contours'].append(worm.dorsal_contour)
            worm_coords_array['ventral_contours'].append(worm.ventral_contour)
            
            # save event data as a subgroup per worm
            worm_node = features_fid.create_group(
                group_events, 'worm_%i' % worm_index)
            worm_node._v_attrs['worm_index'] = worm_index
            worm_node._v_attrs['frame_range'] = np.array(
                (worm.first_frame, worm.last_frame))

            for feat in events_data:
                tmp_data = events_data[feat]
                # consider the cases where the output is a single number, empty
                # or None
                if isinstance(tmp_data, (float, int)):
                    tmp_data = np.array([tmp_data])
                if tmp_data is None or tmp_data.size == 0:
                    tmp_data = np.array([np.nan])
                features_fid.create_carray(
                    worm_node, feat, obj=tmp_data, filters=TABLE_FILTERS)

            # store the average for each worm feature
            for stat in FUNC_FOR_DIV:
                stats_features_df[stat][ind_N] = worm_stats[stat]
                
                # append the features of the split trajectory fragments
                all_splitted_feats[stat] += splitted_feats[stat]
            # report progress
            _displayProgress(ind_N + 1)
        # create and save a table containing the averaged features for each worm
        f_node = features_fid.create_group('/', 'features_summary')
        for stat, stats_df in stats_features_df.items():
            splitted_feats = all_splitted_feats[stat]

            #check that the array is not empty
            if len(splitted_feats) > 0:
                splitted_feats_arr = np.array(splitted_feats)
            else:
                # return a row full of nan to indicate a failure
                splitted_feats_arr = np.full(1, np.nan, dtype=wStats.feat_avg_dtype)

            features_fid.create_table(
                f_node,
                stat,
                obj=stats_df,
                filters=TABLE_FILTERS)

            feat_stat_split = features_fid.create_table(
                f_node,
                stat + '_split',
                obj=splitted_feats_arr,
                filters=TABLE_FILTERS)
            feat_stat_split._v_attrs['split_traj_frames'] = split_traj_frames
        
            

            if stat == 'means':
                # FUTURE: I am duplicating this field for backward
                # compatibility; I should remove it later on.
                features_fid.create_table(
                    '/',
                    'features_means',
                    obj=stats_df,
                    filters=TABLE_FILTERS)

                features_fid.create_table(
                    '/',
                    'features_means_split',
                    obj=splitted_feats_arr,
                    filters=TABLE_FILTERS)
        
        
    print_flush(
        base_name +
        ' Feature extraction finished: ' +
        progress_timer.get_time_str())


def getIntensityProfile(masked_image_file,
                        skeletons_file,
                        intensities_file,
                        width_resampling=15,
                        length_resampling=131,
                        min_num_skel=100,
                        smooth_win=11,
                        pol_degree=3,
                        width_percentage=0.5,
                        save_maps=False):
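    """Straighten each valid worm along its skeleton and save the median
    intensity profile (and, if save_maps is True, the full straightened
    intensity maps) into intensities_file.
    """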

    min_num_skel = min_num_skel_defaults(skeletons_file,
                                         min_num_skel=min_num_skel)

    assert smooth_win > pol_degree
    assert min_num_skel > 0
    assert 0 < width_percentage < 1

    # we want to use a symmetrical distance centered on the skeleton, so the
    # resampling sizes must be odd
    if length_resampling % 2 == 0:
        length_resampling += 1
    if width_resampling % 2 == 0:
        width_resampling += 1
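    # e.g. a length_resampling of 130 would be bumped to 131, so the profile
    # has a well-defined central point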

    # get the limits to be averaged from the intensity map
    if save_maps:
        width_win_ind = getWidthWinLimits(width_resampling, width_percentage)
    else:
        width_win_ind = (0, width_resampling)

    # compression filters for the stored tables
    table_filters = tables.Filters(complevel=5,
                                   complib='zlib',
                                   shuffle=True,
                                   fletcher32=True)

    # Get a reduced version of the trajectories_data table with only the valid skeletons.
    # The rows of this new table are going to be saved into skeletons_file
    trajectories_data_valid = setIntMapIndexes(skeletons_file, min_num_skel)

    # let's save this new table into the intensities file
    with tables.File(intensities_file, 'w') as fid:
        fid.create_table('/',
                         'trajectories_data_valid',
                         obj=trajectories_data_valid.to_records(index=False),
                         filters=table_filters)

    tot_rows = len(trajectories_data_valid)
    if tot_rows == 0:
        with tables.File(intensities_file, "r+") as int_file_id:
            # nothing to do here; let's save empty data and exit
            worm_int_avg_tab = int_file_id.create_array(
                "/", "straighten_worm_intensity_median", obj=np.zeros(0))
            worm_int_avg_tab._v_attrs['has_finished'] = 1
        return

    with tables.File(masked_image_file, 'r') as mask_fid, \
            tables.File(skeletons_file, 'r') as ske_file_id, \
            tables.File(intensities_file, "r+") as int_file_id:

        # pointer to the compressed videos
        mask_dataset = mask_fid.get_node("/mask")

        # pointer to skeletons
        skel_tab = ske_file_id.get_node('/skeleton')
        skel_width_tab = ske_file_id.get_node('/width_midbody')


        # we are using Float16 to save space; I am assuming the intensities
        # fit within the uint8 range
        worm_int_avg_tab = int_file_id.create_carray(
            "/",
            "straighten_worm_intensity_median",
            tables.Float16Atom(dflt=np.nan), (tot_rows, length_resampling),
            chunkshape=(1, length_resampling),
            filters=table_filters)

        worm_int_avg_tab._v_attrs['has_finished'] = 0
        worm_int_avg_tab._v_attrs['width_win_ind'] = width_win_ind

        if save_maps:
            worm_int_tab = int_file_id.create_carray(
                "/",
                "straighten_worm_intensity",
                tables.Float16Atom(dflt=np.nan),
                (tot_rows, length_resampling, width_resampling),
                chunkshape=(1, length_resampling, width_resampling),
                filters=table_filters)

        grouped_frames = trajectories_data_valid.groupby('frame_number')
        # variables used to report progress
        base_name = skeletons_file.rpartition('.')[0].rpartition(
            os.sep)[-1].rpartition('_')[0]
        progressTime = TimeCounter('Obtaining intensity maps.',
                                   len(grouped_frames))

        for frame, frame_data in grouped_frames:
            img = mask_dataset[frame, :, :]
            for ii, row_data in frame_data.iterrows():
                skeleton_id = int(row_data['skeleton_id'])
                worm_index = int(row_data['worm_index_joined'])
                int_map_id = int(row_data['int_map_id'])

                # read ROI and skeleton, and put them in the same coordinates
                # map
                worm_img, roi_corner = getWormROI(img, row_data['coord_x'],
                                                  row_data['coord_y'],
                                                  row_data['roi_size'])
                skeleton = skel_tab[skeleton_id, :, :] - roi_corner

                half_width = skel_width_tab[skeleton_id] / 2
                assert not np.isnan(skeleton[0, 0])

                skel_smooth = smoothSkeletons(
                    skeleton,
                    length_resampling=length_resampling,
                    smooth_win=smooth_win,
                    pol_degree=pol_degree)
                straighten_worm, grid_x, grid_y = getStraightenWormInt(
                    worm_img,
                    skel_smooth,
                    half_width=half_width,
                    width_resampling=width_resampling)

                # if you use the mean instead of the median, it is better not to use float16
                int_avg = np.median(
                    straighten_worm[width_win_ind[0]:width_win_ind[1], :],
                    axis=0)

                worm_int_avg_tab[int_map_id] = int_avg

                # only save the full map if it is specified by the user
                if save_maps:
                    worm_int_tab[int_map_id] = straighten_worm.T

            if frame % 500 == 0:
                progress_str = progressTime.get_str(frame)
                print_flush(base_name + ' ' + progress_str)

        worm_int_avg_tab._v_attrs['has_finished'] = 1
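

# A minimal end-to-end sketch (hypothetical file names; the keys of
# feat_filt_param are inferred from the accesses inside getWormFeaturesFilt,
# and this is only roughly the order in which the three functions would run):
if __name__ == '__main__':
    skel_file = 'video_skeletons.hdf5'   # hypothetical skeletons file

    getFilteredSkels(skel_file)
    getIntensityProfile('video.hdf5', skel_file, 'video_intensities.hdf5')
    getWormFeaturesFilt(skel_file,
                        'video_features.hdf5',
                        use_skel_filter=True,
                        use_manual_join=False,
                        is_single_worm=False,
                        feat_filt_param={'min_num_skel': 100,
                                         'bad_seg_thresh': 0.8,
                                         'min_displacement': 5},
                        split_traj_time=300)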