def getFoodContour(mask_file,
                   skeletons_file,
                   use_nn_food_cnt,
                   model_path,
                   solidity_th=0.98,
                   _is_debug=False):
    base_name = get_base_name(mask_file)
    progress_timer = TimeCounter('')
    print_flush("{} Calculating food contour {}".format(base_name, progress_timer.get_time_str()))

    food_cnt = calculate_food_cnt(mask_file,
                                  use_nn_food_cnt=use_nn_food_cnt,
                                  model_path=model_path,
                                  solidity_th=solidity_th,
                                  _is_debug=_is_debug)

    #store the contour coordinates in both the skeletons file and the mask file
    for fname in [skeletons_file, mask_file]:
        with tables.File(fname, 'r+') as fid:
            if '/food_cnt_coord' in fid:
                fid.remove_node('/food_cnt_coord')

            #only save the contour if it is valid
            if food_cnt is not None and \
               food_cnt.size >= 2 and \
               food_cnt.ndim == 2 and \
               food_cnt.shape[1] == 2:
                tab = fid.create_array('/', 'food_cnt_coord', obj=food_cnt)
                tab._v_attrs['use_nn_food_cnt'] = int(use_nn_food_cnt)
def generateMoviesROI(masked_file,
                      trajectories_data,
                      roi_size=-1,
                      progress_prefix='',
                      bgnd_param={},
                      progress_refresh_rate_s=20):

    if len(trajectories_data) == 0:
        print_flush(progress_prefix + ' No valid data. Exiting.')
    else:
        frames = trajectories_data['frame_number'].unique()
        img_generator = generateImages(masked_file, frames=frames, bgnd_param=bgnd_param)

        traj_group_by_frame = trajectories_data.groupby('frame_number')
        progress_time = TimeCounter(progress_prefix, max(frames))

        fps = read_fps(masked_file, dflt=25)
        progress_refresh_rate = int(round(fps * progress_refresh_rate_s))

        for ii, (current_frame, img) in enumerate(img_generator):
            frame_data = traj_group_by_frame.get_group(current_frame)

            #dictionary where keys are the table row and the values the worms ROIs
            yield getAllImgROI(img, frame_data, roi_size)

            if current_frame % progress_refresh_rate == 0:
                print_flush(progress_time.get_str(current_frame))

        print_flush(progress_time.get_str(current_frame))
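# Hypothetical usage sketch for generateMoviesROI (not part of the original
# module): iterate over the per-frame dictionaries it yields. The
# (roi_img, roi_corner) value layout is an assumption about what getAllImgROI
# returns; the arguments are placeholders.
def _example_iterate_worm_rois(masked_file, trajectories_data):
    for worms_in_frame in generateMoviesROI(masked_file, trajectories_data):
        for table_row, (roi_img, roi_corner) in worms_in_frame.items():
            pass  # e.g. measure or plot each worm ROI here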
def filterFiles(self, valid_files, print_cmd=False):
    # serial version kept for reference:
    # for ii, video_file in enumerate(valid_files):
    #     label, ap_obj, unfinished_points = self._checkIndFile(video_file)
    #     self.filtered_files[label].append((ap_obj, unfinished_points))
    #     if (ii % 10) == 0:

    progress_timer = TimeCounter('')
    n_batch = mp.cpu_count()
    if self.is_parallel_check:
        lock = mp.Lock()
        p = mp.Pool(n_batch, initializer=init_analysis_point_lock, initargs=(lock, ))

    all_points = []
    tot_files = len(valid_files)
    for ii in range(0, tot_files, n_batch):
        dat = valid_files[ii:ii + n_batch]
        if self.is_parallel_check:
            res = list(p.map(self._checkIndFile, dat))
        else:
            res = list(map(self._checkIndFile, dat))
        all_points.append(res)

        n_files = len(dat)
        print('Checking file {} of {}. Total time: {}'.format(
            ii + n_files, tot_files, progress_timer.get_time_str()))

    all_points = sum(all_points, [])  #flatten

    # initialize the filtered files lists
    filtered_files_fields = ('SOURCE_GOOD',
                             'SOURCE_BAD',
                             'FINISHED_GOOD',
                             'FINISHED_BAD',
                             'EMPTY_ANALYSIS_LIST')
    self.filtered_files = {key: [] for key in filtered_files_fields}
    for label, ap_obj, unfinished_points in all_points:
        self.filtered_files[label].append((ap_obj, unfinished_points))

    print(BREAK_L)
    print('Finished checking files.\nTotal time elapsed {}'.format(
        progress_timer.get_time_str()))
    print(BREAK_L + '\n')

    cmd_list = self.getCMDlist()
    if print_cmd:
        #print the commands to be executed
        print(BREAK_L)
        print('Commands to be executed.')
        print(BREAK_L)
        print_cmd_list(cmd_list)
        print(BREAK_L + '\n')

    print(self.summary_msg)
    return cmd_list
def createSampleVideo(masked_image_file,
                      sample_video_name='',
                      time_factor=8,
                      size_factor=5,
                      skip_factor=2,
                      dflt_fps=30,
                      codec='MPEG',
                      shift_bgnd=False):
    #skip_factor reduces the size of the movie by using fewer frames (e.g. 15fps instead of 30fps)
    #%%
    if not sample_video_name:
        sample_video_name = getSubSampleVidName(masked_image_file)

    # initialize timers
    base_name = masked_image_file.rpartition('.')[0].rpartition(os.sep)[-1]
    progressTime = TimeCounter('{} Generating subsampled video.'.format(base_name))

    with tables.File(masked_image_file, 'r') as fid:
        masks = fid.get_node('/mask')
        tot_frames, im_h, im_w = masks.shape
        im_h, im_w = im_h // size_factor, im_w // size_factor

        fps = read_fps(masked_image_file, dflt_fps)

        tt_vec = _getCorrectedTimeVec(fid, tot_frames)
        #%%
        #codec values that work: 'H264', 'MPEG', 'XVID'
        vid_writer = cv2.VideoWriter(sample_video_name,
                                     cv2.VideoWriter_fourcc(*codec),
                                     fps / skip_factor,
                                     (im_w, im_h),
                                     isColor=False)
        assert vid_writer.isOpened()

        if shift_bgnd:
            #lazy bgnd calculation: take the first and last frames and use their 97.5th-percentile pixel value
            mm = masks[[0, -1], :, :]
            _bgnd_val = np.percentile(mm[mm != 0], [97.5])[0]

        for frame_number in range(0, tot_frames, int(time_factor * skip_factor)):
            current_frame = int(tt_vec[frame_number])
            img = masks[current_frame]
            if shift_bgnd:
                img[img == 0] = _bgnd_val

            im_new = cv2.resize(img, (im_w, im_h))
            vid_writer.write(im_new)

            if frame_number % (500 * time_factor) == 0:
                # calculate the progress and put it in a string
                print_flush(progressTime.get_str(frame_number))

    vid_writer.release()
    print_flush(progressTime.get_str(frame_number) + ' DONE.')
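# Hypothetical call sketch (the file name is a placeholder, the keyword values
# are just the defaults above): write a downsampled, time-compressed preview
# video next to the original masked file.
def _example_create_sample_video(masked_image_file):
    createSampleVideo(masked_image_file, time_factor=8, size_factor=5, codec='MPEG')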
def get_food_contour(mask_video,
                     min_area=None,
                     n_bins=180,
                     frac_lowess=0.1,
                     is_debug=False):
    '''
    Identify the contour of a food patch. I tested this for the worm rig.
    It assumes the food has a semi-circular shape. The food lawn is very thin,
    so the challenge was to estimate the contour of a very dim area.
    '''
    #%%
    progress_timer = TimeCounter('')
    base_name = get_base_name(mask_video)
    print_flush('{} Calculating food contour...'.format(base_name))

    try:
        with tables.File(mask_video, 'r') as fid:
            #only the first few full-resolution frames are needed for this calculation
            full_data = fid.get_node('/full_data')[:5]
    except tables.exceptions.NoSuchNodeError:
        return None, None

    img = np.max(full_data[:2], axis=0)
    #dark_mask = get_dark_mask(full_data)

    mask = get_patch_mask(img, min_area=min_area)
    circx, circy, best_fit = mask_to_food_contour(mask,
                                                  n_bins=n_bins,
                                                  frac_lowess=frac_lowess)
    #%%
    dd = '{} Food contour calculated. Total time: {}'.format(
        base_name, progress_timer.get_time_str())
    print_flush(dd)
    #%%
    if is_debug:
        from skimage.draw import circle_perimeter
        import matplotlib.pylab as plt

        cpx, cpy = circle_perimeter(*best_fit[1:])

        plt.figure(figsize=(5, 5))
        plt.gca().xaxis.set_ticklabels([])
        plt.gca().yaxis.set_ticklabels([])

        (px, py) = np.where(skeletonize(mask))
        plt.imshow(img, cmap='gray')
        plt.plot(py, px, '.')
        plt.plot(cpx, cpy, '.r')
        plt.plot(circy, circx, '.')
        plt.suptitle(base_name)
        plt.grid('off')
    #%%
    return circx, circy
def reformatRigMaskedVideo(original_file, new_file, plugin_param_file, expected_fps, microns_per_pixel):
    plugin_params = _getWormEnconderParams(plugin_param_file)

    base_name = original_file.rpartition('.')[0].rpartition(os.sep)[-1]

    if not _isValidSource(original_file):
        print_flush(new_file + ' ERROR. File might be corrupt. ' + original_file)
        return

    save_full_interval, buffer_size, mask_params = _getReformatParams(plugin_params)
    with tables.File(original_file, 'r') as fid_old, \
            tables.File(new_file, 'w') as fid_new:
        mask_old = fid_old.get_node('/mask')
        tot_frames, im_height, im_width = mask_old.shape

        progress_timer = TimeCounter('Reformatting Gecko plugin hdf5 video.', tot_frames)

        attr_params = dict(expected_fps=expected_fps,
                           microns_per_pixel=microns_per_pixel,
                           is_light_background=True)
        mask_new, full_new, _ = initMasksGroups(fid_new,
                                                tot_frames,
                                                im_height,
                                                im_width,
                                                attr_params,
                                                save_full_interval,
                                                is_expandable=False)
        mask_new.attrs['plugin_params'] = json.dumps(plugin_params)

        img_buff_ini = mask_old[:buffer_size]
        full_new[0] = img_buff_ini[0]
        mask_new[:buffer_size] = img_buff_ini * (mask_old[buffer_size] != 0)

        for frame in range(buffer_size, tot_frames):
            if frame % save_full_interval != 0:
                mask_new[frame] = mask_old[frame]
            else:
                full_frame_n = frame // save_full_interval
                img = mask_old[frame]
                full_new[full_frame_n] = img
                mask_new[frame] = img * (mask_old[frame - 1] != 0)

            if frame % 500 == 0:
                # calculate the progress and put it in a string
                progress_str = progress_timer.get_str(frame)
                print_flush(base_name + ' ' + progress_str)

        print_flush(base_name + ' Compressed video done. Total time: ' +
                    progress_timer.get_time_str())
def generateROIBuff(masked_image_file, buffer_size, bgnd_param, progress_str='', progress_refresh_rate_s=20):
    img_generator = generateImages(masked_image_file, bgnd_param=bgnd_param)

    with tables.File(masked_image_file, 'r') as mask_fid:
        tot_frames, im_h, im_w = mask_fid.get_node("/mask").shape

    #loop, save data and display progress
    base_name = masked_image_file.rpartition('.')[0].rpartition(os.sep)[-1]
    progress_str = base_name + progress_str

    fps = read_fps(masked_image_file, dflt=25)
    progress_refresh_rate = fps * progress_refresh_rate_s

    progress_time = TimeCounter(progress_str, tot_frames)
    for frame_number, image in img_generator:
        if frame_number % buffer_size == 0:
            if frame_number + buffer_size > tot_frames:
                #shrink the last buffer, otherwise it would never get full
                buffer_size = tot_frames - frame_number

            image_buffer = np.zeros((buffer_size, im_h, im_w), np.uint8)
            ini_frame = frame_number

        image_buffer[frame_number - ini_frame] = image

        #compress if it is the last frame in the buffer
        if (frame_number + 1) % buffer_size == 0 or (frame_number + 1 == tot_frames):
            # z projection: select pixels as connected regions that were
            # flagged as worm candidates at least once in the buffered masks
            main_mask = np.any(image_buffer, axis=0)

            # change from bool to uint8 since opencv requires that datatype
            main_mask = main_mask.astype(np.uint8)

            #calculate the contours, keeping only the external ones (no holes)
            _, ROI_cnts, _ = cv2.findContours(main_mask,
                                              cv2.RETR_EXTERNAL,
                                              cv2.CHAIN_APPROX_NONE)

            yield ROI_cnts, image_buffer, ini_frame

        if frame_number % progress_refresh_rate == 0:
            print_flush(progress_time.get_str(frame_number))

    print_flush(progress_time.get_str(frame_number))
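# A minimal, self-contained sketch (an illustration, not the original helper)
# of the ROI-detection step used in generateROIBuff: z-project the buffer and
# keep the external contours of every region that was non-zero in at least one
# frame. It also shows the defensive unpacking needed because cv2.findContours
# returns three values in OpenCV 3.x but only two in OpenCV 4.x.
def _roi_contours_from_buffer(image_buffer):
    import numpy as np
    import cv2
    main_mask = np.any(image_buffer, axis=0).astype(np.uint8)  # bool -> uint8 for opencv
    ret = cv2.findContours(main_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    ROI_cnts = ret[0] if len(ret) == 2 else ret[1]
    return ROI_cnts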
def generateImages(masked_image_file, frames=[], bgnd_param={}, progress_str='', progress_refresh_rate_s=20):
    #loop, save data and display progress
    base_name = Path(masked_image_file).stem
    progress_str = base_name + progress_str

    fps = read_fps(masked_image_file, dflt=25)
    progress_refresh_rate = fps * progress_refresh_rate_s

    with tables.File(masked_image_file, 'r') as mask_fid:
        mask_dataset = mask_fid.get_node("/mask")
        tot_frames = mask_dataset.shape[0]
        progress_time = TimeCounter(progress_str, tot_frames)

        if len(bgnd_param) > 0:
            if '/bgnd' in mask_fid:
                bgnd_subtractor = BackgroundSubtractorPrecalculated(masked_image_file, **bgnd_param)
            else:
                bgnd_subtractor = BackgroundSubtractorMasked(masked_image_file, **bgnd_param)
        else:
            bgnd_subtractor = None

        if len(frames) == 0:
            frames = range(mask_dataset.shape[0])

        for frame_number in frames:
            if frame_number % progress_refresh_rate == 0:
                print_flush(progress_time.get_str(frame_number))

            image = mask_dataset[frame_number]
            if bgnd_subtractor is not None:
                image = bgnd_subtractor.apply(image, frame_number)

            yield frame_number, image

        print_flush(progress_time.get_str(frame_number))
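# Hypothetical usage sketch for generateImages (the path is a placeholder):
# iterate over the frames of a masked video; passing a non-empty bgnd_param
# would additionally apply background subtraction to every frame.
def _example_iterate_frames(masked_image_file):
    for frame_number, image in generateImages(masked_image_file):
        pass  # process each frame here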
def exec_parallel(input_data, func):
    print('*******', len(input_data))
    progress_timer = TimeCounter()

    n_batch = mp.cpu_count()
    p = mp.Pool(n_batch)

    tot = len(input_data)
    #all_files = all_files[slice(0, len(all_files), 10)] #FOR TESTING

    output_data = []
    for ii in range(0, tot, n_batch):
        dat = input_data[ii:ii + n_batch]
        for x in p.map(func, dat):
            output_data.append(x)

        print('{} of {}. Total time: {}'.format(
            min(ii + n_batch, tot), tot, progress_timer.get_time_str()))

    return output_data
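# Hypothetical usage sketch for exec_parallel (file names are placeholders):
# the worker function must be picklable, i.e. defined at module level, because
# it is dispatched through multiprocessing.Pool.map in batches of cpu_count().
def _count_mask_frames(fname):
    with tables.File(fname, 'r') as fid:
        return fid.get_node('/mask').shape[0]

def _example_exec_parallel(list_of_mask_files):
    return exec_parallel(list_of_mask_files, _count_mask_frames)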
def getWormFeaturesFilt( skeletons_file, features_file, use_skel_filter, use_manual_join, is_single_worm, feat_filt_param, split_traj_time): feat_filt_param = min_num_skel_defaults(skeletons_file, **feat_filt_param) def _iniFileGroups(): # initialize groups for the timeseries and event features header_timeseries = { feat: tables.Float32Col( pos=ii) for ii, (feat, _) in enumerate( wStats.feat_timeseries_dtype)} table_timeseries = features_fid.create_table( '/', 'features_timeseries', header_timeseries, filters=TABLE_FILTERS) # save some data used in the calculation as attributes fps, microns_per_pixel, _ = copy_unit_conversions(table_timeseries, skeletons_file) table_timeseries._v_attrs['worm_index_type'] = worm_index_type # node to save features events group_events = features_fid.create_group('/', 'features_events') # save the skeletons with tables.File(skeletons_file, 'r') as ske_file_id: skel_shape = ske_file_id.get_node('/skeleton').shape worm_coords_array = {} w_node = features_fid.create_group('/', 'coordinates') for array_name in ['skeletons', 'dorsal_contours', 'ventral_contours']: worm_coords_array[array_name] = features_fid.create_earray( w_node, array_name, shape=( 0, skel_shape[1], skel_shape[2]), atom=tables.Float32Atom( shape=()), filters=TABLE_FILTERS) # initialize rec array with the averaged features of each worm stats_features_df = {stat:np.full(tot_worms, np.nan, dtype=wStats.feat_avg_dtype) for stat in FUNC_FOR_DIV} return header_timeseries, table_timeseries, group_events, worm_coords_array, stats_features_df progress_timer = TimeCounter('') def _displayProgress(n): # display progress dd = " Extracting features. Worm %i of %i done." % (n, tot_worms) print_flush( base_name + dd + ' Total time:' + progress_timer.get_time_str()) #get the valid number of worms good_traj_index, worm_index_type = getGoodTrajIndexes(skeletons_file, use_skel_filter, use_manual_join, is_single_worm, feat_filt_param) fps = read_fps(skeletons_file) split_traj_frames = int(np.round(split_traj_time*fps)) #the fps could be non integer # function to calculate the progress time. Useful to display progress base_name = skeletons_file.rpartition('.')[0].rpartition(os.sep)[-1].rpartition('_')[0] with tables.File(features_file, 'w') as features_fid: #check if the stage was not aligned correctly. Return empty features file otherwise. with tables.File(skeletons_file, 'r') as skel_fid: if '/experiment_info' in skel_fid: dd = skel_fid.get_node('/experiment_info').read() features_fid.create_array( '/', 'experiment_info', obj=dd) #total number of worms tot_worms = len(good_traj_index) if tot_worms == 0: print_flush(base_name + ' No valid worms found to calculate features. Creating empty file.') return # initialize by getting the specs data subdivision wStats = WormStats() all_splitted_feats = {stat:[] for stat in FUNC_FOR_DIV} #initialize file header_timeseries, table_timeseries, group_events, \ worm_coords_array, stats_features_df = _iniFileGroups() _displayProgress(0) # start to calculate features for each worm trajectory for ind_N, worm_index in enumerate(good_traj_index): # initialize worm object, and extract data from skeletons file worm = WormFromTable( skeletons_file, worm_index, use_skel_filter=use_skel_filter, worm_index_type=worm_index_type, smooth_window=5) if is_single_worm: #worm with the stage correction applied worm.correct_schafer_worm() if np.all(np.isnan(worm.skeleton[:, 0, 0])): print_flush('{} Not valid skeletons found after stage correction. 
Skiping worm index {}'.format(base_name, worm_index)) return # calculate features timeseries_data, events_data, worm_stats = getOpenWormData(worm, wStats) #get splitted features splitted_worms = [x for x in worm.split(split_traj_frames) if x.n_valid_skel > feat_filt_param['min_num_skel'] and x.n_valid_skel/x.n_frames >= feat_filt_param['bad_seg_thresh']] dd = [getFeatStats(x, wStats)[1] for x in splitted_worms] splitted_feats = {stat:[x[stat] for x in dd] for stat in FUNC_FOR_DIV} #% add data to save # save timeseries data table_timeseries.append(timeseries_data) table_timeseries.flush() # save skeletons worm_coords_array['skeletons'].append(worm.skeleton) worm_coords_array['dorsal_contours'].append(worm.dorsal_contour) worm_coords_array['ventral_contours'].append(worm.ventral_contour) # save event data as a subgroup per worm worm_node = features_fid.create_group( group_events, 'worm_%i' % worm_index) worm_node._v_attrs['worm_index'] = worm_index worm_node._v_attrs['frame_range'] = np.array( (worm.first_frame, worm.last_frame)) for feat in events_data: tmp_data = events_data[feat] # consider the cases where the output is a single number, empty # or None if isinstance(tmp_data, (float, int)): tmp_data = np.array([tmp_data]) if tmp_data is None or tmp_data.size == 0: tmp_data = np.array([np.nan]) features_fid.create_carray( worm_node, feat, obj=tmp_data, filters=TABLE_FILTERS) # store the average for each worm feature for stat in FUNC_FOR_DIV: stats_features_df[stat][ind_N] = worm_stats[stat] #append the splitted traj features all_splitted_feats[stat] += splitted_feats[stat] # report progress _displayProgress(ind_N + 1) # create and save a table containing the averaged worm feature for each # worm f_node = features_fid.create_group('/', 'features_summary') for stat, stats_df in stats_features_df.items(): splitted_feats = all_splitted_feats[stat] #check that the array is not empty if len(splitted_feats) > 0: splitted_feats_arr = np.array(splitted_feats) else: #return a row full of nan to indicate a fail splitted_feats_arr = np.full(1, np.nan, dtype=wStats.feat_avg_dtype) features_fid.create_table( f_node, stat, obj = stats_df, filters = TABLE_FILTERS ) feat_stat_split = features_fid.create_table( f_node, stat + '_split', obj=splitted_feats_arr, filters=TABLE_FILTERS ) feat_stat_split._v_attrs['split_traj_frames'] = split_traj_frames if stat == 'means': #FUTURE: I am duplicating this field for backward compatibility, I should remove it later on. features_fid.create_table( '/', 'features_means', obj = stats_df, filters = TABLE_FILTERS ) features_fid.create_table( '/', 'features_means_split', obj=splitted_feats_arr, filters=TABLE_FILTERS ) print_flush( base_name + ' Feature extraction finished: ' + progress_timer.get_time_str())
def RunMultiCMD(cmd_list, local_obj='', max_num_process=3, refresh_time=10, is_debug=True):
    '''Start several processes using the commands in cmd_list.'''
    start_obj = partial(StartProcess, local_obj=local_obj, is_debug=is_debug)

    total_timer = TimeCounter()  #timer to measure the total time

    # since pop is used to get the next element, invert the list to keep the original order
    cmd_list = cmd_list[::-1]
    tot_tasks = len(cmd_list)
    if tot_tasks < max_num_process:
        max_num_process = tot_tasks

    # initialize the first max_num_process tasks in the list
    finished_tasks = []
    current_tasks = []
    for ii in range(max_num_process):
        cmd = cmd_list.pop()
        current_tasks.append(start_obj(cmd))

    # keep looping as long as there are tasks in the list
    while current_tasks:
        time.sleep(refresh_time)

        print(GUI_CLEAR_SIGNAL)
        os.system(['clear', 'cls'][os.name == 'nt'])

        # print info of the finished tasks
        for task_finish_msg in finished_tasks:
            sys.stdout.write(task_finish_msg)

        # loop along the process list to update the output and see if any task has finished
        next_tasks = []

        #close the tasks after starting the next ones. This has the disadvantage of
        #requiring more disk space (files for the new task + the finished files),
        #but at least a new task can start while the old results are being copied.
        tasks_to_close = []
        for task in current_tasks:
            task.read_buff()
            if task.proc.poll() is None:
                # add the task to the new list if it hasn't completed
                next_tasks.append(task)
                sys.stdout.write(task.output[-1])
            else:
                # close the task and add its last output to the finished_tasks list
                tasks_to_close.append(task)
                # add a new task once the previous one has finished
                if cmd_list and len(next_tasks) < max_num_process:
                    cmd = cmd_list.pop()
                    next_tasks.append(start_obj(cmd))

        # if there is still space, add new tasks
        while cmd_list and len(next_tasks) < max_num_process:
            cmd = cmd_list.pop()
            next_tasks.append(start_obj(cmd))

        #close tasks (copy finished files to their final destination)
        for task in tasks_to_close:
            task.close()
            sys.stdout.write(task.output[-1])
            finished_tasks.append(task.output[-1])

        #start the new loop
        current_tasks = next_tasks

        #display progress
        n_finished = len(finished_tasks)
        n_remaining = len(current_tasks) + len(cmd_list)
        progress_str = 'Tasks: {} finished, {} remaining. Total_time {}.'.format(
            n_finished, n_remaining, total_timer.get_time_str())
        print('*************************************************')
        print(progress_str)
        print('*************************************************')

    #without this flush the GUI could terminate before displaying the last text
    sys.stdout.flush()
    time.sleep(1)
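# Hypothetical usage sketch for RunMultiCMD (the script name, file names and
# the exact shape expected for each command are assumptions, not taken from
# the original code): queue one command per video and let at most three run
# concurrently, refreshing the progress display every 10 seconds.
def _example_run_multi_cmd(video_files):
    cmd_list = [['python', 'processMultipleFiles.py', fname] for fname in video_files]
    RunMultiCMD(cmd_list, max_num_process=3, refresh_time=10)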
def correctHeadTailIntensity(skeletons_file,
                             intensities_file,
                             smooth_W=5,
                             gap_size=-1,
                             min_block_size=-1,
                             local_avg_win=-1,
                             min_frac_in=0.85,
                             head_tail_param={},
                             head_tail_int_method='MEDIAN_INT'):

    output = head_tail_int_defaults(skeletons_file,
                                    smooth_W=smooth_W,
                                    gap_size=gap_size,
                                    min_block_size=min_block_size,
                                    local_avg_win=local_avg_win)
    smooth_W = output['smooth_W']
    gap_size = output['gap_size']
    min_block_size = output['min_block_size']
    local_avg_win = output['local_avg_win']

    head_tail_param = head_tail_defaults(skeletons_file, **head_tail_param)

    # get the trajectories table
    with pd.HDFStore(skeletons_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']
        # at this point the int_map_id column with the intensity map indexes must exist in the table
        assert 'int_map_id' in trajectories_data

    grouped_trajectories = trajectories_data.groupby('worm_index_joined')
    tot_worms = len(grouped_trajectories)

    # variables to report progress
    base_name = skeletons_file.rpartition('.')[0].rpartition(os.sep)[-1].rpartition('_')[0]
    progress_timer = TimeCounter('')

    # worms without enough difference between the normal and inverted median intensity profiles
    bad_worms = []
    # data from the blocks that were switched
    switched_blocks = []

    #ind2check = [765]
    for index_n, (worm_index, trajectories_worm) in enumerate(grouped_trajectories):
        # if not worm_index in ind2check: continue

        if index_n % 10 == 0:
            dd = " Correcting Head-Tail using intensity profiles. Worm %i of %i." % (index_n + 1, tot_worms)
            dd = base_name + dd + ' Total time:' + progress_timer.get_time_str()
            print_flush(dd)

        # correct head tail using the intensity profiles
        dd = correctHeadTailIntWorm(trajectories_worm,
                                    skeletons_file,
                                    intensities_file,
                                    smooth_W,
                                    gap_size,
                                    min_block_size,
                                    local_avg_win,
                                    min_frac_in,
                                    head_tail_int_method)
        switched_blocks += [(worm_index, t0, tf) for t0, tf in dd]

        # check that the final orientation is correct, otherwise switch the whole trajectory
        if head_tail_int_method != 'HEAD_BRIGHTER':
            p_tot, skel_group, int_group = checkFinalOrientation(skeletons_file,
                                                                 intensities_file,
                                                                 trajectories_worm,
                                                                 min_block_size,
                                                                 head_tail_param)
            if p_tot < 0.5:
                switchBlocks(skel_group, skeletons_file, int_group, intensities_file)

    # label the process as finished and store the indexes of the switched worms
    with tables.File(skeletons_file, 'r+') as fid:
        if not '/intensity_analysis' in fid:
            fid.create_group('/', 'intensity_analysis')

        if '/intensity_analysis/bad_worms' in fid:
            fid.remove_node('/intensity_analysis/bad_worms')
        if '/intensity_analysis/switched_head_tail' in fid:
            fid.remove_node('/intensity_analysis/switched_head_tail')

        if bad_worms:
            fid.create_array('/intensity_analysis', 'bad_worms', np.array(bad_worms))

        if switched_blocks:
            # convert to a rec array (np.int is a deprecated alias, use the builtin int)
            switched_blocks = np.array(switched_blocks,
                                       dtype=[('worm_index', int),
                                              ('ini_frame', int),
                                              ('last_frame', int)])
            fid.create_table('/intensity_analysis', 'switched_head_tail', switched_blocks)

        fid.get_node('/skeleton')._v_attrs['has_finished'] = 4

    print_flush(base_name + ' Head-Tail correction using intensity profiles finished: ' +
                progress_timer.get_time_str())
"/data2/shared/data/twoColour/Results/*/*/*52.1g_X1_skeletons.hdf5") for skeletons_file in filenames: base_name = get_base_name(skeletons_file) progress_prefix = base_name + ' Calculating skeletons.' with pd.HDFStore(skeletons_file, 'r') as ske_file_id: trajectories_data = ske_file_id['/trajectories_data'] blob_features = ske_file_id['/blob_features'] #I want to update blob_features blob_features['signed_speed'] = np.float32(np.nan) blob_features['velocity_x'] = np.float32(np.nan) blob_features['velocity_y'] = np.float32(np.nan) progress_timer = TimeCounter('') with tables.File(skeletons_file, 'r') as fid: skeletons = fid.get_node('/skeleton') grouped_by_index = trajectories_data.groupby('worm_index_joined') tot_worms = len(grouped_by_index) for ii, (worm_index, worm_data) in enumerate(grouped_by_index): feats = blob_features.loc[worm_data.index] skel_coords = skeletons[worm_data.index] xx = feats['coord_x'] yy = feats['coord_y'] signed_speed, velocity = _get_signed_velocity( xx, yy, skel_coords) #import pdb #pdb.set_trace() blob_features.loc[worm_data.index,
def calculate_summaries(root_dir, feature_type, summary_type, is_manual_index, time_windows, time_units, _is_debug=False, **fold_args): """ Gets input from the GUI, calls the function that chooses the type of summary and runs the summary calculation for each file in the root_dir. """ save_base_name = 'summary_{}_{}'.format(feature_type, summary_type) if is_manual_index: save_base_name += '_manual' save_base_name += '_' + datetime.datetime.now().strftime('%Y%m%d_%H%M%S') #check the options are valid check_in_list(feature_type, valid_feature_types, 'feature_type') check_in_list(summary_type, valid_summary_types, 'summary_type') #convert time windows to list of integers in frame number units time_windows_ints = time_windows_parser(time_windows) #get summary function summary_func = get_summary_func(feature_type, summary_type, is_manual_index, **fold_args) #get extension of results file possible_ext = feature_files_ext[feature_type] ext = possible_ext[1] if is_manual_index else possible_ext[0] fnames = glob.glob(os.path.join(root_dir, '**', '*' + ext), recursive=True) if not fnames: print_flush('No valid files found. Nothing to do here.') return # EM :Make df_files list with one dataframe per time window dd = tuple(zip(*enumerate(sorted(fnames)))) df_files = [ pd.DataFrame({ 'file_id': dd[0], 'file_name': dd[1] }) for x in range(len(time_windows_ints)) ] for iwin in range(len(time_windows_ints)): df_files[iwin]['is_good'] = False progress_timer = TimeCounter('') def _displayProgress(n): args = (n + 1, len(df_files[0]), progress_timer.get_time_str()) dd = "Extracting features summary. File {} of {} done. Total time: {}".format( *args) print_flush(dd) _displayProgress(-1) # EM :Make all_summaries list with one dataframe per time window all_summaries = [[] for x in range(len(time_windows_ints))] for ifile, row in df_files[0].iterrows(): fname = row['file_name'] df_list = summary_func(fname, time_windows_ints, time_units) for iwin, df in enumerate(df_list): try: df.insert(0, 'file_id', ifile) all_summaries[iwin].append(df) except (AttributeError, IOError, KeyError, tables.exceptions.HDF5ExtError, tables.exceptions.NoSuchNodeError): continue else: if not df.empty: df_files[iwin].loc[ifile, 'is_good'] = True _displayProgress(ifile) for iwin in range(len(time_windows_ints)): all_summaries[iwin] = pd.concat(all_summaries[iwin], ignore_index=True, sort=False) f1 = os.path.join( root_dir, 'filenames_{}_window_{}.csv'.format(save_base_name, iwin)) df_files[iwin].to_csv(f1, index=False) f2 = os.path.join( root_dir, 'features_{}_window_{}.csv'.format(save_base_name, iwin)) all_summaries[iwin].to_csv(f2, index=False) out = '****************************' out += '\nFINISHED. Created Files:\n-> {}\n-> {}'.format(f1, f2) print_flush(out) return df_files, all_summaries
def _get_timeseries_feats(features_file, delta_time=1 / 3): ''' Get the all the time series features from the skeletons ''' timeseries_features = [] fps = read_fps(features_file) with pd.HDFStore(features_file, 'r') as fid: trajectories_data = fid['/trajectories_data'] #only use data that was skeletonized #trajectories_data = trajectories_data[trajectories_data['skeleton_id']>=0] trajectories_data_g = trajectories_data.groupby('worm_index_joined') progress_timer = TimeCounter('') base_name = get_base_name(features_file) tot_worms = len(trajectories_data_g) def _display_progress(n): # display progress dd = " Calculating tierpsy features. Worm %i of %i done." % (n + 1, tot_worms) print_flush(base_name + dd + ' Total time:' + progress_timer.get_time_str()) _display_progress(0) with tables.File(features_file, 'r') as fid: if '/food_cnt_coord' in fid: food_cnt = fid.get_node('/food_cnt_coord')[:] else: food_cnt = None #If i find the ventral side in the multiworm case this has to change ventral_side = read_ventral_side(features_file) timeseries_features = [] for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g): with tables.File(features_file, 'r') as fid: skel_id = worm_data['skeleton_id'].values #deal with any nan in the skeletons good_id = skel_id >= 0 skel_id_val = skel_id[good_id] traj_size = skel_id.size args = [] for p in ('skeletons', 'widths', 'dorsal_contours', 'ventral_contours'): node = fid.get_node('/coordinates/' + p) dat = np.full((traj_size, *node.shape[1:]), np.nan) if skel_id_val.size > 0: if len(node.shape) == 3: dd = node[skel_id_val, :, :] else: dd = node[skel_id_val, :] dat[good_id] = dd args.append(dat) timestamp = worm_data['timestamp_raw'].values.astype(np.int32) feats = get_timeseries_features(*args, timestamp=timestamp, food_cnt=food_cnt, fps=fps, ventral_side=ventral_side) #save timeseries features data feats = feats.astype(np.float32) feats['worm_index'] = worm_index #move the last fields to the first columns cols = feats.columns.tolist() cols = cols[-1:] + cols[:-1] feats = feats[cols] feats['worm_index'] = feats['worm_index'].astype(np.int32) feats['timestamp'] = feats['timestamp'].astype(np.int32) timeseries_features.append(feats) _display_progress(ind_n) timeseries_features = pd.concat(timeseries_features, ignore_index=True) return timeseries_features
def compressVideo(video_file, masked_image_file, mask_param, expected_fps=25, microns_per_pixel=None, bgnd_param={}, buffer_size=-1, save_full_interval=-1, max_frame=1e32, is_extract_timestamp=False, fovsplitter_param={}): ''' Compresses video by selecting pixels that are likely to have worms on it and making the rest of the image zero. By creating a large amount of redundant data, any lossless compression algorithm will dramatically increase its efficiency. The masked images are saved as hdf5 with gzip compression. The mask is calculated over a minimum projection of an image stack. This projection preserves darker regions (or brighter regions, in the case of fluorescent labelling) where the worm has more probability to be located. Additionally it has the advantage of reducing the processing load by only requiring to calculate the mask once per image stack. video_file -- original video file masked_image_file -- buffer_size -- size of the image stack used to calculate the minimal projection and the mask save_full_interval -- have often a full image is saved max_frame -- last frame saved (default a very large number, so it goes until the end of the video) mask_param -- parameters used to calculate the mask ''' #get the default values if there is any bad parameter output = compress_defaults(masked_image_file, expected_fps, buffer_size=buffer_size, save_full_interval=save_full_interval) buffer_size = output['buffer_size'] save_full_interval = output['save_full_interval'] if len(bgnd_param) > 0: is_bgnd_subtraction = True assert bgnd_param['buff_size'] > 0 and bgnd_param['frame_gap'] > 0 else: is_bgnd_subtraction = False if len(fovsplitter_param) > 0: is_fov_tosplit = True assert all(key in fovsplitter_param for key in ['total_n_wells', 'whichsideup', 'well_shape']) assert fovsplitter_param['total_n_wells'] > 0 else: is_fov_tosplit = False # processes identifier. base_name = masked_image_file.rpartition('.')[0].rpartition(os.sep)[-1] # select the video reader class according to the file type. vid = selectVideoReader(video_file) # delete any previous if it existed with tables.File(masked_image_file, "w") as mask_fid: pass #Extract metadata if is_extract_timestamp: # extract and store video metadata using ffprobe #NOTE: i cannot calculate /timestamp until i am sure of the total number of frames print_flush(base_name + ' Extracting video metadata...') expected_frames = store_meta_data(video_file, masked_image_file) else: expected_frames = 1 # Initialize background subtraction if required if is_bgnd_subtraction: print_flush(base_name + ' Initializing background subtraction.') bgnd_subtractor = BackgroundSubtractorVideo(video_file, **bgnd_param) # intialize some variables max_intensity, min_intensity = np.nan, np.nan frame_number = 0 full_frame_number = 0 image_prev = np.zeros([]) # Initialise FOV splitting if needed if is_bgnd_subtraction: img_fov = bgnd_subtractor.bgnd.astype(np.uint8) else: ret, img_fov = vid.read() # close and reopen the video, to restart from the beginning vid.release() vid = selectVideoReader(video_file) if is_fov_tosplit: # TODO: change class creator so it only needs the video name? 
by using # Tierpsy's functions such as selectVideoReader it can then read the first image by itself camera_serial = parse_camera_serial(masked_image_file) fovsplitter = FOVMultiWellsSplitter(img_fov, camera_serial=camera_serial, px2um=microns_per_pixel, **fovsplitter_param) wells_mask = fovsplitter.wells_mask else: wells_mask = None # initialize timers print_flush(base_name + ' Starting video compression.') if expected_frames == 1: progressTime = TimeCounter('Compressing video.') else: #if we know the number of frames display it in the progress progressTime = TimeCounter('Compressing video.', expected_frames) with tables.File(masked_image_file, "r+") as mask_fid: #initialize masks groups attr_params = dict(expected_fps=expected_fps, microns_per_pixel=microns_per_pixel, is_light_background=int( mask_param['is_light_background'])) mask_dataset, full_dataset, mean_intensity = initMasksGroups( mask_fid, expected_frames, vid.height, vid.width, attr_params, save_full_interval) if is_bgnd_subtraction: bg_dataset = createImgGroup(mask_fid, "/bgnd", 1, vid.height, vid.width, is_expandable=False) bg_dataset[0, :, :] = img_fov if vid.dtype != np.uint8: # this will worm as flags to be sure that the normalization took place. normalization_range = mask_fid.create_earray( '/', 'normalization_range', atom=tables.Float32Atom(), shape=(0, 2), expectedrows=expected_frames, filters=TABLE_FILTERS) while frame_number < max_frame: ret, image = vid.read() if ret != 0: # increase frame number frame_number += 1 # opencv can give an artificial rgb image. Let's get it back to # gray scale. if image.ndim == 3: image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) if image.dtype != np.uint8: # normalise image intensities if the data type is other # than uint8 image, img_norm_range = normalizeImage(image) normalization_range.append(img_norm_range) #limit the image range to 1 to 255, 0 is a reserved value for the background assert image.dtype == np.uint8 image = np.clip(image, 1, 255) # Add a full frame every save_full_interval if frame_number % save_full_interval == 1: full_dataset.append(image[np.newaxis, :, :]) full_frame_number += 1 # buffer index ind_buff = (frame_number - 1) % buffer_size # initialize the buffer when the index correspond to 0 if ind_buff == 0: Ibuff = np.zeros((buffer_size, vid.height, vid.width), dtype=np.uint8) # add image to the buffer Ibuff[ind_buff, :, :] = image.copy() mean_int = np.mean(image) assert mean_int >= 0 mean_intensity.append(np.array([mean_int])) else: # sometimes the last image is all zeros, control for this case if np.all(Ibuff[ind_buff] == 0): frame_number -= 1 ind_buff -= 1 # close the buffer Ibuff = Ibuff[:ind_buff + 1] # mask buffer and save data into the hdf5 file if (ind_buff == buffer_size - 1 or ret == 0) and Ibuff.size > 0: if is_bgnd_subtraction: Ibuff_b = bgnd_subtractor.apply(Ibuff, frame_number) else: Ibuff_b = Ibuff #calculate the max/min in the of the buffer img_reduce = reduceBuffer(Ibuff_b, mask_param['is_light_background']) mask = getROIMask(img_reduce, wells_mask=wells_mask, **mask_param) Ibuff *= mask # now apply the well_mask if is MWP if is_fov_tosplit: fovsplitter.apply_wells_mask( Ibuff) # Ibuff will be modified after this # add buffer to the hdf5 file frame_first_buff = frame_number - Ibuff.shape[0] mask_dataset.append(Ibuff) if frame_number % 500 == 0: # calculate the progress and put it in a string progress_str = progressTime.get_str(frame_number) print_flush(base_name + ' ' + progress_str) # finish process if ret == 0: break # now that the whole video is 
read, we definitely have a better estimate # for its number of frames. so set the save_interval again if is_bgnd_subtraction: # bg_dataset._v_attrs['save_interval'] = len(vid) # the above line is not accurate when using ffmpeg, # it's just safer to do: bg_dataset._v_attrs['save_interval'] = mask_dataset.shape[0] # close the video vid.release() # save fovsplitting data if is_fov_tosplit: fovsplitter.write_fov_wells_to_file(masked_image_file) if fovsplitter.is_dubious: print(f'Check {masked_image_file} for plate alignment') read_and_save_timestamp(masked_image_file) print_flush(base_name + ' Compressed video done.')
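# A minimal, self-contained sketch of the masking idea compressVideo relies on
# (illustrative only; the real mask comes from getROIMask with the parameters
# in mask_param, not from the fixed threshold used here): reduce an image
# stack to a single projection, threshold it to find likely worm regions, and
# zero every other pixel so the lossless gzip filter in the HDF5 file can
# shrink the data dramatically.
def _mask_image_stack_sketch(image_stack, is_light_background=True, threshold=100):
    import numpy as np
    if is_light_background:
        img_reduce = image_stack.min(axis=0)   # min projection preserves dark (worm) regions
        mask = img_reduce < threshold
    else:
        img_reduce = image_stack.max(axis=0)   # max projection for fluorescence
        mask = img_reduce > threshold
    return image_stack * mask.astype(image_stack.dtype)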
def save_timeseries_feats_table(features_file, derivate_delta_time, fovsplitter_param={}): timeseries_features = [] fps = read_fps(features_file) # initialise class for splitting fov if len(fovsplitter_param) > 0: is_fov_tosplit = True assert all(key in fovsplitter_param for key in ['total_n_wells', 'whichsideup', 'well_shape']) assert fovsplitter_param['total_n_wells'] > 0 else: is_fov_tosplit = False print('is fov to split?', is_fov_tosplit) if is_fov_tosplit: # split fov in wells masked_image_file = features_file.replace('Results', 'MaskedVideos') masked_image_file = masked_image_file.replace('_featuresN.hdf5', '.hdf5') # fovsplitter = FOVMultiWellsSplitter(masked_image_file=masked_image_file, # total_n_wells=fovsplitter_param['total_n_wells'], # whichsideup=fovsplitter_param['whichsideup'], # well_shape=fovsplitter_param['well_shape']) fovsplitter = FOVMultiWellsSplitter(masked_image_file, **fovsplitter_param) # store wells data in the features file fovsplitter.write_fov_wells_to_file(features_file) with pd.HDFStore(features_file, 'r') as fid: trajectories_data = fid['/trajectories_data'] trajectories_data_g = trajectories_data.groupby('worm_index_joined') progress_timer = TimeCounter('') base_name = get_base_name(features_file) tot_worms = len(trajectories_data_g) def _display_progress(n): # display progress dd = " Calculating tierpsy features. Worm %i of %i done." % (n + 1, tot_worms) print_flush(base_name + dd + ' Total time:' + progress_timer.get_time_str()) _display_progress(0) with tables.File(features_file, 'r+') as fid: for gg in [ '/timeseries_data', '/event_durations', '/timeseries_features' ]: if gg in fid: fid.remove_node(gg) feat_dtypes = [(x, np.float32) for x in timeseries_all_columns] feat_dtypes = [('worm_index', np.int32), ('timestamp', np.int32), ('well_name', 'S3')] + feat_dtypes timeseries_features = fid.create_table('/', 'timeseries_data', obj=np.recarray(0, feat_dtypes), filters=TABLE_FILTERS) if '/food_cnt_coord' in fid: food_cnt = fid.get_node('/food_cnt_coord')[:] else: food_cnt = None #If i find the ventral side in the multiworm case this has to change ventral_side = read_ventral_side(features_file) for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g): skel_id = worm_data['skeleton_id'].values #deal with any nan in the skeletons good_id = skel_id >= 0 skel_id_val = skel_id[good_id] traj_size = skel_id.size args = [] for p in ('skeletons', 'widths', 'dorsal_contours', 'ventral_contours'): node_str = '/coordinates/' + p if node_str in fid: node = fid.get_node(node_str) dat = np.full((traj_size, *node.shape[1:]), np.nan) if skel_id_val.size > 0: if len(node.shape) == 3: dd = node[skel_id_val, :, :] else: dd = node[skel_id_val, :] dat[good_id] = dd else: dat = None args.append(dat) timestamp = worm_data['timestamp_raw'].values.astype(np.int32) feats = get_timeseries_features( *args, timestamp=timestamp, food_cnt=food_cnt, fps=fps, ventral_side=ventral_side, derivate_delta_time=derivate_delta_time) #save timeseries features data feats = feats.astype(np.float32) feats['worm_index'] = worm_index if is_fov_tosplit: feats[ 'well_name'] = fovsplitter.find_well_from_trajectories_data( worm_data) else: feats['well_name'] = 'n/a' # cast well_name to the correct type # (before shuffling columns, so it remains the last entry) # needed because for some reason this does not work: # feats['well_name'] = feats['well_name'].astype('S3') feats['_well_name'] = feats['well_name'].astype('S3') feats.drop(columns='well_name', inplace=True) 
feats.rename(columns={'_well_name': 'well_name'}, inplace=True) #move the last fields to the first columns cols = feats.columns.tolist() cols = cols[-2:] + cols[:-2] cols[1], cols[2] = cols[2], cols[1] feats = feats[cols] feats['worm_index'] = feats['worm_index'].astype(np.int32) feats['timestamp'] = feats['timestamp'].astype(np.int32) feats = feats.to_records(index=False) timeseries_features.append(feats) _display_progress(ind_n)
DEBUG = False if __name__ == '__main__': skeletons_file = '/Volumes/behavgenom_archive$/Serena/SpikingDatasetRecordings51-64/Results/recording60/recording60.2g/recording60.2g_X1_skeletons.hdf5' base_name = get_base_name(skeletons_file) progress_prefix = base_name + ' Calculating skeletons.' with pd.HDFStore(skeletons_file, 'r') as ske_file_id: trajectories_data = ske_file_id['/trajectories_data'] blob_features = ske_file_id['/blob_features'] #I want to update blob_features blob_features['signed_speed'] = np.nan progress_timer = TimeCounter('') with tables.File(skeletons_file, 'r') as fid: skeletons = fid.get_node('/skeleton') grouped_by_index = trajectories_data.groupby('worm_index_joined') tot_worms = len(grouped_by_index) for ii, (worm_index, worm_data) in enumerate(grouped_by_index): feats = blob_features.loc[worm_data.index] skel_coords = skeletons[worm_data.index] xx = feats['coord_x'] yy = feats['coord_y'] signed_speed = _get_signed_velocity(xx, yy, skel_coords) blob_features.loc[worm_data.index[:-1], 'signed_speed'] = signed_speed if ii % 100 == 0: dd = " Calculating signed speed. Worm %i of %i." % (ii + 1, tot_worms)
def save_timeseries_feats_table(features_file, derivate_delta_time): timeseries_features = [] fps = read_fps(features_file) with pd.HDFStore(features_file, 'r') as fid: trajectories_data = fid['/trajectories_data'] #only use data that was skeletonized #trajectories_data = trajectories_data[trajectories_data['skeleton_id']>=0] trajectories_data_g = trajectories_data.groupby('worm_index_joined') progress_timer = TimeCounter('') base_name = get_base_name(features_file) tot_worms = len(trajectories_data_g) def _display_progress(n): # display progress dd = " Calculating tierpsy features. Worm %i of %i done." % (n + 1, tot_worms) print_flush(base_name + dd + ' Total time:' + progress_timer.get_time_str()) _display_progress(0) with tables.File(features_file, 'r+') as fid: for gg in [ '/timeseries_data', '/event_durations', '/timeseries_features' ]: if gg in fid: fid.remove_node(gg) feat_dtypes = [(x, np.float32) for x in timeseries_all_columns] feat_dtypes = [('worm_index', np.int32), ('timestamp', np.int32)] + feat_dtypes timeseries_features = fid.create_table('/', 'timeseries_data', obj=np.recarray(0, feat_dtypes), filters=TABLE_FILTERS) if '/food_cnt_coord' in fid: food_cnt = fid.get_node('/food_cnt_coord')[:] else: food_cnt = None #If i find the ventral side in the multiworm case this has to change ventral_side = read_ventral_side(features_file) for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g): with tables.File(features_file, 'r') as fid: skel_id = worm_data['skeleton_id'].values #deal with any nan in the skeletons good_id = skel_id >= 0 skel_id_val = skel_id[good_id] traj_size = skel_id.size args = [] for p in ('skeletons', 'widths', 'dorsal_contours', 'ventral_contours'): node_str = '/coordinates/' + p if node_str in fid: node = fid.get_node(node_str) dat = np.full((traj_size, *node.shape[1:]), np.nan) if skel_id_val.size > 0: if len(node.shape) == 3: dd = node[skel_id_val, :, :] else: dd = node[skel_id_val, :] dat[good_id] = dd else: dat = None args.append(dat) timestamp = worm_data['timestamp_raw'].values.astype(np.int32) feats = get_timeseries_features( *args, timestamp=timestamp, food_cnt=food_cnt, fps=fps, ventral_side=ventral_side, derivate_delta_time=derivate_delta_time) #save timeseries features data feats = feats.astype(np.float32) feats['worm_index'] = worm_index #move the last fields to the first columns cols = feats.columns.tolist() cols = cols[-1:] + cols[:-1] feats = feats[cols] feats['worm_index'] = feats['worm_index'].astype(np.int32) feats['timestamp'] = feats['timestamp'].astype(np.int32) feats = feats.to_records(index=False) timeseries_features.append(feats) _display_progress(ind_n)
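# Small illustration (an assumed example, not from the original module) of the
# column reordering used above: move the last column of a DataFrame, e.g. a
# freshly added 'worm_index', to the front.
def _move_last_column_first(df):
    cols = df.columns.tolist()
    return df[cols[-1:] + cols[:-1]]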
def calculate_summaries(root_dir, feature_type, summary_type, is_manual_index, time_windows, time_units, n_processes=1, _is_debug=False, **fold_args): """ Gets input from the GUI, calls the function that chooses the type of summary and runs the summary calculation for each file in the root_dir. """ save_base_name = 'summary_{}_{}'.format(feature_type, summary_type) if is_manual_index: save_base_name += '_manual' save_base_name += '_' + datetime.datetime.now().strftime('%Y%m%d_%H%M%S') #check the options are valid check_in_list(feature_type, valid_feature_types, 'feature_type') check_in_list(summary_type, valid_summary_types, 'summary_type') # EM : convert time windows to list of integers in frame number units time_windows_ints = time_windows_parser(time_windows) # EM : get list of keywords to include and to exclude # TODO: catch conflicts keywords_in = keywords_parser(keywords_include) keywords_ex = keywords_parser(keywords_exclude) # EM : get full path to feature set file selected_feat = feat_set_parser(select_feat) #get summary function # INPUT time windows time units here summary_func = get_summary_func(feature_type, summary_type, time_windows_ints, time_units, is_manual_index, **fold_args) #get extension of results file possible_ext = feature_files_ext[feature_type] ext = possible_ext[1] if is_manual_index else possible_ext[0] fnames = glob.glob(os.path.join(root_dir, '**', '*' + ext), recursive=True) if len(fnames) == 0: print_flush('No valid files found. Nothing to do here.') return None, None # EM :Make df_files list with one features_summaries dataframe per time window df_files = make_df_filenames(fnames, time_windows_ints, time_units) progress_timer = TimeCounter('') def _displayProgress(n): args = (n + 1, len(df_files[0]), progress_timer.get_time_str()) dd = "Extracting features summary. File {} of {} done. Total time: {}".format( *args) print_flush(dd) _displayProgress(-1) # EM :Make all_summaries list with one element per time window. Each element contains # the extracted feature summaries from all the files for the given time window. 
all_summaries = [[] for x in range(len(time_windows_ints))] #i need to use partial and redifine this otherwise multiprocessing since it will not be pickable _process_row = partial(process_helper, summary_func=summary_func, time_windows_ints=time_windows_ints, time_units=time_units) data2process = [x for x in df_files[0].iterrows()] n_processes = max(n_processes, 1) if n_processes <= 1: gen = map(_process_row, data2process) else: p = mp.Pool(n_processes) gen = p.imap(_process_row, data2process) for ii, (ifile, df_list) in enumerate(gen): #reformat the outputs and remove any failed for iwin, df in enumerate(df_list): df.insert(0, 'file_id', ifile) all_summaries[iwin].append(df) if not df.empty: df_files[iwin].loc[ifile, 'is_good'] = True _displayProgress(ii + 1) # EM : Concatenate summaries for each window into one dataframe and select features for iwin in range(len(time_windows_ints)): all_summaries[iwin] = pd.concat(all_summaries[iwin], ignore_index=True, sort=False) all_summaries[iwin] = select_features(all_summaries[iwin], keywords_in, keywords_ex, selected_feat) # EM : Save results if select_feat != 'all': win_save_base_name = save_base_name.replace( 'tierpsy', select_feat + '_tierpsy') else: win_save_base_name = save_base_name if not (len(time_windows_ints) == 1 and time_windows_ints[0] == [0, -1]): win_save_base_name = win_save_base_name + '_window_{}'.format(iwin) f1 = os.path.join(root_dir, 'filenames_{}.csv'.format(win_save_base_name)) f2 = os.path.join(root_dir, 'features_{}.csv'.format(win_save_base_name)) df_files[iwin].to_csv(f1, index=False) all_summaries[iwin].to_csv(f2, index=False) out = '****************************' out += '\nFINISHED. Created Files:\n-> {}\n-> {}'.format(f1, f2) print_flush(out) return df_files, all_summaries
def smooth_skeletons_table(skeletons_file, features_file, is_WT2=False, skel_smooth_window=5, coords_smooth_window_s=0.25, gap_to_interp_s=0.25): #%% #%% fps = read_fps(skeletons_file) coords_smooth_window = int(np.round(fps * coords_smooth_window_s)) gap_to_interp = int(np.round(fps * gap_to_interp_s)) if coords_smooth_window <= 3: #do not interpolate coords_smooth_window = None trajectories_data = _r_fill_trajectories_data(skeletons_file) #%% trajectories_data_g = trajectories_data.groupby('worm_index_joined') progress_timer = TimeCounter('') base_name = get_base_name(skeletons_file) tot_worms = len(trajectories_data_g) def _display_progress(n): # display progress dd = " Smoothing skeletons. Worm %i of %i done." % (n, tot_worms) print_flush(base_name + dd + ' Total time:' + progress_timer.get_time_str()) _display_progress(0) #%% #initialize arrays food_cnt = read_food_contour(skeletons_file) with tables.File(skeletons_file, 'r') as fid: n_segments = fid.get_node('/skeleton').shape[1] with tables.File(features_file, 'w') as fid_features: if food_cnt is not None: fid_features.create_array('/', 'food_cnt_coord', obj=food_cnt.astype(np.float32)) worm_coords_array = {} w_node = fid_features.create_group('/', 'coordinates') for array_name in [ 'skeletons', 'dorsal_contours', 'ventral_contours', 'widths' ]: if array_name != 'widths': a_shape = (0, n_segments, 2) else: a_shape = (0, n_segments) worm_coords_array[array_name] = fid_features.create_earray( w_node, array_name, shape=a_shape, atom=tables.Float32Atom(shape=()), filters=TABLE_FILTERS) tot_skeletons = 0 for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g): if worm_data['was_skeletonized'].sum() < 2: continue worm = WormFromTable(skeletons_file, worm_index, worm_index_type='worm_index_joined') if is_WT2: worm.correct_schafer_worm() if np.sum(~np.isnan(worm.skeleton[:, 0, 0])) <= 2: warnings.warn('Not enough data to smooth. Empty file?') wormN = worm else: wormN = SmoothedWorm(worm.skeleton, worm.widths, worm.ventral_contour, worm.dorsal_contour, skel_smooth_window=skel_smooth_window, coords_smooth_window=coords_smooth_window, gap_to_interp=gap_to_interp) dat_index = pd.Series(False, index=worm_data['timestamp_raw'].values) try: dat_index[worm.timestamp] = True except ValueError: import pdb pdb.set_trace() #%% skeleton_id = np.arange(wormN.skeleton.shape[0]) + tot_skeletons tot_skeletons = skeleton_id[-1] + 1 row_ind = worm_data.index[dat_index.values] trajectories_data.loc[row_ind, 'skeleton_id'] = skeleton_id #%% #add data worm_coords_array['skeletons'].append(getattr(wormN, 'skeleton')) worm_coords_array['dorsal_contours'].append( getattr(wormN, 'dorsal_contour')) worm_coords_array['ventral_contours'].append( getattr(wormN, 'ventral_contour')) worm_coords_array['widths'].append(getattr(wormN, 'widths')) #display progress _display_progress(ind_n + 1) #save trajectories data newT = fid_features.create_table( '/', 'trajectories_data', obj=trajectories_data.to_records(index=False), filters=TABLE_FILTERS) copy_unit_conversions(newT, skeletons_file) newT._v_attrs['is_WT2'] = is_WT2 newT._v_attrs['ventral_side'] = read_ventral_side(skeletons_file) #save blob features interpolating in dropped frames and stage movement (WT2) blob_features = _r_fill_blob_features(skeletons_file, trajectories_data, is_WT2) if blob_features is not None: fid_features.create_table( '/', 'blob_features', obj=blob_features.to_records(index=False), filters=TABLE_FILTERS)
def getFoodFeatures(mask_file, skeletons_file, features_file=None, cnt_method='NN', solidity_th=0.98, batch_size=100000, _is_debug=False): if features_file is None: features_file = remove_ext(skeletons_file) + '_featuresN.hdf5' base_name = get_base_name(mask_file) progress_timer = TimeCounter('') print_flush("{} Calculating food features {}".format( base_name, progress_timer.get_time_str())) food_cnt = calculate_food_cnt(mask_file, method=cnt_method, solidity_th=solidity_th, _is_debug=_is_debug) microns_per_pixel = read_microns_per_pixel(skeletons_file) #store contour coordinates in pixels into the skeletons file for visualization purposes food_cnt_pix = food_cnt / microns_per_pixel with tables.File(skeletons_file, 'r+') as fid: if '/food_cnt_coord' in fid: fid.remove_node('/food_cnt_coord') if _is_valid_cnt(food_cnt): tab = fid.create_array('/', 'food_cnt_coord', obj=food_cnt_pix) tab._v_attrs['method'] = cnt_method print_flush("{} Calculating food features {}".format( base_name, progress_timer.get_time_str())) feats_names = [ 'orient_to_food_cnt', 'dist_from_food_cnt', 'closest_cnt_ind' ] feats_dtypes = [(x, np.float32) for x in feats_names] with tables.File(skeletons_file, 'r') as fid: tot_rows = fid.get_node('/skeleton').shape[0] features_df = np.full(tot_rows, np.nan, dtype=feats_dtypes) if food_cnt.size > 0: for ii in range(0, tot_rows, batch_size): skeletons = fid.get_node('/skeleton')[ii:ii + batch_size] skeletons *= microns_per_pixel outputs = get_cnt_feats(skeletons, food_cnt, _is_debug=_is_debug) for irow, row in enumerate(zip(*outputs)): features_df[irow + ii] = row with tables.File(features_file, 'a') as fid: if '/food' in fid: fid.remove_node('/food', recursive=True) fid.create_group('/', 'food') if _is_valid_cnt(food_cnt): fid.create_carray('/food', 'cnt_coordinates', obj=food_cnt, filters=TABLE_FILTERS) fid.create_table('/food', 'features', obj=features_df, filters=TABLE_FILTERS) #%% print_flush("{} Calculating food features {}".format( base_name, progress_timer.get_time_str()))
def get_ffprobe_metadata(video_file):
    if not os.path.exists(video_file):
        raise FileNotFoundError(video_file)

    if not os.path.exists(FFPROBE_CMD):
        raise FileNotFoundError('ffprobe not found.')

    command = [FFPROBE_CMD,
               '-v', 'error',
               '-show_frames',
               '-print_format', 'compact',
               video_file]

    base_name = video_file.rpartition('.')[0].rpartition(os.sep)[-1]
    progressTime = TimeCounter(base_name + ' Extracting video metadata.')

    frame_number = 0
    buff = []
    buff_err = []
    proc = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
    buf_reader = ReadEnqueue(proc.stdout, timeout=1)
    buf_reader_err = ReadEnqueue(proc.stderr)

    while proc.poll() is None:
        # read a line without blocking
        line = buf_reader.read()
        if line is None:
            print('cannot read')
        else:
            buff.append(line)
            if "media_type" in line:
                #use the field "media_type" as a proxy for the frame number
                #(just in case the media does not report a frame number)
                frame_number += 1
                if frame_number % 500 == 0:
                    print_flush(progressTime.get_str(frame_number))

        line = buf_reader_err.read()
        if line is not None:
            buff_err.append(line)

    #the buff is in the shape
    # frame|feat1=val1|feat2=val2|feat3=val3\n
    # store each property as a vector
    dat = [[d.split('=') for d in x.split('|')] for x in ''.join(buff).split('\n')]

    # use the first frame as reference
    frame_fields = [x[0] for x in dat[0] if len(x) == 2]

    # store data into numpy arrays
    video_metadata = OrderedDict()
    for row in dat:
        for dd in row:
            if (len(dd) != 2) or (not dd[0] in frame_fields):
                continue
            field, value = dd

            if not field in video_metadata:
                video_metadata[field] = []

            try:
                # if possible convert the data into float
                value = float(value)
            except (ValueError, TypeError):
                if value == 'N/A':
                    value = np.nan
                else:
                    # pytables does not support unicode strings (python3);
                    # str() converts a possible dictionary into a string before converting it to bytes
                    value = bytes(str(value), 'utf-8')

            video_metadata[field].append(value)

    #convert all the lists into numpy arrays
    video_metadata = {field: np.asarray(values) for field, values in video_metadata.items()}

    #convert data into a recarray to store in pytables
    video_metadata = dict2recarray(video_metadata)

    #sometimes the last frame has a nan timestamp; remove it
    if np.isnan(video_metadata[-1]['best_effort_timestamp']):
        video_metadata = video_metadata[:-1]

    #if there are still nan's raise an error
    if np.any(np.isnan(video_metadata['best_effort_timestamp'])):
        raise ValueError('The timestamp contains nan values')

    return video_metadata
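# Illustrative helper (an assumed example, not part of the original module)
# showing how a single line of `ffprobe -print_format compact -show_frames`
# output, e.g. 'frame|media_type=video|best_effort_timestamp=0|...', can be
# split into a field dictionary in the same spirit as get_ffprobe_metadata.
def _parse_ffprobe_compact_line(line):
    fields = {}
    for token in line.strip().split('|'):
        key, sep, value = token.partition('=')
        if sep:  # the leading 'frame' token has no '=' and is skipped
            fields[key] = value
    return fields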
def calculate_summaries(root_dir, feature_type, summary_type, is_manual_index, _is_debug=False, **fold_args):
    save_base_name = 'summary_{}_{}'.format(feature_type, summary_type)
    if is_manual_index:
        save_base_name += '_manual'
    save_base_name += '_' + datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

    #check the options are valid
    check_in_list(feature_type, valid_feature_types, 'feature_type')
    check_in_list(summary_type, valid_summary_types, 'summary_type')

    summary_func = get_summary_func(feature_type, summary_type, is_manual_index, **fold_args)

    possible_ext = feature_files_ext[feature_type]
    ext = possible_ext[1] if is_manual_index else possible_ext[0]

    fnames = glob.glob(os.path.join(root_dir, '**', '*' + ext), recursive=True)
    if not fnames:
        print_flush('No valid files found. Nothing to do here.')
        return

    dd = tuple(zip(*enumerate(sorted(fnames))))
    df_files = pd.DataFrame({'file_id': dd[0], 'file_name': dd[1]})
    df_files['is_good'] = False

    progress_timer = TimeCounter('')

    def _displayProgress(n):
        args = (n + 1, len(df_files), progress_timer.get_time_str())
        dd = "Extracting features summary. File {} of {} done. Total time: {}".format(*args)
        print_flush(dd)

    _displayProgress(-1)

    all_summaries = []
    for ifile, row in df_files.iterrows():
        fname = row['file_name']
        try:
            df = summary_func(fname)
            df.insert(0, 'file_id', ifile)
            all_summaries.append(df)
        except (IOError, KeyError, tables.exceptions.HDF5ExtError, tables.exceptions.NoSuchNodeError):
            continue

        df_files.loc[ifile, 'is_good'] = True
        _displayProgress(ifile)

    all_summaries = pd.concat(all_summaries, ignore_index=True, sort=False)

    f1 = os.path.join(root_dir, 'filenames_{}.csv'.format(save_base_name))
    df_files.to_csv(f1, index=False)

    f2 = os.path.join(root_dir, 'features_{}.csv'.format(save_base_name))
    all_summaries.to_csv(f2, index=False)

    out = '****************************'
    out += '\nFINISHED. Created Files:\n-> {}\n-> {}'.format(f1, f2)
    print_flush(out)

    return df_files, all_summaries
def filterByPopulationMorphology(skeletons_file, good_skel_row, critical_alpha=0.01):
    base_name = get_base_name(skeletons_file)
    progress_timer = TimeCounter('')

    print_flush(base_name + ' Filter Skeletons: Starting...')
    with pd.HDFStore(skeletons_file, 'r') as table_fid:
        trajectories_data = table_fid['/trajectories_data']

    if 'is_good_skel' not in trajectories_data:
        trajectories_data['is_good_skel'] = trajectories_data['has_skeleton']

    if good_skel_row.size > 0:
        # nothing to do if there are no valid skeletons left.
        print_flush(
            base_name + ' Filter Skeletons: Reading features for outlier identification.')

        # add possible missing fields that were not calculated in older versions of the software
        _addMissingFields(skeletons_file)

        # calculate classifier for the outliers
        nodes4fit = ['/skeleton_length', '/contour_area', '/width_midbody']
        worm_morph = _h_nodes2Array(skeletons_file, nodes4fit, -1)
        #worm_morph[~trajectories_data['is_good_skel'].values] = np.nan
        feats4fit = [worm_morph]

        #feats4fit = _h_readFeat2Check(skeletons_file)

        print_flush(base_name + ' Filter Skeletons: Calculating outliers. Total time:' +
                    progress_timer.get_time_str())

        tot_rows2fit = feats4fit[0].shape[0]
        # check all the data to fit has the same size in the first axis
        assert all(tot_rows2fit == featdat.shape[0] for featdat in feats4fit)

        outliers_rob = np.zeros(tot_rows2fit, bool)
        outliers_flag = np.zeros(tot_rows2fit, int)
        assert len(feats4fit) < 64  # otherwise the outlier flag will not work

        for out_ind, dat in enumerate(feats4fit):
            maha, out_d, lim_d = _h_getMahalanobisRobust(dat, critical_alpha, good_skel_row)
            outliers_rob = outliers_rob | out_d

            # flag the outlier by turning on the corresponding bit
            outliers_flag += (out_d) * (2 ** out_ind)

        print_flush(base_name + ' Filter Skeletons: Labeling valid skeletons. Total time:' +
                    progress_timer.get_time_str())

        # label rows of valid individual skeletons as GOOD_SKE
        trajectories_data['is_good_skel'] &= ~outliers_rob
        trajectories_data['skel_outliers_flag'] = outliers_flag

    # Save the new is_good_skel column
    if trajectories_data['is_good_skel'].dtypes == bool:
        trajectories_data['is_good_skel'] = trajectories_data['is_good_skel'].astype(np.uint8)

    save_modified_table(skeletons_file, trajectories_data, 'trajectories_data')

    print_flush(base_name + ' Filter Skeletons: Finished. Total time:' +
                progress_timer.get_time_str())
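
# Sketch of a possible call (an assumption, not taken from the original file):
# good_skel_row is the set of row indices used as the reference population for the
# robust Mahalanobis fit. Here it is simply all rows that already have a skeleton;
# the full pipeline derives it with stricter morphology filters.
def _example_filter_by_morphology(skeletons_file):
    with pd.HDFStore(skeletons_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']
    good_skel_row = trajectories_data.index[
        trajectories_data['has_skeleton'].astype(bool)].values
    filterByPopulationMorphology(skeletons_file, good_skel_row, critical_alpha=0.01)
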
def calculate_summaries(root_dir, feature_type, summary_type, is_manual_index,
                        abbreviate_features, dorsal_side_known,
                        time_windows='0:end', time_units=None,
                        select_feat='all', keywords_include='', keywords_exclude='',
                        _is_debug=False, **kwargs):
    """
    Gets input from the GUI, calls the function that chooses the type of summary
    and runs the summary calculation for each file in the root_dir.
    """
    filter_args = {k: kwargs[k] for k in kwargs.keys() if 'filter' in k}
    fold_args = {k: kwargs[k] for k in kwargs.keys() if 'filter' not in k}

    save_base_name = 'summary_{}_{}'.format(feature_type, summary_type)
    if is_manual_index:
        save_base_name += '_manual'
    save_base_name += '_' + datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

    # check the options are valid
    check_in_list(feature_type, valid_feature_types, 'feature_type')
    check_in_list(summary_type, valid_summary_types, 'summary_type')

    # EM : convert time windows to list of integers in frame number units
    time_windows_ints = time_windows_parser(time_windows)

    filter_params = filter_args_parser(filter_args)

    # EM: get lists of strings (in a tuple) defining the feature selection
    # from keywords_in, keywords_ex and select_feat.
    selected_feat = select_parser(feature_type, keywords_include,
                                  keywords_exclude, select_feat, dorsal_side_known)

    # get summary function
    # INPUT time windows time units here
    summary_func = get_summary_func(feature_type, summary_type,
                                    time_windows_ints, time_units,
                                    selected_feat, dorsal_side_known,
                                    filter_params, is_manual_index, **fold_args)

    # get extension of results file
    possible_ext = feature_files_ext[feature_type]
    ext = possible_ext[1] if is_manual_index else possible_ext[0]

    fnames = glob.glob(os.path.join(root_dir, '**', '*' + ext), recursive=True)
    if not fnames:
        print_flush('No valid files found. Nothing to do here.')
        return None, None

    # EM : Make df_files dataframe with filenames and file ids
    df_files = make_df_filenames(fnames)

    # EM : Create features_summaries and filenames_summaries files
    # and write headers
    fnames_files = []
    featsum_files = []
    for iwin in range(len(time_windows_ints)):
        # EM : Create features_summaries and filenames_summaries files
        if select_feat != 'all':
            win_save_base_name = save_base_name.replace(
                'tierpsy', select_feat + '_tierpsy')
        else:
            win_save_base_name = save_base_name

        if not (len(time_windows_ints) == 1 and time_windows_ints[0] == [0, -1]):
            win_save_base_name = win_save_base_name + '_window_{}'.format(iwin)

        f1 = os.path.join(root_dir, 'filenames_{}.csv'.format(win_save_base_name))
        f2 = os.path.join(root_dir, 'features_{}.csv'.format(win_save_base_name))

        fnamesum_headers = get_fnamesum_headers(
            f2, feature_type, summary_type, iwin, time_windows_ints[iwin],
            time_units, len(time_windows_ints), select_feat, filter_params,
            df_files.columns.to_list())
        featsum_headers = get_featsum_headers(f1)

        with open(f1, 'w') as fid:
            fid.write(fnamesum_headers)
        with open(f2, 'w') as fid:
            fid.write(featsum_headers)

        fnames_files.append(f1)
        featsum_files.append(f2)

    progress_timer = TimeCounter('')

    def _displayProgress(n):
        args = (n + 1, len(df_files), progress_timer.get_time_str())
        dd = "Extracting features summary. "
        dd += "File {} of {} done. Total time: {}".format(*args)
        print_flush(dd)

    _displayProgress(-1)

    # EM : Extract feature summaries from all the files for all time windows.
    is_featnames_written = [False for i in range(len(time_windows_ints))]

    for ifile, row in df_files.iterrows():
        fname = row['filename']
        file_id = row['file_id']

        summaries_per_win = summary_func(fname)

        for iwin, df in enumerate(summaries_per_win):
            f1 = fnames_files[iwin]
            f2 = featsum_files[iwin]

            try:
                df.insert(0, 'file_id', file_id)
                df = sort_columns(df, selected_feat)
            except (AttributeError, IOError, KeyError,
                    tables.exceptions.HDF5ExtError,
                    tables.exceptions.NoSuchNodeError):
                continue
            else:
                # Get the filename summary line
                filenames = row.copy()
                if not df.empty:
                    filenames['is_good'] = True

                # Store the filename summary line
                with open(f1, 'a') as fid:
                    fid.write(','.join([str(x) for x in filenames.values]) + "\n")

                if not df.empty:
                    # Abbreviate names
                    if abbreviate_features:
                        df = shorten_feature_names(df)

                    # Store line(s) of features summaries for the given file
                    # and given window
                    with open(f2, 'a') as fid:
                        if not is_featnames_written[iwin]:
                            df.to_csv(fid, header=True, index=False)
                            is_featnames_written[iwin] = True
                        else:
                            df.to_csv(fid, header=False, index=False)

        _displayProgress(ifile)

    out = '****************************'
    out += '\nFINISHED. Created Files:'
    for f1, f2 in zip(fnames_files, featsum_files):
        out += '\n-> {}\n-> {}'.format(f1, f2)
    print_flush(out)

    return df_files
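
# Hypothetical example (not part of the original code) of the kind of call the GUI
# would issue for two time windows. The time_windows/time_units strings are only
# illustrations of the format accepted by time_windows_parser; the other values are
# example GUI inputs.
def _example_windowed_summaries(root_dir='/path/to/Results'):
    return calculate_summaries(root_dir,
                               feature_type='tierpsy',
                               summary_type='plate',
                               is_manual_index=False,
                               abbreviate_features=False,
                               dorsal_side_known=False,
                               time_windows='0:60, 60:end',
                               time_units='seconds')
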
def calculate_summaries(root_dir, feature_type, summary_type, is_manual_index,
                        time_windows, time_units, select_feat,
                        keywords_include, keywords_exclude, abbreviate_features,
                        _is_debug=False, **fold_args):
    """
    Gets input from the GUI, calls the function that chooses the type of summary
    and runs the summary calculation for each file in the root_dir.
    """
    save_base_name = 'summary_{}_{}'.format(feature_type, summary_type)
    if is_manual_index:
        save_base_name += '_manual'
    save_base_name += '_' + datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

    # check the options are valid
    check_in_list(feature_type, valid_feature_types, 'feature_type')
    check_in_list(summary_type, valid_summary_types, 'summary_type')

    # EM : convert time windows to list of integers in frame number units
    time_windows_ints = time_windows_parser(time_windows)

    # EM : get list of keywords to include and to exclude
    # TODO: catch conflicts
    keywords_in = keywords_parser(keywords_include)
    keywords_ex = keywords_parser(keywords_exclude)

    # EM : get full path to feature set file
    selected_feat = feat_set_parser(select_feat)

    # get summary function
    # INPUT time windows and time units here
    summary_func = get_summary_func(feature_type, summary_type,
                                    time_windows_ints, time_units,
                                    is_manual_index, **fold_args)

    # get extension of results file
    possible_ext = feature_files_ext[feature_type]
    ext = possible_ext[1] if is_manual_index else possible_ext[0]

    fnames = glob.glob(os.path.join(root_dir, '**', '*' + ext), recursive=True)
    if not fnames:
        print_flush('No valid files found. Nothing to do here.')
        return None, None

    # EM : Make df_files list with one filenames dataframe per time window
    df_files = make_df_filenames(fnames, time_windows_ints)

    progress_timer = TimeCounter('')

    def _displayProgress(n):
        args = (n + 1, len(df_files[0]), progress_timer.get_time_str())
        dd = "Extracting features summary. File {} of {} done. Total time: {}".format(*args)
        print_flush(dd)

    _displayProgress(-1)

    # EM : Make all_summaries list with one element per time window. Each element
    # contains the extracted feature summaries from all the files for the given time window.
    all_summaries = [[] for x in range(len(time_windows_ints))]

    for ifile, row in df_files[0].iterrows():
        fname = row['file_name']
        df_list = summary_func(fname)

        for iwin, df in enumerate(df_list):
            try:
                df.insert(0, 'file_id', ifile)
                all_summaries[iwin].append(df)
            except (AttributeError, IOError, KeyError,
                    tables.exceptions.HDF5ExtError,
                    tables.exceptions.NoSuchNodeError):
                continue
            else:
                if not df.empty:
                    df_files[iwin].loc[ifile, 'is_good'] = True

        _displayProgress(ifile)

    # EM : Concatenate summaries for each window into one dataframe and select features
    for iwin in range(len(time_windows_ints)):
        all_summaries[iwin] = pd.concat(all_summaries[iwin], ignore_index=True, sort=False)
        all_summaries[iwin] = select_features(
            all_summaries[iwin], keywords_in, keywords_ex, selected_feat)

        # IB : add in the option to abbreviate features
        if abbreviate_features:
            all_summaries[iwin] = shorten_feature_names(all_summaries[iwin])

        # EM : Save results
        if select_feat != 'all':
            win_save_base_name = save_base_name.replace('tierpsy', select_feat + '_tierpsy')
        else:
            win_save_base_name = save_base_name

        if not (len(time_windows_ints) == 1 and time_windows_ints[0] == [0, -1]):
            win_save_base_name = win_save_base_name + '_window_{}'.format(iwin)

        f1 = os.path.join(root_dir, 'filenames_{}.csv'.format(win_save_base_name))
        f2 = os.path.join(root_dir, 'features_{}.csv'.format(win_save_base_name))

        fnamesum_headers = get_fnamesum_headers(
            f2, feature_type, summary_type, iwin, time_windows_ints[iwin],
            time_units, len(time_windows_ints), select_feat)
        featsum_headers = get_featsum_headers(f1)

        with open(f1, 'w') as fid:
            fid.write(fnamesum_headers)
            df_files[iwin].to_csv(fid, index=False)

        with open(f2, 'w') as fid:
            fid.write(featsum_headers)
            all_summaries[iwin].to_csv(fid, index=False)

    out = '****************************'
    out += '\nFINISHED. Created Files:\n-> {}\n-> {}'.format(f1, f2)
    print_flush(out)

    return df_files, all_summaries
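
# Sketch (an assumption, not in the original file): reading back one of the
# per-window feature CSVs written above, for the common case select_feat='all'
# with more than one time window. It assumes the header lines written by
# get_featsum_headers are '#'-prefixed comment lines, so pandas can skip them.
def _example_load_window_summary(root_dir, save_base_name, iwin=0):
    f2 = os.path.join(root_dir,
                      'features_{}_window_{}.csv'.format(save_base_name, iwin))
    return pd.read_csv(f2, comment='#')
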
def getIntensityProfile(masked_image_file,
                        skeletons_file,
                        intensities_file,
                        width_resampling=15,
                        length_resampling=131,
                        min_num_skel=100,
                        smooth_win=11,
                        pol_degree=3,
                        width_percentage=0.5,
                        save_maps=False):

    min_num_skel = min_num_skel_defaults(skeletons_file, min_num_skel=min_num_skel)

    assert smooth_win > pol_degree
    assert min_num_skel > 0
    assert 0 < width_percentage < 1

    # we want to use a symmetrical distance centered in the skeleton
    if length_resampling % 2 == 0:
        length_resampling += 1
    if width_resampling % 2 == 0:
        width_resampling += 1

    # get the limits to be averaged from the intensity map
    if save_maps:
        width_win_ind = getWidthWinLimits(width_resampling, width_percentage)
    else:
        width_win_ind = (0, width_resampling)

    # filters for the table structures
    table_filters = tables.Filters(complevel=5, complib='zlib', shuffle=True, fletcher32=True)

    # Get a reduced version of the trajectories_data table with only the valid skeletons.
    # The rows of this new table are going to be saved into skeletons_file
    trajectories_data_valid = setIntMapIndexes(skeletons_file, min_num_skel)

    # let's save this new table into the intensities file
    with tables.File(intensities_file, 'w') as fid:
        fid.create_table(
            '/',
            'trajectories_data_valid',
            obj=trajectories_data_valid.to_records(index=False),
            filters=table_filters)

    tot_rows = len(trajectories_data_valid)
    if tot_rows == 0:
        with tables.File(intensities_file, "r+") as int_file_id:
            # nothing to do here, let's save empty data and exit
            worm_int_avg_tab = int_file_id.create_array(
                "/", "straighten_worm_intensity_median", obj=np.zeros(0))
            worm_int_avg_tab._v_attrs['has_finished'] = 1
        return

    with tables.File(masked_image_file, 'r') as mask_fid, \
            tables.File(skeletons_file, 'r') as ske_file_id, \
            tables.File(intensities_file, "r+") as int_file_id:

        # pointer to the compressed videos
        mask_dataset = mask_fid.get_node("/mask")

        # pointer to skeletons
        skel_tab = ske_file_id.get_node('/skeleton')
        skel_width_tab = ske_file_id.get_node('/width_midbody')

        filters = tables.Filters(complevel=5, complib='zlib', shuffle=True)

        # we are using Float16 to save space, I am assuming the intensities are
        # between uint8
        worm_int_avg_tab = int_file_id.create_carray(
            "/",
            "straighten_worm_intensity_median",
            tables.Float16Atom(dflt=np.nan),
            (tot_rows, length_resampling),
            chunkshape=(1, length_resampling),
            filters=table_filters)

        worm_int_avg_tab._v_attrs['has_finished'] = 0
        worm_int_avg_tab.attrs['width_win_ind'] = width_win_ind

        if save_maps:
            worm_int_tab = int_file_id.create_carray(
                "/",
                "straighten_worm_intensity",
                tables.Float16Atom(dflt=np.nan),
                (tot_rows, length_resampling, width_resampling),
                chunkshape=(1, length_resampling, width_resampling),
                filters=table_filters)

        grouped_frames = trajectories_data_valid.groupby('frame_number')

        # variables used to report progress
        base_name = skeletons_file.rpartition('.')[0].rpartition(os.sep)[-1].rpartition('_')[0]
        progressTime = TimeCounter('Obtaining intensity maps.', len(grouped_frames))

        for frame, frame_data in grouped_frames:
            img = mask_dataset[frame, :, :]
            for ii, row_data in frame_data.iterrows():
                skeleton_id = int(row_data['skeleton_id'])
                worm_index = int(row_data['worm_index_joined'])
                int_map_id = int(row_data['int_map_id'])

                # read ROI and skeleton, and put them in the same coordinate map
                worm_img, roi_corner = getWormROI(
                    img, row_data['coord_x'], row_data['coord_y'], row_data['roi_size'])

                skeleton = skel_tab[skeleton_id, :, :] - roi_corner
                half_width = skel_width_tab[skeleton_id] / 2

                assert not np.isnan(skeleton[0, 0])

                skel_smooth = smoothSkeletons(
                    skeleton,
                    length_resampling=length_resampling,
                    smooth_win=smooth_win,
                    pol_degree=pol_degree)
                straighten_worm, grid_x, grid_y = getStraightenWormInt(
                    worm_img,
                    skel_smooth,
                    half_width=half_width,
                    width_resampling=width_resampling)

                # if you use the mean it is better not to use float16
                int_avg = np.median(
                    straighten_worm[width_win_ind[0]:width_win_ind[1], :], axis=0)

                worm_int_avg_tab[int_map_id] = int_avg

                # only save the full map if it is specified by the user
                if save_maps:
                    worm_int_tab[int_map_id] = straighten_worm.T

            if frame % 500 == 0:
                progress_str = progressTime.get_str(frame)
                print_flush(base_name + ' ' + progress_str)

        worm_int_avg_tab._v_attrs['has_finished'] = 1
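
# Usage sketch (not from the original module): reading back the per-skeleton median
# intensity profiles written above. The node name matches the carray created in
# getIntensityProfile; the file name is hypothetical.
def _example_read_intensity_profiles(intensities_file='video_intensities.hdf5'):
    with tables.File(intensities_file, 'r') as fid:
        node = fid.get_node('/straighten_worm_intensity_median')
        assert node._v_attrs['has_finished'] == 1
        # the data is stored as Float16; promote before doing any arithmetic
        return node[:].astype(np.float32)
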
def assignBlobTrajDF(traj_df, max_allowed_dist, area_ratio_lim, base_name=''):
    def _get_cost_matrix(frame_data, frame_data_prev):
        coord = frame_data[['coord_x', 'coord_y']].values
        coord_prev = frame_data_prev[['coord_x', 'coord_y']].values
        costMatrix = cdist(coord_prev, coord)  # calculate the cost matrix

        # assign a large value to non-valid combinations by area
        area = frame_data['area'].values
        area_prev = frame_data_prev['area'].values
        area_ratio = area_prev[:, None] / area[None, :]
        area_ratio[np.isnan(area_ratio)] = 1e20

        bad_ratio = (area_ratio < area_ratio_lim[0]) | \
                    (area_ratio > area_ratio_lim[1]) | \
                    np.isnan(costMatrix)

        costMatrix[bad_ratio] = 1e20

        return costMatrix

    def _get_prev_ind_match(costMatrix):
        def _label_bad_ind(indexes, dist, max_allowed_dist):
            # label as bad the pairs that have a distance larger than max_allowed_dist
            indexes[dist > max_allowed_dist] = -1
            # remove indexes that were assigned twice (either a merge or a split event)
            uind, counts = np.unique(indexes, return_counts=True)
            duplicated_ind = uind[counts > 1]
            bad_ind = np.in1d(indexes, duplicated_ind)
            indexes[bad_ind] = -1
            return indexes

        # Get the corresponding index in the previous data frame.
        # Remove pairs located at positions larger than max_allowed_dist
        # and indexes that were assigned twice or more (split events).
        map_to_prev = np.argmin(costMatrix, axis=0)  # must have dimensions of frame_data
        min_dist_pp = costMatrix[map_to_prev, np.arange(costMatrix.shape[1])]
        _label_bad_ind(map_to_prev, min_dist_pp, max_allowed_dist)

        # Here I am looking at the previous indexes that would have been
        # assigned twice or more to the next indexes (merge events).
        map_to_next = np.argmin(costMatrix, axis=1)  # must have dimensions of frame_data_prev
        min_dist_pp = costMatrix[np.arange(costMatrix.shape[0]), map_to_next]
        _label_bad_ind(map_to_next, min_dist_pp, max_allowed_dist)

        # technically either an index too far away or a duplicated one
        bad_prev_ind = np.where(map_to_next == -1)[0]
        possible_merges = np.in1d(map_to_prev, bad_prev_ind)
        map_to_prev[possible_merges] = -1

        return map_to_prev

    frame_data_prev = None
    tot_worms = 0
    all_indexes = []

    frames_grouped = traj_df.groupby('frame_number')

    #if isinstance(area_ratio_lim, (float, int)):
    #    area_ratio_lim = (1/area_ratio_lim, area_ratio_lim)

    progress_time = TimeCounter(base_name + ' Assigning trajectories.', len(frames_grouped))
    for frame, frame_data in frames_grouped:
        # what happens if the frames are not continuous?
        if frame_data_prev is not None:
            _, prev_traj_ind = all_indexes[-1]

            costMatrix = _get_cost_matrix(frame_data, frame_data_prev)
            map_to_prev = _get_prev_ind_match(costMatrix)

            traj_indexes = np.zeros_like(map_to_prev)
            unmatched = map_to_prev == -1
            matched = ~unmatched

            # assign matched indexes from the previous frame
            traj_indexes[matched] = prev_traj_ind[map_to_prev[matched]]

            vv = np.arange(1, np.sum(unmatched) + 1) + tot_worms
            if vv.size > 0:
                tot_worms = vv[-1]
                traj_indexes[unmatched] = vv
        else:
            # initialize worm indexes
            traj_indexes = tot_worms + np.arange(1, len(frame_data) + 1)
            tot_worms = traj_indexes[-1]

        all_indexes.append((frame_data.index, traj_indexes))

        frame_data_prev = frame_data

        if frame % 500 == 0:
            # calculate the progress and put it in a string
            print_flush(progress_time.get_str(frame))

    if all_indexes:
        row_ind, traj_ind = map(np.concatenate, zip(*all_indexes))
        traj_ind = traj_ind[np.argsort(row_ind)]
        return traj_ind
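
# Minimal usage sketch (assumed, not part of the original code): traj_df needs
# 'frame_number', 'coord_x', 'coord_y' and 'area' columns. The returned vector is
# ordered by the original (unique) dataframe index, so it can be assigned back
# after sorting; the threshold values and the new column name are only examples.
def _example_assign_trajectories(traj_df):
    traj_ind = assignBlobTrajDF(traj_df,
                                max_allowed_dist=25.0,
                                area_ratio_lim=(0.5, 2.0))
    if traj_ind is not None:
        traj_df = traj_df.sort_index().copy()
        traj_df['worm_index_blob'] = traj_ind
    return traj_df
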
def correctHeadTail(skeletons_file, **params):
    '''
    Correct head/tail orientation using the skeleton movement. The head must move
    more than the tail (i.e. have a higher rolling standard deviation). This might
    fail if the number of contiguously skeletonized frames is too small (a few seconds).
    The head must be in the first position of the single-frame skeleton array,
    while the tail must be in the last.

    max_gap_allowed - maximum number of consecutive skeletons lost before
        considering it a new block
    window_std - frame window to calculate the standard deviation
    segment4angle - separation between skeleton segments to calculate the angles
    min_block_size - consider only around 10s intervals to determine if it is the
        head or the tail...
    '''
    params = head_tail_defaults(skeletons_file, **params)
    max_gap_allowed = params['max_gap_allowed']
    window_std = params['window_std']
    segment4angle = params['segment4angle']
    min_block_size = params['min_block_size']

    base_name = skeletons_file.rpartition('.')[0].rpartition(os.sep)[-1].rpartition('_')[0]

    with pd.HDFStore(skeletons_file, 'r') as ske_file_id:
        indexes_data = ske_file_id['/trajectories_data'][[
            'worm_index_joined', 'skeleton_id']]

        # get the first and last frame of each worm_index
        rows_indexes = indexes_data.groupby('worm_index_joined').agg([min, max])['skeleton_id']
        del indexes_data

    # check if the skeletonization finished successfully
    with tables.File(skeletons_file, "r") as ske_file_id:
        skeleton_table = ske_file_id.get_node('/skeleton')
        if 'has_finished' in dir(skeleton_table._v_attrs):
            assert skeleton_table._v_attrs['has_finished'] >= 2

    progress_timer = TimeCounter('')
    for ii, dat in enumerate(rows_indexes.iterrows()):
        if ii % 10 == 0:
            dd = " Correcting Head-Tail using worm movement. Worm %i of %i." % (
                ii + 1, len(rows_indexes))
            dd = base_name + dd + ' Total time:' + progress_timer.get_time_str()
            print_flush(dd)

        worm_index, row_range = dat

        worm_data = WormClass(skeletons_file, worm_index,
                              rows_range=(row_range['min'], row_range['max']))

        if not np.all(np.isnan(worm_data.skeleton_length)):
            is_switched_skel, roll_std = isWormHTSwitched(
                worm_data.skeleton,
                segment4angle=segment4angle,
                max_gap_allowed=max_gap_allowed,
                window_std=window_std,
                min_block_size=min_block_size)

            worm_data.switchHeadTail(is_switched_skel)

        worm_data.writeData()

    #%%
    print_flush('Head-Tail correction using worm movement finished: ' +
                progress_timer.get_time_str())

    with tables.File(skeletons_file, "r+") as ske_file_id:
        # Mark a successful termination
        ske_file_id.get_node('/skeleton')._v_attrs['has_finished'] = 3
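
# Hypothetical call sketch (not in the original file): the parameters are normally
# filled in by head_tail_defaults, so passing only the skeletons file is enough.
# Explicit values are shown only to illustrate the expected keyword names; they are
# not the pipeline defaults.
def _example_correct_head_tail(skeletons_file):
    correctHeadTail(skeletons_file,
                    max_gap_allowed=10,
                    window_std=25,
                    segment4angle=5,
                    min_block_size=250)
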