def time_windows_parser(time_windows):
    """
    Convert the time-windows string coming from the GUI into a list of
    [start, end] integer pairs.

    An empty (or whitespace-only) string selects the whole recording and
    yields [[0, -1]]. The keyword 'end' is translated to -1.

    Parameters
    ----------
    time_windows : str
        Windows joined by `valid_time_windows_separator`; the limits of each
        window are split at ':'.

    Returns
    -------
    list of list of int

    Raises
    ------
    ValueError
        If `valid_time_windows_connector` is missing from the input, or if a
        limit cannot be converted to an integer.
    AssertionError
        If a window has an explicit end time smaller than its start time.
    """
    if not time_windows.replace(' ', ''):
        return [[0, -1]]

    if valid_time_windows_connector not in time_windows:
        # The exception used to be instantiated but never raised, so
        # malformed input silently produced None.
        raise ValueError(time_windows_format_explain)

    # Remove spaces and replace end with -1
    windows = time_windows.replace(' ', '').replace('end', '-1')
    # Split at the separator to get the windows, then split each non-empty
    # window at ':'
    windows = [
        x.split(':')
        for x in windows.split(valid_time_windows_separator) if x
    ]

    # Convert to integers
    try:
        windows = [[int(x) for x in wdw] for wdw in windows]
    except ValueError:
        print_flush(
            'Time windows input could not be converted to list of integers.'
            + time_windows_format_explain)
        raise

    for iwin, window in enumerate(windows):
        # -1 means "until the end", so there is nothing to compare against
        if window[1] != -1:
            assert window[0] <= window[1], \
                "The end time of time window {}/{} is smaller than the start time.".format(
                    iwin + 1, len(windows))
    return windows
def time_to_frame_nb(time_windows, time_units, fps, timestamp, fname):
    """
    Express the time windows in frame numbers and resolve open-ended windows.

    Limits given in seconds are multiplied by `fps` and rounded. A window
    whose end is -1 gets the frame after the largest timestamp (or its own
    start, if that is larger). The input list is not modified.

    Returns None when `timestamp` is empty, otherwise the converted windows.
    """
    if timestamp.empty:
        return

    from copy import deepcopy

    frame_windows = deepcopy(time_windows)

    if time_units == 'seconds':
        assert fps != -1
        for window in frame_windows:
            for pos in (0, 1):
                # -1 is the "end" marker, not a time; leave it untouched
                if window[pos] != -1:
                    window[pos] = round(window[pos] * fps)

    last_frame = timestamp.sort_values().iloc[-1]

    n_windows = len(frame_windows)
    for number, window in enumerate(frame_windows, start=1):
        # Replace the "end" marker with the frame after the last one (or the
        # window start when the window begins beyond the recording)
        if window[1] == -1:
            window[1] = max(last_frame + 1, window[0])

        # Warn about windows starting after the last available frame
        if window[0] > last_frame:
            print_flush(
                'Warning: The start time of window {}/{} is out of bounds of file \'{}\'.'
                .format(number, n_windows, fname))

    return frame_windows
def getFoodContour(mask_file, skeletons_file, use_nn_food_cnt, model_path, solidity_th=0.98, _is_debug = False):
    """
    Calculate the food contour of `mask_file` and store it as the node
    '/food_cnt_coord' in both `skeletons_file` and `mask_file`.

    Any existing '/food_cnt_coord' node is removed first. A new one is only
    written when the contour is a 2D array with two columns; in that case the
    attribute 'use_nn_food_cnt' is saved alongside it.
    """
    base_name = get_base_name(mask_file)
    progress_timer = TimeCounter('')
    print_flush("{} Calculating food contour {}".format(
        base_name, progress_timer.get_time_str()))

    food_cnt = calculate_food_cnt(mask_file,
                                  use_nn_food_cnt=use_nn_food_cnt,
                                  model_path=model_path,
                                  solidity_th=solidity_th,
                                  _is_debug=_is_debug)

    # the contour goes into the skeletons file first and then the mask file
    for target_file in (skeletons_file, mask_file):
        with tables.File(target_file, 'r+') as h5:
            if '/food_cnt_coord' in h5:
                h5.remove_node('/food_cnt_coord')

            is_valid_cnt = (food_cnt is not None
                            and food_cnt.size >= 2
                            and food_cnt.ndim == 2
                            and food_cnt.shape[1] == 2)
            if not is_valid_cnt:
                # nothing worth saving; the old node has already been dropped
                continue

            cnt_node = h5.create_array('/', 'food_cnt_coord', obj=food_cnt)
            cnt_node._v_attrs['use_nn_food_cnt'] = int(use_nn_food_cnt)
def _display_progress(n):
    # Report progress of the feature calculation. `n` is shifted by one in
    # the message; `tot_worms`, `base_name` and `progress_timer` are taken
    # from the surrounding scope.
    worm_msg = " Calculating tierpsy features. Worm %i of %i done." % (n+1, tot_worms)
    progress_msg = base_name + worm_msg + ' Total time:' + progress_timer.get_time_str()
    print_flush(progress_msg)
def _display_progress(n):
    # Report progress of the skeleton smoothing. `tot_worms`, `base_name`
    # and `progress_timer` are taken from the surrounding scope.
    worm_msg = " Smoothing skeletons. Worm %i of %i done." % (n, tot_worms)
    progress_msg = base_name + worm_msg + ' Total time:' + progress_timer.get_time_str()
    print_flush(progress_msg)
def _displayProgress(n):
    # Report progress of the feature extraction. `tot_worms`, `base_name`
    # and `progress_timer` are taken from the surrounding scope.
    worm_msg = " Extracting features. Worm %i of %i done." % (n, tot_worms)
    progress_msg = base_name + worm_msg + ' Total time:' + progress_timer.get_time_str()
    print_flush(progress_msg)
def _copyFilesLocal(self, files2copy): ''' copy files to the source directory''' for files in files2copy: file_name, destination = files assert(os.path.exists(destination)) if os.path.abspath(os.path.dirname(file_name)) != os.path.abspath(destination): print_flush('Copying %s to %s' % (file_name, destination)) shutil.copy(file_name, destination)
def no_fps(time_units, fps):
    # Tell whether seconds-based windows cannot be converted because the fps
    # is unknown (-1); a warning is printed in that case.
    # NOTE: `fname` is not a parameter, it is resolved from the surrounding scope.
    fps_is_missing = time_units == 'seconds' and fps == -1
    if fps_is_missing:
        print_flush(""" Warning: The time windows were defined in seconds, but fps for file \'{}\' is unknown. Define time windows in frame numbers instead. """.format(fname))
    return fps_is_missing
def createSampleVideo(masked_image_file, sample_video_name='', time_factor=8, size_factor=5, skip_factor=2, dflt_fps=30, codec='MPEG', shift_bgnd=False):
    """
    Write a down-sampled preview video from the '/mask' dataset of an HDF5 file.

    masked_image_file -- HDF5 file that contains the '/mask' dataset
    sample_video_name -- output video; if empty it is obtained from
                         getSubSampleVidName(masked_image_file)
    time_factor -- together with skip_factor sets the frame step of the loop,
                   int(time_factor * skip_factor)
    size_factor -- the image height and width are integer-divided by this value
    skip_factor -- the video is written at fps / skip_factor, i.e. fewer frames
                   are used (e.g. 15 fps instead of 30 fps) to reduce its size
    dflt_fps -- value handed to read_fps as default
    codec -- fourcc code given to cv2.VideoWriter_fourcc (values reported to
             work: 'H264', 'MPEG', 'XVID')
    shift_bgnd -- if True the zero pixels of each frame are replaced by a
                  background value estimated from the first and last frames
    """
    if not sample_video_name:
        sample_video_name = getSubSampleVidName(masked_image_file)

    # initialize timers
    base_name = masked_image_file.rpartition('.')[0].rpartition(os.sep)[-1]
    progressTime = TimeCounter(
        '{} Generating subsampled video.'.format(base_name))

    with tables.File(masked_image_file, 'r') as fid:
        masks = fid.get_node('/mask')
        tot_frames, im_h, im_w = masks.shape
        im_h, im_w = im_h // size_factor, im_w // size_factor

        fps = read_fps(masked_image_file, dflt_fps)
        tt_vec = _getCorrectedTimeVec(fid, tot_frames)

        vid_writer = cv2.VideoWriter(sample_video_name,
                                     cv2.VideoWriter_fourcc(*codec),
                                     fps / skip_factor,
                                     (im_w, im_h),
                                     isColor=False)
        # Release the writer even if a frame fails, so the output file is
        # never left open.
        try:
            assert vid_writer.isOpened()

            if shift_bgnd:
                # lazy bgnd calculation: take the first and last frames and
                # use the 97.5th percentile of their non-zero pixels
                mm = masks[[0, -1], :, :]
                _bgnd_val = np.percentile(mm[mm != 0], [97.5])[0]

            # keeps the final progress message valid when '/mask' has no frames
            frame_number = 0
            for frame_number in range(0, tot_frames, int(time_factor * skip_factor)):
                # tt_vec gives the index of the frame that is actually read
                current_frame = int(tt_vec[frame_number])
                img = masks[current_frame]
                if shift_bgnd:
                    img[img == 0] = _bgnd_val

                im_new = cv2.resize(img, (im_w, im_h))
                vid_writer.write(im_new)

                if frame_number % (500 * time_factor) == 0:
                    # calculate the progress and put it in a string
                    print_flush(progressTime.get_str(frame_number))
        finally:
            vid_writer.release()

        print_flush(progressTime.get_str(frame_number) + ' DONE.')
def no_attr_flush(attr, fname):
    """
    Print a warning saying that a ratio needed to convert the summarizer
    input is unknown for file `fname`.

    attr -- 'fps' (input given in seconds) or 'mpp' (input given in microns)
    fname -- file name shown in the warning

    Raises ValueError for any other `attr`. Before, an unknown value failed
    with an UnboundLocalError at the point the message was formatted.
    """
    if attr == 'fps':
        out = ['seconds', 'frames_per_second', fname, 'frame numbers']
    elif attr == 'mpp':
        out = ['microns', 'microns_per_pixel', fname, 'pixels']
    else:
        raise ValueError(
            "Unknown attribute '{}'. Expected 'fps' or 'mpp'.".format(attr))

    print_flush(""" Warning: some of the summarizer input were given in {0}, but the {1} ratio for file \'{2}\' is unknown. Give input in {3} instead. """.format(*out))
    return
def get_food_contour(mask_video, min_area=None, n_bins=180, frac_lowess=0.1, is_debug=False):
    '''
    Identify the contour of a food patch.

    I tested this for the worm rig. It assumes the food has a semi-circular shape.
    The food lawn is very thin so the challenge was to estimate the contour of a very dim area.

    mask_video -- HDF5 file with a '/full_data' node
    min_area -- forwarded to get_patch_mask
    n_bins, frac_lowess -- forwarded to mask_to_food_contour
    is_debug -- if True, plot the image, the skeletonized mask, the fitted
                circle and the contour

    Returns (circx, circy) as given by mask_to_food_contour, or (None, None)
    if the file does not have a '/full_data' node.
    '''
    progress_timer = TimeCounter('')
    base_name = get_base_name(mask_video)
    print_flush('{} Calculating food contour...'.format(base_name))

    try:
        with tables.File(mask_video, 'r') as fid:
            # only the first two images are used, so only those are read
            full_data = fid.get_node('/full_data')[:2]
    except tables.exceptions.NoSuchNodeError:
        return None, None

    # pixel-wise maximum of the images that were read
    img = np.max(full_data, axis=0)

    mask = get_patch_mask(img, min_area=min_area)
    circx, circy, best_fit = mask_to_food_contour(mask,
                                                  n_bins=n_bins,
                                                  frac_lowess=frac_lowess)

    dd = '{} Food contour calculated. Total time: {}'.format(
        base_name, progress_timer.get_time_str())
    print_flush(dd)

    if is_debug:
        # imported here so the plotting libraries are only needed for debugging
        from skimage.draw import circle_perimeter
        import matplotlib.pylab as plt

        cpx, cpy = circle_perimeter(*best_fit[1:])

        plt.figure(figsize=(5, 5))
        plt.gca().xaxis.set_ticklabels([])
        plt.gca().yaxis.set_ticklabels([])

        (px, py) = np.where(skeletonize(mask))
        plt.imshow(img, cmap='gray')
        plt.plot(py, px, '.')
        plt.plot(cpx, cpy, '.r')
        plt.plot(circy, circx, '.')
        plt.suptitle(base_name)
        # a boolean is required here; the string 'off' is not a valid way to
        # disable the grid in current matplotlib
        plt.grid(False)

    return circx, circy
def _get_threshold(text): text = text.replace(' ','') if not text: return try: threshold = float(text) except ValueError: print_flush('The threshold for trajectories filtering must be a number.') return threshold
def alignStageMotion(masked_file, skeletons_file, tmp_dir=os.path.expanduser('~/Tmp')):
    """
    Align the stage motion by running the MATLAB function
    alignStageMotionSegwormFun on the given files.

    A temporary '.m' script is written into `tmp_dir` (created if missing),
    handed to a headless MATLAB process, and deleted afterwards, even if the
    call fails.

    masked_file -- existing masked video file
    skeletons_file -- existing skeletons file
    tmp_dir -- directory used for the temporary script
    """
    assert os.path.exists(masked_file)
    assert os.path.exists(skeletons_file)

    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)

    base_name = os.path.split(masked_file)[1].partition('.hdf5')[0]

    # the directory of this module is added to the MATLAB path
    current_dir = os.path.dirname(os.path.abspath(__file__))

    # NOTE(review): sp.call receives a list and no shell is involved, so '<'
    # reaches MATLAB as a literal argument rather than as an input
    # redirection -- confirm this is what is intended.
    start_cmd = ('matlab -nojvm -nosplash -nodisplay -nodesktop <').split()

    script_cmd = "addpath('{}'); " \
        "try, alignStageMotionSegwormFun('{}', '{}'); " \
        "catch ME, disp(getReport(ME)); " \
        "end; exit; "
    script_cmd = script_cmd.format(current_dir, masked_file, skeletons_file)

    # create temporary file to read as matlab script, works better than
    # passing a string in the command line.
    tmp_fid, tmp_script_file = tempfile.mkstemp(suffix='.m',
                                                dir=tmp_dir,
                                                text=True)
    try:
        with open(tmp_script_file, 'w') as fid:
            fid.write(script_cmd)

        matlab_cmd = start_cmd + [tmp_script_file]

        # call matlab and align the stage motion
        print_flush('%s Aligning Stage Motion.' % base_name)
        sp.call(matlab_cmd)
        print_flush('%s Alignment finished.' % base_name)
    finally:
        # delete temporary file, also when writing or the call above raised
        os.close(tmp_fid)
        os.remove(tmp_script_file)
def clean(self):
    # Copy the results from the temporary to the final location, then print
    # the total time and any analysis points that are still missing.
    self._copyTmpToFinalAndClean()

    elapsed_s = round(time.time() - self.start_time)
    time_str = datetime.timedelta(seconds=elapsed_s)
    summary = '{} Finished. Total time {}.'.format(self.base_name, time_str)

    # points missing in the tmp dir take precedence over the final dir ones
    if len(self.unfinished_points_tmp) > 0:
        summary = '{} Missing analysis points in the tmp dir: {}.'.format(
            summary, self.unfinished_points_tmp)
    elif len(self.unfinished_points_src) > 0:
        summary = '''{} Missing analysis points in the final dir: {}. Problems when copy files?.'''.format(
            summary, self.unfinished_points_src)

    print_flush(summary)
def fix_wrong_merges(mask_video, skeletons_file, min_area_limit=50):
    """
    Split trajectories that were wrongly merged and store the new indexes in
    the column 'worm_index_auto'.

    mask_video -- masked video, forwarded to split_trajectories
    skeletons_file -- HDF5 file with the '/trajectories_data' table
    min_area_limit -- forwarded to filter_table_by_area

    Returns
    -------
    trajectories_data : table read from the file with 'worm_index_auto' set
        (-1 for the rows that were not assigned)
    splitted_points : dict with the split points accumulated over all the
        iterations. Before, the dict of the last iteration was returned,
        which is empty whenever the loop stops because nothing is left to
        split.
    """
    #get the trajectories table
    base_name = get_base_name(skeletons_file)

    with pd.HDFStore(skeletons_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']

    trajectories_data_f = trajectories_data.copy()
    trajectories_data_f['worm_index_auto'] = trajectories_data_f['worm_index_joined']

    print_flush(base_name + ' Spliting wrong merge events.')

    #I do a few iterations because sometimes new trajectories are formed
    #when previous ones are split
    max_n_iter = 10
    splitted_points = {}
    for ii in range(max_n_iter):
        trajectories_data_f = filter_table_by_area(trajectories_data_f,
                                                   worm_index_type='worm_index_auto',
                                                   min_area_limit=min_area_limit)

        worm_index_new, points2split = split_trajectories(mask_video,
                                                          trajectories_data_f,
                                                          worm_index_type='worm_index_auto',
                                                          min_area_limit=25,
                                                          min_area_ratio=0.5,
                                                          buf_size=11)

        #resetting to -1 first forces the unassigned indexes to be empty
        trajectories_data['worm_index_auto'] = np.int32(-1)
        trajectories_data.loc[worm_index_new.index, 'worm_index_auto'] = worm_index_new.values

        if len(points2split) == 0:
            break

        for x in points2split:
            if x not in splitted_points:
                splitted_points[x] = []
            splitted_points[x] += points2split[x]

    return trajectories_data, splitted_points
def execAllPoints(self):
    # Run the analysis checkpoints in order. The loop stops at the first
    # checkpoint with unmet requirements or whose output already exists, and
    # a summary with the elapsed time is printed at the end.
    base_name = self.ap.file_names['base_name']

    if len(self.analysis_checkpoints) == 0:
        print_flush(
            '%s No checkpoints given. It seems that there is a previous analysis finished. Exiting.'
            % base_name)
        return

    pkgs_versions = getPackagesVersion()

    print_flush('%s Starting checkpoint: %s' %
                (base_name, self.analysis_checkpoints[0]))
    initial_time = time.time()

    for current_point in self.analysis_checkpoints:
        print(current_point)

        unmet_requirements = self.ap.hasRequirements(current_point)
        if len(unmet_requirements) != 0:
            print(unmet_requirements)
            break

        this_point_exists = self.ap.checker.get(current_point)
        if this_point_exists:
            print('this_point_exists', current_point)
            break

        point_args = self.ap.getArgs(current_point)
        execThisPoint(current_point,
                      **point_args,
                      pkgs_versions=pkgs_versions,
                      cmd_original=self.cmd_original)

    elapsed_s = round(time.time() - initial_time)
    time_str = str(datetime.timedelta(seconds=elapsed_s))

    # the variables below keep the values of the last checkpoint visited
    if len(unmet_requirements) > 0:
        summary = '''{} Finished early. Total time {}. Cannot continue for step {} because it does not sastify the requiriments: {}'''.format(
            base_name, time_str, current_point, unmet_requirements)
    elif this_point_exists:
        existing_files = self.ap.getField('output_files',
                                          [current_point]).values()
        summary = '''{} Finished early. Total time {}. the step {} already exists. Delete files if you want to continue: {}'''.format(
            base_name, time_str, current_point, existing_files)
    else:
        summary = '{} Finished in {}. Total time {}.'.format(
            base_name, current_point, time_str)

    print_flush(summary)
def generateROIBuff(masked_image_file, buffer_size, bgnd_param, progress_str='', progress_refresh_rate_s=20):
    """
    Generator that stacks the images of `masked_image_file` in buffers of
    `buffer_size` frames and yields, for each buffer, the external contours
    of its projection.

    masked_image_file -- HDF5 file with the '/mask' dataset
    buffer_size -- number of frames per buffer (the last one can be shorter)
    bgnd_param -- forwarded to generateImages
    progress_str -- text appended to the base name in the progress messages
    progress_refresh_rate_s -- multiplied by the fps to get the number of
                               frames between progress messages

    Yields (ROI_cnts, image_buffer, ini_frame), where ini_frame is the frame
    number of the first image in the buffer.
    """
    img_generator = generateImages(masked_image_file, bgnd_param=bgnd_param)

    with tables.File(masked_image_file, 'r') as mask_fid:
        tot_frames, im_h, im_w = mask_fid.get_node("/mask").shape

    #loop, save data and display progress
    base_name = masked_image_file.rpartition('.')[0].rpartition(os.sep)[-1]
    progress_str = base_name + progress_str
    fps = read_fps(masked_image_file, dflt=25)

    progress_refresh_rate = fps * progress_refresh_rate_s
    progress_time = TimeCounter(progress_str, tot_frames)

    for frame_number, image in img_generator:
        if frame_number % buffer_size == 0:
            if frame_number + buffer_size > tot_frames:
                #change this value, otherwise the buffer will not get full
                buffer_size = tot_frames - frame_number

            image_buffer = np.zeros((buffer_size, im_h, im_w), np.uint8)
            ini_frame = frame_number

        image_buffer[frame_number - ini_frame] = image

        #compress if it is the last frame in the buffer
        if (frame_number + 1) % buffer_size == 0 or (frame_number + 1 == tot_frames):
            # z projection and select pixels as connected regions that were
            # selected as worms at least once in the masks
            main_mask = np.any(image_buffer, axis=0)

            # change from bool to uint since same datatype is required in
            # opencv
            main_mask = main_mask.astype(np.uint8)

            # calculate the contours, only keep the external contours (no
            # holes). findContours returns (image, contours, hierarchy) in
            # OpenCV 3.x and (contours, hierarchy) otherwise; the contours
            # are the second to last element in both cases.
            ROI_cnts = cv2.findContours(main_mask,
                                        cv2.RETR_EXTERNAL,
                                        cv2.CHAIN_APPROX_NONE)[-2]

            yield ROI_cnts, image_buffer, ini_frame

        if frame_number % progress_refresh_rate == 0:
            print_flush(progress_time.get_str(frame_number))

    print_flush(progress_time.get_str(frame_number))
def generateImages(masked_image_file, frames=[], bgnd_param={}, progress_str='', progress_refresh_rate_s=20):
    """
    Generator over the images stored in the '/mask' dataset of an HDF5 file.

    masked_image_file -- HDF5 file to read
    frames -- frame numbers to read; if empty all the frames are read
    bgnd_param -- if not empty, a background subtractor is built with these
                  parameters and applied to each image
    progress_str -- text appended to the file stem in the progress messages
    progress_refresh_rate_s -- multiplied by the fps to get the number of
                               frames between progress messages

    Yields (frame_number, image).
    """
    #loop, save data and display progress
    progress_label = Path(masked_image_file).stem + progress_str
    frames_between_msgs = read_fps(masked_image_file, dflt=25) * progress_refresh_rate_s

    with tables.File(masked_image_file, 'r') as h5:
        masks = h5.get_node("/mask")
        n_frames = masks.shape[0]
        progress_time = TimeCounter(progress_label, n_frames)

        bgnd_subtractor = None
        if len(bgnd_param) > 0:
            # use the background saved in the file when there is one
            if '/bgnd' in h5:
                subtractor_class = BackgroundSubtractorPrecalculated
            else:
                subtractor_class = BackgroundSubtractorMasked
            bgnd_subtractor = subtractor_class(masked_image_file, **bgnd_param)

        # len() is used because `frames` can also be an array
        if len(frames) == 0:
            frames = range(masks.shape[0])

        for frame_number in frames:
            if frame_number % frames_between_msgs == 0:
                print_flush(progress_time.get_str(frame_number))

            image = masks[frame_number]
            if bgnd_subtractor is not None:
                image = bgnd_subtractor.apply(image, frame_number)

            yield frame_number, image

    print_flush(progress_time.get_str(frame_number))
def time_windows_parser(time_windows):
    """
    EM : Converts the string input from the GUI to a list object of integers.

    Each window is either start:end or start:end:step (limits joined by
    `valid_time_windows_connector`, windows joined by
    `valid_time_windows_separator`). A window with a step is expanded into
    consecutive [start, end] intervals of that size, the last one finishing
    at the end time. The keyword 'end' is translated to -1. An empty (or
    whitespace-only) input yields [[0, -1]].

    Raises
    ------
    ValueError
        If the connector is missing from the input, a limit cannot be
        converted to an integer, a window does not have two or three fields,
        or a window with a step has an open end.
    AssertionError
        If the end time of a window is smaller than its start time, or the
        step is larger than (end_time - start_time).
    """
    if not time_windows.replace(' ', ''):
        return [[0, -1]]

    if valid_time_windows_connector not in time_windows:
        raise ValueError('Invalid format of time windows: ' + time_windows_format_explain)

    # Remove spaces and replace end with -1
    windows = time_windows.replace(' ', '').replace('end', '-1')
    # Split at the separator to get the windows, then split each non-empty
    # window at the connector
    windows = [
        x.split(valid_time_windows_connector)
        for x in windows.split(valid_time_windows_separator) if x
    ]

    # Convert to integers
    try:
        windows = [[int(x) for x in wdw] for wdw in windows]
    except ValueError:
        print_flush('Time windows input could not be converted to list of integers.' + time_windows_format_explain)
        raise

    fin_windows = []
    for iwin, window in enumerate(windows):
        if len(window) == 3:
            if window[1] == -1:
                raise ValueError('Invalid format of time windows: When the format start_time:end_time:step is used, the end_time has to be defined explicitly in seconds or frames. It cannot be defined as \'end\' or \'-1\'.')

            assert window[0] <= window[1], \
                "Invalid format of time windows: The end time of time window {}/{} cannot be smaller than the start time.".format(iwin+1, len(windows))
            assert window[2] <= window[1] - window[0], \
                "Invalid format of time windows: The step size in time window {}/{} cannot be larger than the (end_time-start_time).".format(iwin+1, len(windows))

            start, end, step = window
            # interval starts paired with the next start, closing with `end`
            interval_ends = list(range(start + step, end, step)) + [end]
            fin_windows.extend(
                [i, j] for i, j in zip(range(start, end, step), interval_ends))
        elif len(window) == 2:
            # -1 means "until the end", so there is nothing to compare against
            if window[1] != -1:
                assert window[0] <= window[1], \
                    "Invalid format of time windows: The end time of time window {}/{} cannot be smaller than the start time.".format(iwin+1, len(windows))
            fin_windows.append(window)
        else:
            # The exception used to be instantiated but never raised, so these
            # windows were silently dropped.
            raise ValueError('Invalid format of time windows: ' + time_windows_format_explain)

    return fin_windows
def exportWCON(features_file, READ_FEATURES=False):
    """
    Export the data of `features_file` to a zipped WCON file.

    The dictionary returned by exportWCONdict is dumped as compact JSON (NaN
    values are not allowed) into a deflated zip archive at the path given by
    getWCOName. The archive member is named after the archive without '.zip'.
    """
    base_name = os.path.basename(features_file).replace('_features.hdf5', '')
    print_flush("{} Exporting data to WCON...".format(base_name))

    wcon_dict = exportWCONdict(features_file, READ_FEATURES)
    wcon_file = getWCOName(features_file)

    with zipfile.ZipFile(wcon_file, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
        member_name = os.path.basename(wcon_file).replace('.zip', '')
        # compact separators keep the text as small as possible
        json_text = json.dumps(wcon_dict, allow_nan=False, separators=(',', ':'))
        archive.writestr(member_name, json_text)

    print_flush("{} Finised to export to WCON.".format(base_name))
def generateMoviesROI(masked_file, trajectories_data, roi_size=-1, progress_prefix='', bgnd_param={}, progress_refresh_rate_s=20):
    """
    Generator that yields, for each frame listed in `trajectories_data`, the
    output of getAllImgROI for the rows of that frame.

    masked_file -- HDF5 file read through generateImages
    trajectories_data -- table with a 'frame_number' column
    roi_size -- forwarded to getAllImgROI
    progress_prefix -- text used in the progress messages
    bgnd_param -- forwarded to generateImages
    progress_refresh_rate_s -- multiplied by the fps to get the number of
                               frames between progress messages
    """
    if len(trajectories_data) == 0:
        print_flush(progress_prefix + ' No valid data. Exiting.')
        return

    frames = trajectories_data['frame_number'].unique()
    img_generator = generateImages(masked_file, frames=frames, bgnd_param=bgnd_param)
    rows_by_frame = trajectories_data.groupby('frame_number')

    progress_time = TimeCounter(progress_prefix, max(frames))
    fps = read_fps(masked_file, dflt=25)
    frames_between_msgs = int(round(fps * progress_refresh_rate_s))

    for current_frame, img in img_generator:
        frame_rows = rows_by_frame.get_group(current_frame)

        #dictionary where keys are the table row and the values the worms ROIs
        yield getAllImgROI(img, frame_rows, roi_size)

        if current_frame % frames_between_msgs == 0:
            print_flush(progress_time.get_str(current_frame))

    print_flush(progress_time.get_str(current_frame))
def generateMoviesROI(masked_file, trajectories_data, bgnd_params={}, roi_size = -1, progress_prefix = '', progress_refresh_rate_s=20):
    """
    Generator that yields, for each frame listed in `trajectories_data`, the
    output of getAllImgROI for the rows of that frame.

    masked_file -- HDF5 file with the '/mask' dataset
    trajectories_data -- table with a 'frame_number' column
    bgnd_params -- forwarded to generateImages
    roi_size -- forwarded to getAllImgROI
    progress_prefix -- text used in the progress messages
    progress_refresh_rate_s -- multiplied by the expected fps to get the
                               number of frames between progress messages
    """
    if len(trajectories_data) == 0:
        print_flush(progress_prefix + ' No valid data. Exiting.')
    else:
        frames = trajectories_data['frame_number'].unique()

        # The undefined name `masked_image_file` used to be passed here, so
        # the generator failed with a NameError on its first iteration.
        # NOTE(review): the keyword is kept as `bgnd_params`; confirm it
        # matches the signature of the generateImages this module imports.
        img_generator = generateImages(masked_file, frames=frames, bgnd_params=bgnd_params)

        traj_group_by_frame = trajectories_data.groupby('frame_number')
        progress_time = timeCounterStr(progress_prefix)

        with tables.File(masked_file, 'r') as fid:
            try:
                expected_fps = fid.get_node('/', 'mask')._v_attrs['expected_fps']
            except Exception:
                # best effort: use a default if the attribute cannot be read,
                # without swallowing KeyboardInterrupt/SystemExit
                expected_fps = 25

        progress_refresh_rate = expected_fps*progress_refresh_rate_s

        for ii, (current_frame, img) in enumerate(img_generator):
            frame_data = traj_group_by_frame.get_group(current_frame)

            #dictionary where keys are the table row and the values the worms ROIs
            yield getAllImgROI(img, frame_data, roi_size)

            if current_frame % progress_refresh_rate == 0:
                print_flush(progress_time.getStr(current_frame))

        print_flush(progress_time.getStr(current_frame))
def time_to_frame_nb(time_windows, time_units, fps, timestamp, fname):
    """
    Express the time windows in frame numbers and resolve open-ended intervals.

    `time_windows` is a list of windows, each one a list of [start, end]
    intervals. Limits given in seconds are multiplied by `fps` and rounded.
    An interval whose end is -1 gets the frame after the largest timestamp
    (or its own start, if that is larger). The input list is not modified.

    Returns None when `timestamp` is empty, otherwise the converted windows.
    """
    from copy import deepcopy

    if timestamp.empty:
        return

    frame_windows = deepcopy(time_windows)

    if time_units == 'seconds':
        assert fps != -1, 'Cannot convert time windows to frame numbers. Frames per second ratio not known.'
        for window in frame_windows:
            for interval in window:
                for pos in (0, 1):
                    # -1 is the "end" marker, not a time; leave it untouched
                    if interval[pos] != -1:
                        interval[pos] = round(interval[pos]*fps)

    last_frame = timestamp.sort_values().iloc[-1]

    for window_ind, window in enumerate(frame_windows):
        n_intervals = len(window)
        for interval_ind, interval in enumerate(window):
            # Replace the "end" marker with the frame after the last one (or
            # the interval start when it begins beyond the recording)
            if interval[1] == -1:
                interval[1] = max(last_frame+1, interval[0])

            # Warn about intervals starting after the last available frame
            if interval[0] > last_frame:
                print_flush('Warning: The start time of interval ' +
                            '{}/{} '.format(interval_ind + 1, n_intervals) +
                            'of window {} '.format(window_ind) +
                            'is out of bounds of file \'{}\'.'.format(fname))

    return frame_windows
def reformatRigMaskedVideo(original_file, new_file, plugin_param_file, expected_fps, microns_per_pixel):
    """
    Rewrite the '/mask' dataset of `original_file` into `new_file`, using
    the groups created by initMasksGroups.

    One frame out of every `save_full_interval` is stored as a full frame,
    and its copy in the new mask is multiplied by the non-zero pixels of a
    neighbouring frame. The rest of the frames are copied as they are.
    Nothing is written if _isValidSource rejects the original file.
    """
    plugin_params = _getWormEnconderParams(plugin_param_file)
    base_name = original_file.rpartition('.')[0].rpartition(os.sep)[-1]

    if not _isValidSource(original_file):
        print_flush(new_file + ' ERROR. File might be corrupt. ' + original_file)
        return

    save_full_interval, buffer_size, mask_params = _getReformatParams(plugin_params)

    with tables.File(original_file, 'r') as src_fid, \
            tables.File(new_file, 'w') as dst_fid:
        src_masks = src_fid.get_node('/mask')
        n_frames, height, width = src_masks.shape

        progress_timer = TimeCounter('Reformating Gecko plugin hdf5 video.', n_frames)

        attr_params = {
            'expected_fps': expected_fps,
            'microns_per_pixel': microns_per_pixel,
            'is_light_background': True,
        }
        dst_masks, dst_full, _ = initMasksGroups(dst_fid,
                                                 n_frames,
                                                 height,
                                                 width,
                                                 attr_params,
                                                 save_full_interval,
                                                 is_expandable=False)
        dst_masks.attrs['plugin_params'] = json.dumps(plugin_params)

        # the first buffer is masked with the first frame that follows it
        first_buffer = src_masks[:buffer_size]
        dst_full[0] = first_buffer[0]
        dst_masks[:buffer_size] = first_buffer * (src_masks[buffer_size] != 0)

        for frame in range(buffer_size, n_frames):
            if frame % save_full_interval == 0:
                # full frame: keep it complete, and store a copy masked with
                # the previous frame
                img = src_masks[frame]
                dst_full[frame // save_full_interval] = img
                dst_masks[frame] = img * (src_masks[frame - 1] != 0)
            else:
                dst_masks[frame] = src_masks[frame]

            if frame % 500 == 0:
                # calculate the progress and put it in a string
                print_flush(base_name + ' ' + progress_timer.get_str(frame))

    print_flush(base_name + ' Compressed video done. Total time:' +
                progress_timer.get_time_str())
def assignBlobTrajDF(traj_df, max_allowed_dist, area_ratio_lim, base_name=''):
    """
    Give a trajectory index to every row of `traj_df` by linking the blobs of
    each frame with the blobs of the previous frame in the table.

    traj_df -- table with the columns 'frame_number', 'coord_x', 'coord_y'
               and 'area'
    max_allowed_dist -- pairs with a cost larger than this are not linked
    area_ratio_lim -- pair (min, max); pairs whose area ratio
                      (previous / current) is outside of it are not linked
    base_name -- prefix of the progress messages

    Returns an array with one trajectory index per row, ordered by the sorted
    row index of `traj_df`, or None when `traj_df` has no frames.
    """
    def _get_cost_matrix(frame_data, frame_data_prev):
        # Rows correspond to the previous frame and columns to the current one.
        coord = frame_data[['coord_x', 'coord_y']].values
        coord_prev = frame_data_prev[['coord_x', 'coord_y']].values
        costMatrix = cdist(coord_prev, coord)  # calculate the cost matrix

        # assign a large value to non-valid combinations by area
        area = frame_data['area'].values
        area_prev = frame_data_prev['area'].values
        area_ratio = area_prev[:, None] / area[None, :]
        area_ratio[np.isnan(area_ratio)] = 1e20

        bad_ratio = (area_ratio<area_ratio_lim[0]) | \
        (area_ratio>area_ratio_lim[1]) | \
        np.isnan(costMatrix)

        costMatrix[bad_ratio] = 1e20
        return costMatrix

    def _get_prev_ind_match(costMatrix):
        # For each blob of the current frame return the position of its match
        # in the previous frame, or -1 if there is no valid match.
        def _label_bad_ind(indexes, dist, max_allowed_dist):
            # NOTE: `indexes` is modified in place; the callers below rely on
            # that and ignore the returned value.
            #label as bad the pairs that have a distance larger than max_allowed_dist
            indexes[dist > max_allowed_dist] = -1

            #remove indexes that where assigned twice (either a merge or a split event)
            uind, counts = np.unique(indexes, return_counts=True)
            duplicated_ind = uind[counts > 1]
            bad_ind = np.in1d(indexes, duplicated_ind)
            indexes[bad_ind] = -1
            return indexes

        #I get the corresponding index in the previous data_frame
        #I remove pairs located at positions larger than max_allowed_dist
        #And indexes that where assigned twice or more (split events)
        map_to_prev = np.argmin(costMatrix, axis=0)  #must have dimensions of frame_data
        min_dist_pp = costMatrix[map_to_prev, np.arange(costMatrix.shape[1])]
        _label_bad_ind(map_to_prev, min_dist_pp, max_allowed_dist)

        #here i am looking at in the prev indexes that would have been
        #assigned twice or more to the next indexes (merge events)
        map_to_next = np.argmin(
            costMatrix, axis=1)  #must have dimensions of frame_data_prev
        min_dist_pp = costMatrix[np.arange(costMatrix.shape[0]), map_to_next]
        _label_bad_ind(map_to_next, min_dist_pp, max_allowed_dist)

        bad_prev_ind = np.where(map_to_next == -1)[
            0]  #technically either index too far away or duplicated
        possible_merges = np.in1d(map_to_prev,
                                  bad_prev_ind)
        map_to_prev[possible_merges] = -1
        return map_to_prev

    frame_data_prev = None
    tot_worms = 0  # largest trajectory index given so far
    all_indexes = []  # one (row index, trajectory indexes) pair per frame
    frames_grouped = traj_df.groupby('frame_number')

    #if isinstance(area_ratio_lim, (float, int)):
    #    area_ratio_lim = (1/area_ratio_lim, area_ratio_lim)

    progress_time = TimeCounter(base_name + ' Assigning trajectories.',
                                len(frames_grouped))
    for frame, frame_data in frames_grouped:
        #what happens if the frames are not continous?
        if frame_data_prev is not None:
            _, prev_traj_ind = all_indexes[-1]
            costMatrix = _get_cost_matrix(frame_data, frame_data_prev)
            map_to_prev = _get_prev_ind_match(costMatrix)

            traj_indexes = np.zeros_like(map_to_prev)
            unmatched = map_to_prev == -1
            matched = ~unmatched

            #assign matched index from the previous indexes
            traj_indexes[matched] = prev_traj_ind[map_to_prev[matched]]

            # the unmatched blobs start new trajectories with fresh indexes
            vv = np.arange(1, np.sum(unmatched) + 1) + tot_worms
            if vv.size > 0:
                tot_worms = vv[-1]
                traj_indexes[unmatched] = vv
        else:
            # initialize worm indexes
            traj_indexes = tot_worms + np.arange(1, len(frame_data) + 1)
            tot_worms = traj_indexes[-1]

        all_indexes.append((frame_data.index, traj_indexes))
        frame_data_prev = frame_data

        if frame % 500 == 0:
            # calculate the progress and put it in a string
            print_flush(progress_time.get_str(frame))

    if all_indexes:
        # put the trajectory indexes in the order of the sorted row index
        row_ind, traj_ind = map(np.concatenate, zip(*all_indexes))
        traj_ind = traj_ind[np.argsort(row_ind)]
        return traj_ind
def compressVideo(video_file, masked_image_file, mask_param, expected_fps=25, microns_per_pixel=None, bgnd_param={}, buffer_size=-1, save_full_interval=-1, max_frame=1e32, is_extract_timestamp=False, fovsplitter_param={}):
    '''
    Compresses video by selecting pixels that are likely to have worms on it and making the rest of the image zero.
    By creating a large amount of redundant data, any lossless compression
    algorithm will dramatically increase its efficiency. The masked images are saved as hdf5 with gzip compression.
    The mask is calculated over a minimum projection of an image stack. This projection preserves darker regions
    (or brighter regions, in the case of fluorescent labelling)
    where the worm has more probability to be located. Additionally it has the advantage of reducing
    the processing load by only requiring to calculate the mask once per image stack.

    video_file -- original video file
    masked_image_file -- output hdf5 file; it is overwritten if it exists
    mask_param -- parameters used to calculate the mask. It must contain the
                  key 'is_light_background' and is forwarded to getROIMask
    expected_fps -- saved as an attribute and given to compress_defaults
    microns_per_pixel -- saved as an attribute and given to the FOV splitter
    bgnd_param -- if not empty background subtraction is used; it must contain
                  'buff_size' and 'frame_gap', both larger than zero
    buffer_size -- size of the image stack used to calculate the minimal projection and the mask
    save_full_interval -- how often a full image is saved
    max_frame -- last frame saved (default a very large number, so it goes until the end of the video)
    is_extract_timestamp -- if True store_meta_data is called and its output
                            is used as the expected number of frames
    fovsplitter_param -- if not empty the field of view is split in wells; it
                         must contain 'total_n_wells' (larger than zero),
                         'whichsideup' and 'well_shape'
    '''

    #get the default values if there is any bad parameter
    output = compress_defaults(masked_image_file,
                               expected_fps,
                               buffer_size=buffer_size,
                               save_full_interval=save_full_interval)

    buffer_size = output['buffer_size']
    save_full_interval = output['save_full_interval']

    if len(bgnd_param) > 0:
        is_bgnd_subtraction = True
        assert bgnd_param['buff_size'] > 0 and bgnd_param['frame_gap'] > 0
    else:
        is_bgnd_subtraction = False

    if len(fovsplitter_param) > 0:
        is_fov_tosplit = True
        assert all(key in fovsplitter_param
                   for key in ['total_n_wells', 'whichsideup', 'well_shape'])
        assert fovsplitter_param['total_n_wells'] > 0
    else:
        is_fov_tosplit = False

    # processes identifier.
    base_name = masked_image_file.rpartition('.')[0].rpartition(os.sep)[-1]

    # select the video reader class according to the file type.
    vid = selectVideoReader(video_file)

    # delete any previous if it existed
    # (opening in "w" mode leaves an empty file to start from)
    with tables.File(masked_image_file, "w") as mask_fid:
        pass

    #Extract metadata
    if is_extract_timestamp:
        # extract and store video metadata using ffprobe
        #NOTE: i cannot calculate /timestamp until i am sure of the total number of frames
        print_flush(base_name + ' Extracting video metadata...')
        expected_frames = store_meta_data(video_file, masked_image_file)
    else:
        # 1 is used below as the marker of "number of frames unknown"
        expected_frames = 1

    # Initialize background subtraction if required
    if is_bgnd_subtraction:
        print_flush(base_name + ' Initializing background subtraction.')
        bgnd_subtractor = BackgroundSubtractorVideo(video_file, **bgnd_param)

    # initialize some variables
    # NOTE(review): max_intensity, min_intensity and image_prev are not read
    # again in this function.
    max_intensity, min_intensity = np.nan, np.nan
    frame_number = 0
    full_frame_number = 0
    image_prev = np.zeros([])

    # Initialise FOV splitting if needed
    if is_bgnd_subtraction:
        img_fov = bgnd_subtractor.bgnd.astype(np.uint8)
    else:
        ret, img_fov = vid.read()
        # close and reopen the video, to restart from the beginning
        vid.release()
        vid = selectVideoReader(video_file)

    if is_fov_tosplit:
        # TODO: change class creator so it only needs the video name? by using
        # Tierpsy's functions such as selectVideoReader it can then read the first image by itself
        camera_serial = parse_camera_serial(masked_image_file)
        fovsplitter = FOVMultiWellsSplitter(img_fov,
                                            camera_serial=camera_serial,
                                            px2um=microns_per_pixel,
                                            **fovsplitter_param)
        wells_mask = fovsplitter.wells_mask
    else:
        wells_mask = None

    # initialize timers
    print_flush(base_name + ' Starting video compression.')
    if expected_frames == 1:
        progressTime = TimeCounter('Compressing video.')
    else:
        #if we know the number of frames display it in the progress
        progressTime = TimeCounter('Compressing video.', expected_frames)

    with tables.File(masked_image_file, "r+") as mask_fid:
        #initialize masks groups
        attr_params = dict(expected_fps=expected_fps,
                           microns_per_pixel=microns_per_pixel,
                           is_light_background=int(
                               mask_param['is_light_background']))
        mask_dataset, full_dataset, mean_intensity = initMasksGroups(
            mask_fid, expected_frames, vid.height, vid.width, attr_params,
            save_full_interval)

        if is_bgnd_subtraction:
            bg_dataset = createImgGroup(mask_fid,
                                        "/bgnd",
                                        1,
                                        vid.height,
                                        vid.width,
                                        is_expandable=False)
            bg_dataset[0, :, :] = img_fov

        if vid.dtype != np.uint8:
            # this will work as flags to be sure that the normalization took place.
            normalization_range = mask_fid.create_earray(
                '/',
                'normalization_range',
                atom=tables.Float32Atom(),
                shape=(0, 2),
                expectedrows=expected_frames,
                filters=TABLE_FILTERS)

        while frame_number < max_frame:
            ret, image = vid.read()
            if ret != 0:
                # increase frame number
                frame_number += 1

                # opencv can give an artificial rgb image. Let's get it back to
                # gray scale.
                if image.ndim == 3:
                    image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

                if image.dtype != np.uint8:
                    # normalise image intensities if the data type is other
                    # than uint8
                    image, img_norm_range = normalizeImage(image)
                    normalization_range.append(img_norm_range)

                #limit the image range to 1 to 255, 0 is a reserved value for the background
                assert image.dtype == np.uint8
                image = np.clip(image, 1, 255)

                # Add a full frame every save_full_interval
                # (frame_number is already 1-based here, hence the == 1)
                if frame_number % save_full_interval == 1:
                    full_dataset.append(image[np.newaxis, :, :])
                    full_frame_number += 1

                # buffer index
                ind_buff = (frame_number - 1) % buffer_size

                # initialize the buffer when the index correspond to 0
                if ind_buff == 0:
                    Ibuff = np.zeros((buffer_size, vid.height, vid.width),
                                     dtype=np.uint8)

                # add image to the buffer
                Ibuff[ind_buff, :, :] = image.copy()
                mean_int = np.mean(image)
                assert mean_int >= 0
                mean_intensity.append(np.array([mean_int]))
            else:
                # NOTE(review): Ibuff and ind_buff only exist after one frame
                # was read, so a failure in the very first read ends here in
                # a NameError.
                # sometimes the last image is all zeros, control for this case
                if np.all(Ibuff[ind_buff] == 0):
                    frame_number -= 1
                    ind_buff -= 1

                # close the buffer
                Ibuff = Ibuff[:ind_buff + 1]

            # mask buffer and save data into the hdf5 file
            if (ind_buff == buffer_size - 1 or ret == 0) and Ibuff.size > 0:
                if is_bgnd_subtraction:
                    Ibuff_b = bgnd_subtractor.apply(Ibuff, frame_number)
                else:
                    Ibuff_b = Ibuff

                #calculate the max/min in the of the buffer
                img_reduce = reduceBuffer(Ibuff_b,
                                          mask_param['is_light_background'])
                mask = getROIMask(img_reduce,
                                  wells_mask=wells_mask,
                                  **mask_param)
                # the same mask is applied in place to every image of the buffer
                Ibuff *= mask

                # now apply the well_mask if is MWP
                if is_fov_tosplit:
                    fovsplitter.apply_wells_mask(
                        Ibuff)  # Ibuff will be modified after this

                # add buffer to the hdf5 file
                # NOTE(review): frame_first_buff is not read again in this function.
                frame_first_buff = frame_number - Ibuff.shape[0]
                mask_dataset.append(Ibuff)

            if frame_number % 500 == 0:
                # calculate the progress and put it in a string
                progress_str = progressTime.get_str(frame_number)
                print_flush(base_name + ' ' + progress_str)

            # finish process
            if ret == 0:
                break

        # now that the whole video is read, we definitely have a better estimate
        # for its number of frames. so set the save_interval again
        if is_bgnd_subtraction:
            # bg_dataset._v_attrs['save_interval'] = len(vid)
            # the above line is not accurate when using ffmpeg,
            # it's just safer to do:
            bg_dataset._v_attrs['save_interval'] = mask_dataset.shape[0]

    # close the video
    vid.release()

    # save fovsplitting data
    if is_fov_tosplit:
        fovsplitter.write_fov_wells_to_file(masked_image_file)
        if fovsplitter.is_dubious:
            print(f'Check {masked_image_file} for plate alignment')

    read_and_save_timestamp(masked_image_file)
    print_flush(base_name + ' Compressed video done.')
skeletons_file = mask_video.replace('MaskedVideos','Results').replace('.hdf5', '_skeletons.hdf5') if not os.path.exists(skeletons_file): continue base_name = get_base_name(mask_video) print('{} of {} {}'.format(ivid+1, len(fnames), base_name)) #%% trajectories_data, splitted_points = \ fix_wrong_merges(mask_video, skeletons_file, min_area_limit ) #%% print_flush('{} Creating trajectories graph network.'.format(base_name)) node_weights, DG, trajectories_data = \ get_node_weights(trajectories_data, mask_video, **args_graph) trajectories_data['cluster_size'] = trajectories_data['worm_index_auto'].map(node_weights) #if np.any(trajectories_data['cluster_size']<0): # print(node_weights) def _label(x): if x == 0: return WLAB['BAD'] elif x == 1: return WLAB['WORM'] elif x > 1:
def filterByPopulationMorphology(skeletons_file, good_skel_row, critical_alpha=0.01):
    """
    Mark as bad the skeletons whose morphology is an outlier in the population.

    The `/trajectories_data` table of `skeletons_file` is read, the rows
    flagged as outliers by `_h_getMahalanobisRobust` get `is_good_skel`
    cleared, a `skel_outliers_flag` column is added, and the table is written
    back to the same file with `save_modified_table`.

    Parameters
    ----------
    skeletons_file : str
        Path of the skeletons HDF5 file. It is read and then modified.
    good_skel_row : numpy.ndarray
        Forwarded to `_h_getMahalanobisRobust` (presumably the rows used to
        fit the robust estimator -- confirm against the caller). When it is
        empty the outlier search is skipped entirely.
    critical_alpha : float
        Forwarded to `_h_getMahalanobisRobust`.
    """
    base_name = get_base_name(skeletons_file)
    progress_timer = TimeCounter('')

    print_flush(base_name + ' Filter Skeletons: Starting...')
    with pd.HDFStore(skeletons_file, 'r') as table_fid:
        trajectories_data = table_fid['/trajectories_data']

    if 'is_good_skel' not in trajectories_data:
        trajectories_data['is_good_skel'] = trajectories_data['has_skeleton']

    # nothing to do if there are not valid skeletons left.
    if good_skel_row.size > 0:
        print_flush(
            base_name +
            ' Filter Skeletons: Reading features for outlier identification.')

        # add possible missing fields that were not calculated in older
        # versions of the software
        _addMissingFields(skeletons_file)

        # features used to fit the outlier classifier
        nodes4fit = ['/skeleton_length', '/contour_area', '/width_midbody']
        worm_morph = _h_nodes2Array(skeletons_file, nodes4fit, -1)
        feats4fit = [worm_morph]

        print_flush(
            base_name +
            ' Filter Skeletons: Calculating outliers. Total time:' +
            progress_timer.get_time_str())

        tot_rows2fit = feats4fit[0].shape[0]
        # check all the data to fit has the same size in the first axis
        assert all(tot_rows2fit == featdat.shape[0] for featdat in feats4fit)

        # NOTE: the `np.bool` / `np.int` aliases were removed in NumPy 1.24;
        # the builtins give exactly the same dtypes.
        outliers_rob = np.zeros(tot_rows2fit, bool)
        outliers_flag = np.zeros(tot_rows2fit, int)
        assert len(feats4fit) < 64  # otherwise the outlier flag will not work

        for out_ind, dat in enumerate(feats4fit):
            _, out_d, _ = _h_getMahalanobisRobust(
                dat, critical_alpha, good_skel_row)
            outliers_rob = outliers_rob | out_d

            # turn on the bit that corresponds to this feature set
            outliers_flag += (out_d) * (2**out_ind)

        print_flush(
            base_name +
            ' Filter Skeletons: Labeling valid skeletons. Total time:' +
            progress_timer.get_time_str())

        # only the rows that are not outliers remain labelled as good
        trajectories_data['is_good_skel'] &= ~outliers_rob
        trajectories_data['skel_outliers_flag'] = outliers_flag

    # Save the new is_good_skel column (stored as uint8 rather than bool)
    if trajectories_data['is_good_skel'].dtypes == bool:
        trajectories_data['is_good_skel'] = trajectories_data[
            'is_good_skel'].astype(np.uint8)
    save_modified_table(skeletons_file, trajectories_data, 'trajectories_data')

    print_flush(
        base_name +
        ' Filter Skeletons: Finished. Total time:' +
        progress_timer.get_time_str())
trajectories_file = ff[:-5] + '_trajectories.hdf5' skeletons_file = ff[:-5] + '_skeletons.hdf5' intensities_file = ff[:-5] + '_intensities.hdf5' # check the file finished in the correct step # with tables.File(skeletons_file, 'r') as fid: # assert fid.get_node('/skeleton')._v_attrs['has_finished'] >= 4 with pd.HDFStore(skeletons_file, 'r') as fid: trajectories_data = fid['/trajectories_data'] grouped_trajectories = trajectories_data.groupby('worm_index_joined') tot_worms = len(grouped_trajectories) # variables to report progress base_name = skeletons_file.rpartition('.')[0].rpartition( os.sep)[-1].rpartition('_')[0] print_flush(base_name + " Checking if the final Head-Tail orientation is correct") for index_n, (worm_index, trajectories_worm) in enumerate(grouped_trajectories): p_tot, skel_group, int_group = checkFinalOrientation( skeletons_file, intensities_file, trajectories_worm, head_tail_param) if p_tot < 0.5: switchBlocks(skel_group, skeletons_file, int_group, intensities_file)
base_name = get_base_name(skeletons_file) progress_prefix = base_name + ' Calculating skeletons.' with pd.HDFStore(skeletons_file, 'r') as ske_file_id: trajectories_data = ske_file_id['/trajectories_data'] blob_features = ske_file_id['/blob_features'] #I want to update blob_features blob_features['signed_speed'] = np.nan progress_timer = TimeCounter('') with tables.File(skeletons_file, 'r') as fid: skeletons = fid.get_node('/skeleton') grouped_by_index = trajectories_data.groupby('worm_index_joined') tot_worms = len(grouped_by_index) for ii, (worm_index, worm_data) in enumerate(grouped_by_index): feats = blob_features.loc[worm_data.index] skel_coords = skeletons[worm_data.index] xx = feats['coord_x'] yy = feats['coord_y'] signed_speed = _get_signed_velocity(xx, yy, skel_coords) blob_features.loc[worm_data.index[:-1], 'signed_speed'] = signed_speed if ii % 100 == 0: dd = " Calculating signed speed. Worm %i of %i." % (ii + 1, tot_worms) dd = base_name + dd + ' Total time:' + progress_timer.get_time_str() print_flush(dd) save_modified_table(skeletons_file, blob_features, 'blob_features')
def get_ffprobe_metadata(video_file):
    """
    Run ffprobe on `video_file` and return the per-frame metadata.

    ffprobe is called with `-show_frames -print_format compact`, so each
    output line has the form `frame|feat1=val1|feat2=val2|...`. Every field
    found in the first line becomes one column of the result.

    Parameters
    ----------
    video_file : str
        Path of the video to inspect.

    Returns
    -------
    The object returned by `dict2recarray`, with one entry per frame. Values
    are floats when they can be parsed, nan for 'N/A' and utf-8 bytes
    otherwise.

    Raises
    ------
    FileNotFoundError
        If `video_file` or the ffprobe executable (`FFPROBE_CMD`) is missing.
    ValueError
        If `best_effort_timestamp` still contains nan after dropping a
        trailing nan frame.
    """
    if not os.path.exists(video_file):
        raise FileNotFoundError(video_file)

    if not os.path.exists(FFPROBE_CMD):
        raise FileNotFoundError('ffprobe do not found.')

    command = [
        FFPROBE_CMD,
        '-v', 'error',
        '-show_frames',
        '-print_format', 'compact',
        video_file]

    base_name = video_file.rpartition('.')[0].rpartition(os.sep)[-1]
    progressTime = TimeCounter(base_name + ' Extracting video metadata.')

    frame_number = 0
    buff = []
    buff_err = []
    proc = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
    buf_reader = ReadEnqueue(proc.stdout, timeout=1)
    buf_reader_err = ReadEnqueue(proc.stderr)

    # NOTE(review): the loop stops as soon as the process exits; whether
    # output still queued in the readers can be lost depends on ReadEnqueue.
    while proc.poll() is None:
        # read line without blocking
        line = buf_reader.read()
        if line is None:
            print('cannot read')
        else:
            buff.append(line)
            # "media_type" is used as a proxy for the frame count, in case
            # the media does not report a frame number
            if "media_type" in line:
                frame_number += 1
                if frame_number % 500 == 0:
                    print_flush(progressTime.get_str(frame_number))

        line = buf_reader_err.read()
        if line is not None:
            # keep the actual stderr text (the original stored None here)
            buff_err.append(line)

    # the buffer has the shape
    #   frame|feat1=val1|feat2=val2|feat3=val3\n
    # split it so that each property can be stored as a vector
    dat = [[d.split('=') for d in x.split('|')]
           for x in ''.join(buff).split('\n')]

    # use the first frame as reference for the valid field names
    frame_fields = {x[0] for x in dat[0] if len(x) == 2}

    video_metadata = OrderedDict()
    for row in dat:
        for dd in row:
            if len(dd) != 2 or dd[0] not in frame_fields:
                continue
            field, value = dd

            try:
                # if possible convert the data into float
                value = float(value)
            except (ValueError, TypeError):
                if value == 'N/A':
                    value = np.nan
                else:
                    # pytables does not support unicode strings (python3);
                    # str() first in case the value is not already a string
                    value = bytes(str(value), 'utf-8')

            video_metadata.setdefault(field, []).append(value)

    # convert all the lists into numpy arrays
    video_metadata = {field: np.asarray(values)
                      for field, values in video_metadata.items()}

    # convert data into a recarray to store in pytables
    video_metadata = dict2recarray(video_metadata)

    # sometimes the last frame throws a nan in the timestamp; remove it
    if np.isnan(video_metadata[-1]['best_effort_timestamp']):
        video_metadata = video_metadata[:-1]

    # if there are still nan's raise an error
    if np.any(np.isnan(video_metadata['best_effort_timestamp'])):
        raise ValueError('The timestamp contains nan values')
    return video_metadata
def _displayProgress(n):
    """Print how many files have been processed (n is zero-based) and the elapsed time."""
    files_done = n + 1
    files_total = len(df_files)
    elapsed = progress_timer.get_time_str()
    template = "File {} of {} done. Total time: {}"
    print_flush(
        "Extracting features summary. " +
        template.format(files_done, files_total, elapsed))
def getWormFeaturesFilt(
        skeletons_file,
        features_file,
        use_skel_filter,
        use_manual_join,
        is_single_worm,
        feat_filt_param,
        split_traj_time):
    """
    Calculate the features of every valid trajectory in `skeletons_file` and
    save them into a new `features_file`.

    `features_file` is opened in write mode, so any previous content is lost.
    The nodes created are:
      - `/experiment_info` (copied from `skeletons_file` when it exists there)
      - `/features_timeseries` (table, one append per worm)
      - `/features_events/worm_<index>` (one group of arrays per worm)
      - `/coordinates/{skeletons, dorsal_contours, ventral_contours}`
      - `/features_summary/<stat>` and `/features_summary/<stat>_split`
        for every `stat` in `FUNC_FOR_DIV`
      - `/features_means` and `/features_means_split`, duplicates of the
        'means' summary kept for backward compatibility.

    Parameters
    ----------
    skeletons_file : str
        Input skeletons HDF5 file.
    features_file : str
        Output HDF5 file (overwritten).
    use_skel_filter, use_manual_join :
        Forwarded to `getGoodTrajIndexes`; `use_skel_filter` is also forwarded
        to `WormFromTable`.
    is_single_worm : bool
        When true `worm.correct_schafer_worm()` is applied to each worm.
    feat_filt_param : dict
        Expanded as keyword arguments of `min_num_skel_defaults`; the result
        must contain 'min_num_skel' and 'bad_seg_thresh'.
    split_traj_time :
        Multiplied by the fps and rounded to get the number of frames given
        to `worm.split` (presumably a time in seconds -- confirm).

    Returns
    -------
    None
    """
    feat_filt_param = min_num_skel_defaults(skeletons_file, **feat_filt_param)

    def _iniFileGroups():
        # Create the empty output nodes. This closure reads `features_fid`,
        # `wStats`, `tot_worms` and `worm_index_type`, which are assigned
        # further down in the enclosing function before it is called.

        # initialize groups for the timeseries and event features
        header_timeseries = {
            feat: tables.Float32Col(
                pos=ii) for ii, (feat, _) in enumerate(
                wStats.feat_timeseries_dtype)}

        table_timeseries = features_fid.create_table(
            '/', 'features_timeseries', header_timeseries, filters=TABLE_FILTERS)

        # save some data used in the calculation as attributes
        # (the values returned here are local and not used afterwards)
        fps, microns_per_pixel, _ = copy_unit_conversions(table_timeseries, skeletons_file)
        table_timeseries._v_attrs['worm_index_type'] = worm_index_type

        # node to save features events
        group_events = features_fid.create_group('/', 'features_events')

        # save the skeletons
        with tables.File(skeletons_file, 'r') as ske_file_id:
            skel_shape = ske_file_id.get_node('/skeleton').shape

        # extendable arrays that share the last two dimensions of /skeleton
        worm_coords_array = {}
        w_node = features_fid.create_group('/', 'coordinates')
        for array_name in ['skeletons', 'dorsal_contours', 'ventral_contours']:
            worm_coords_array[array_name] = features_fid.create_earray(
                w_node, array_name, shape=(
                    0, skel_shape[1], skel_shape[2]), atom=tables.Float32Atom(
                    shape=()), filters=TABLE_FILTERS)

        # initialize rec array with the averaged features of each worm
        stats_features_df = {stat:np.full(tot_worms, np.nan, dtype=wStats.feat_avg_dtype) for stat in FUNC_FOR_DIV}

        return header_timeseries, table_timeseries, group_events, worm_coords_array, stats_features_df

    progress_timer = TimeCounter('')

    def _displayProgress(n):
        # display progress (`tot_worms` and `base_name` come from the
        # enclosing scope and are assigned before the first call)
        dd = " Extracting features. Worm %i of %i done." % (n, tot_worms)
        print_flush(
            base_name +
            dd +
            ' Total time:' +
            progress_timer.get_time_str())

    #get the valid number of worms
    good_traj_index, worm_index_type = getGoodTrajIndexes(skeletons_file,
        use_skel_filter,
        use_manual_join,
        is_single_worm,
        feat_filt_param)

    fps = read_fps(skeletons_file)
    split_traj_frames = int(np.round(split_traj_time*fps)) #the fps could be non integer

    # name used as prefix of the progress messages
    base_name = skeletons_file.rpartition('.')[0].rpartition(os.sep)[-1].rpartition('_')[0]

    with tables.File(features_file, 'w') as features_fid:
        # copy the experiment information, if available, before anything else
        # so it is present even when the function returns early
        with tables.File(skeletons_file, 'r') as skel_fid:
            if '/experiment_info' in skel_fid:
                dd = skel_fid.get_node('/experiment_info').read()
                features_fid.create_array(
                    '/', 'experiment_info', obj=dd)

        #total number of worms
        tot_worms = len(good_traj_index)
        if tot_worms == 0:
            print_flush(base_name + ' No valid worms found to calculate features. Creating empty file.')
            return

        # initialize by getting the specs data subdivision
        wStats = WormStats()
        all_splitted_feats = {stat:[] for stat in FUNC_FOR_DIV}

        #initialize file
        header_timeseries, table_timeseries, group_events, \
        worm_coords_array, stats_features_df = _iniFileGroups()

        _displayProgress(0)
        # start to calculate features for each worm trajectory
        for ind_N, worm_index in enumerate(good_traj_index):
            # initialize worm object, and extract data from skeletons file
            worm = WormFromTable(
                skeletons_file,
                worm_index,
                use_skel_filter=use_skel_filter,
                worm_index_type=worm_index_type,
                smooth_window=5)

            if is_single_worm:
                #worm with the stage correction applied
                worm.correct_schafer_worm()
                if np.all(np.isnan(worm.skeleton[:, 0, 0])):
                    print_flush('{} Not valid skeletons found after stage correction. Skiping worm index {}'.format(base_name, worm_index))
                    # NOTE(review): this leaves the whole function, not only
                    # this worm, so no summary tables are written -- confirm
                    # this is intended.
                    return

            # calculate features
            timeseries_data, events_data, worm_stats = getOpenWormData(worm, wStats)

            #get splitted features: only the segments with enough valid
            #skeletons, both in number and in fraction, are kept
            splitted_worms = [x for x in worm.split(split_traj_frames)
                              if x.n_valid_skel > feat_filt_param['min_num_skel']
                              and x.n_valid_skel/x.n_frames >= feat_filt_param['bad_seg_thresh']]

            dd = [getFeatStats(x, wStats)[1] for x in splitted_worms]
            splitted_feats = {stat:[x[stat] for x in dd] for stat in FUNC_FOR_DIV}

            #% add data to save
            # save timeseries data
            table_timeseries.append(timeseries_data)
            table_timeseries.flush()

            # save skeletons
            worm_coords_array['skeletons'].append(worm.skeleton)
            worm_coords_array['dorsal_contours'].append(worm.dorsal_contour)
            worm_coords_array['ventral_contours'].append(worm.ventral_contour)

            # save event data as a subgroup per worm
            worm_node = features_fid.create_group(
                group_events, 'worm_%i' % worm_index)
            worm_node._v_attrs['worm_index'] = worm_index
            worm_node._v_attrs['frame_range'] = np.array(
                (worm.first_frame, worm.last_frame))

            for feat in events_data:
                tmp_data = events_data[feat]
                # consider the cases where the output is a single number, empty
                # or None
                if isinstance(tmp_data, (float, int)):
                    tmp_data = np.array([tmp_data])
                if tmp_data is None or tmp_data.size == 0:
                    tmp_data = np.array([np.nan])
                features_fid.create_carray(
                    worm_node, feat, obj=tmp_data, filters=TABLE_FILTERS)

            # store the average for each worm feature
            for stat in FUNC_FOR_DIV:
                stats_features_df[stat][ind_N] = worm_stats[stat]

                #append the splitted traj features
                all_splitted_feats[stat] += splitted_feats[stat]

            # report progress
            _displayProgress(ind_N + 1)

        # create and save a table containing the averaged worm feature for each
        # worm
        f_node = features_fid.create_group('/', 'features_summary')
        for stat, stats_df in stats_features_df.items():
            splitted_feats = all_splitted_feats[stat]

            #check that the array is not empty
            if len(splitted_feats) > 0:
                splitted_feats_arr = np.array(splitted_feats)
            else:
                #return a row full of nan to indicate a fail
                splitted_feats_arr = np.full(1, np.nan, dtype=wStats.feat_avg_dtype)

            features_fid.create_table(
                f_node,
                stat,
                obj = stats_df,
                filters = TABLE_FILTERS
                )

            feat_stat_split = features_fid.create_table(
                f_node,
                stat + '_split',
                obj=splitted_feats_arr,
                filters=TABLE_FILTERS
                )
            feat_stat_split._v_attrs['split_traj_frames'] = split_traj_frames

            if stat == 'means':
                #FUTURE: I am duplicating this field for backward compatibility, I should remove it later on.
                features_fid.create_table(
                    '/',
                    'features_means',
                    obj = stats_df,
                    filters = TABLE_FILTERS
                    )

                features_fid.create_table(
                    '/',
                    'features_means_split',
                    obj=splitted_feats_arr,
                    filters=TABLE_FILTERS
                    )

    print_flush(
        base_name +
        ' Feature extraction finished: ' +
        progress_timer.get_time_str())
def alignStageMotion(masked_file, skeletons_file):
    """
    Align the stage movements recorded in the log with the video frames.

    The frame differences of the video (`getFrameDiffVar`) are matched to the
    media times of `/stage_log` by `findStageMovement`, and the result is
    stored in the `/stage_movement` group of `skeletons_file`. Any previous
    `/stage_movement` group is deleted first. The attribute `has_finished` of
    that group is 0 while the analysis is running and 1 once it succeeded.

    Parameters
    ----------
    masked_file : str
        HDF5 file with `/mask`, `/xml_info` and `/stage_log`.
    skeletons_file : str
        HDF5 file that is modified. `/timestamp/raw` is read from it if
        available and free of nan values.

    Raises
    ------
    ValueError
        If the number of timestamps does not match the number of frame
        differences.
    """
    base_name = get_base_name(masked_file)
    print_flush(base_name + ' Aligning Stage Motion...')

    fps = read_fps(skeletons_file)

    # Open the information file and read the tracking delay time.
    # (help from segworm findStageMovement)
    # 2. The info file contains the tracking delay. This delay represents the
    # minimum time between stage movements and, conversely, the maximum time it
    # takes for a stage movement to complete. If the delay is too small, the
    # stage movements become chaotic. We load the value for the delay.
    with tables.File(masked_file, 'r') as fid:
        xml_info = fid.get_node('/xml_info').read().decode()
        g_mask = fid.get_node('/mask')

        tot_frames = g_mask.shape[0]
        # Read the scale conversions, needed to convert pixels into microns
        pixelPerMicronX = 1 / g_mask._v_attrs['pixels2microns_x']
        pixelPerMicronY = 1 / g_mask._v_attrs['pixels2microns_y']

    with pd.HDFStore(masked_file, 'r') as fid:
        stage_log = fid['/stage_log']

    # plain text search of the <delay> tag (value in milliseconds)
    delay_str = xml_info.partition('<delay>')[-1].partition('</delay>')[0]
    delay_time = float(delay_str) / 1000
    delay_frames = np.ceil(delay_time * fps)

    # RMS of both scales. The original squared the X scale twice, which only
    # gives the same value when |X| == |Y|.
    normScale = np.sqrt((pixelPerMicronX**2 + pixelPerMicronY**2) / 2)
    pixelPerMicronScale = normScale * np.array(
        (np.sign(pixelPerMicronX), np.sign(pixelPerMicronY)))

    # Compute the rotation matrix.
    angle = np.arctan(pixelPerMicronY / pixelPerMicronX)
    if angle > 0:
        angle = np.pi / 4 - angle
    else:
        angle = np.pi / 4 + angle

    cosAngle = np.cos(angle)
    sinAngle = np.sin(angle)
    rotation_matrix = np.array(((cosAngle, -sinAngle), (sinAngle, cosAngle)))

    # Ev's code uses the full vectors without dropping frames
    # 1. video2Diff differentiates a video frame by frame and outputs the
    # differential variance. We load these frame differences.
    frame_diffs_d = getFrameDiffVar(masked_file)

    print_flush(base_name + ' Aligning Stage Motion...')
    # Read the media times and locations from the log file.
    # (help from segworm findStageMovement)
    # 3. The log file contains the initial stage location at media time 0 as
    # well as the subsequent media times and locations per stage movement. Our
    # algorithm attempts to match the frame differences in the video (see step
    # 1) to the media times in this log file. Therefore, we load these media
    # times and stage locations.
    mediaTimes = stage_log['stage_time'].values
    locations = stage_log[['stage_x', 'stage_y']].values

    # ini stage movement fields
    with tables.File(skeletons_file, 'r+') as fid:
        # delete data from previous analysis if any
        if '/stage_movement' in fid:
            fid.remove_node('/stage_movement', recursive=True)
        g_stage_movement = fid.create_group('/', 'stage_movement')
        g_stage_movement._v_attrs['has_finished'] = 0

        # read and prepare timestamp
        # (`np.int` was removed in NumPy 1.24, the builtin `int` is the same dtype)
        try:
            video_timestamp_ind = fid.get_node('/timestamp/raw')[:]
            if np.any(np.isnan(video_timestamp_ind)):
                raise ValueError()
            video_timestamp_ind = video_timestamp_ind.astype(int)
        except (tables.exceptions.NoSuchNodeError, ValueError):
            warnings.warn(
                'It is corrupt or do not exist. I will assume no dropped frames and deduce it from the number of frames.'
            )
            video_timestamp_ind = np.arange(tot_frames, dtype=int)

    # The shift makes everything a bit more complicated. The first frame has
    # to be removed before resizing the array considering the dropped frames.
    if video_timestamp_ind.size > frame_diffs_d.size + 1:
        # one extra frame at the end of the timestamp (two with respect to
        # the frame_diff) is tolerated
        video_timestamp_ind = video_timestamp_ind[:frame_diffs_d.size + 1]

    dd = video_timestamp_ind - np.min(video_timestamp_ind) - 1  # shift data
    dd = dd[dd >= 0]

    if frame_diffs_d.size != dd.size:
        raise ValueError(
            'Number of timestamps do not match the number of frames in the movie.'
        )

    # dropped frames stay as nan
    frame_diffs = np.full(int(np.max(video_timestamp_ind)), np.nan)
    frame_diffs[dd] = frame_diffs_d

    # save stage data into the skeletons.hdf5
    with tables.File(skeletons_file, 'r+') as fid:
        # this data is saved before the alignment for debugging purposes
        g_stage_movement = fid.get_node('/stage_movement')
        fid.create_carray(g_stage_movement, 'frame_diffs', obj=frame_diffs_d)

        g_stage_movement._v_attrs['fps'] = fps
        g_stage_movement._v_attrs['delay_frames'] = delay_frames
        g_stage_movement._v_attrs[
            'microns_per_pixel_scale'] = pixelPerMicronScale
        g_stage_movement._v_attrs['rotation_matrix'] = rotation_matrix

    # run the alignment; an exception here leaves has_finished at 0
    is_stage_move, movesI, stage_locations = \
        findStageMovement(frame_diffs, mediaTimes, locations, delay_frames, fps)
    stage_vec_d, is_stage_move_d = shift2video_ref(is_stage_move, movesI,
                                                   stage_locations,
                                                   video_timestamp_ind)

    # save stage data into the skeletons.hdf5
    with tables.File(skeletons_file, 'r+') as fid:
        g_stage_movement = fid.get_node('/stage_movement')
        fid.create_carray(g_stage_movement, 'stage_vec', obj=stage_vec_d)
        fid.create_carray(g_stage_movement, 'is_stage_move',
                          obj=is_stage_move_d)
        g_stage_movement._v_attrs['has_finished'] = 1

    _h_add_stage_position_pix(masked_file, skeletons_file)
    print_flush(base_name + ' Aligning Stage Motion. Finished.')
def correctHeadTailIntensity(skeletons_file,
                             intensities_file,
                             smooth_W=5,
                             gap_size=-1,
                             min_block_size=-1,
                             local_avg_win=-1,
                             min_frac_in=0.85,
                             head_tail_param=None,
                             head_tail_int_method='MEDIAN_INT'):
    """
    Correct the head-tail orientation of every worm using intensity profiles.

    Each trajectory (grouped by `worm_index_joined`) is processed with
    `correctHeadTailIntWorm`. Unless `head_tail_int_method` is
    'HEAD_BRIGHTER', the final orientation is then verified with
    `checkFinalOrientation` and the whole trajectory is switched when the
    returned probability is below 0.5. At the end the switched blocks are
    saved in `/intensity_analysis/switched_head_tail` and the attribute
    `has_finished` of `/skeleton` is set to 4.

    Parameters
    ----------
    skeletons_file, intensities_file : str
        HDF5 files; both are handed to the helpers and `skeletons_file` is
        modified here.
    smooth_W, gap_size, min_block_size, local_avg_win :
        Passed through `head_tail_int_defaults`; the values it returns are the
        ones actually used.
    min_frac_in : float
        Forwarded to `correctHeadTailIntWorm`.
    head_tail_param : dict or None
        Keyword arguments for `head_tail_defaults`. None means no arguments.
    head_tail_int_method : str
        Forwarded to `correctHeadTailIntWorm`.
    """
    if head_tail_param is None:
        head_tail_param = {}

    output = head_tail_int_defaults(skeletons_file,
                                    smooth_W=smooth_W,
                                    gap_size=gap_size,
                                    min_block_size=min_block_size,
                                    local_avg_win=local_avg_win)
    smooth_W = output['smooth_W']
    gap_size = output['gap_size']
    min_block_size = output['min_block_size']
    local_avg_win = output['local_avg_win']

    head_tail_param = head_tail_defaults(skeletons_file, **head_tail_param)

    # get the trajectories table
    with pd.HDFStore(skeletons_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']
        # at this point the int_map_id with the intensity maps indexes must
        # exist in the table
        assert 'int_map_id' in trajectories_data

    grouped_trajectories = trajectories_data.groupby('worm_index_joined')
    tot_worms = len(grouped_trajectories)

    # variables to report progress
    base_name = skeletons_file.rpartition('.')[0].rpartition(
        os.sep)[-1].rpartition('_')[0]
    progress_timer = TimeCounter('')

    # worms with not enough difference between the normal and inverted median
    # intensity profile (nothing is added to this list at the moment)
    bad_worms = []
    switched_blocks = []  # data from the blocks that were switched

    for index_n, (worm_index, trajectories_worm) in enumerate(grouped_trajectories):
        if index_n % 10 == 0:
            dd = " Correcting Head-Tail using intensity profiles. Worm %i of %i." % (
                index_n + 1, tot_worms)
            dd = base_name + dd + ' Total time:' + progress_timer.get_time_str()
            print_flush(dd)

        # correct head tail using the intensity profiles
        dd = correctHeadTailIntWorm(trajectories_worm, skeletons_file,
                                    intensities_file, smooth_W, gap_size,
                                    min_block_size, local_avg_win,
                                    min_frac_in, head_tail_int_method)

        switched_blocks += [(worm_index, t0, tf) for t0, tf in dd]

        # check that the final orientation is correct, otherwise switch the
        # whole trajectory
        if head_tail_int_method != 'HEAD_BRIGHTER':
            p_tot, skel_group, int_group = checkFinalOrientation(
                skeletons_file, intensities_file, trajectories_worm,
                min_block_size, head_tail_param)
            if p_tot < 0.5:
                switchBlocks(skel_group, skeletons_file, int_group,
                             intensities_file)

    # label the process as finished and store the indexes of the switched worms
    with tables.File(skeletons_file, 'r+') as fid:
        if '/intensity_analysis' not in fid:
            fid.create_group('/', 'intensity_analysis')

        # delete the results of previous runs. The original removed
        # '/intensity_analysis/min_block_size/bad_worms', a node that is never
        # created, so an existing 'bad_worms' array made this step fail.
        if '/intensity_analysis/bad_worms' in fid:
            fid.remove_node('/intensity_analysis/bad_worms')
        if '/intensity_analysis/switched_head_tail' in fid:
            fid.remove_node('/intensity_analysis/switched_head_tail')

        if bad_worms:
            fid.create_array('/intensity_analysis', 'bad_worms',
                             np.array(bad_worms))

        if switched_blocks:
            # to rec array (`np.int` was removed in NumPy 1.24, the builtin
            # `int` gives the same dtype)
            switched_blocks = np.array(switched_blocks,
                                       dtype=[('worm_index', int),
                                              ('ini_frame', int),
                                              ('last_frame', int)])
            fid.create_table('/intensity_analysis', 'switched_head_tail',
                             switched_blocks)

        fid.get_node('/skeleton')._v_attrs['has_finished'] = 4

    print_flush(base_name +
                ' Head-Tail correction using intensity profiles finished: ' +
                progress_timer.get_time_str())
def alignStageMotion_new(masked_file, skeletons_file):
    """
    Align the stage movements with the video frames without raising on
    recoverable problems.

    Instead of raising, the outcome is reported through the attribute
    `has_finished` of the `/stage_movement` group in `skeletons_file`:

      0  : analysis started but not completed
      1  : alignment finished correctly
      80 : `/timestamp/raw` contains nan values (other than the last one)
      81 : the number of timestamps does not match the number of frame
           differences
      82 : `findStageMovement` failed; the stage data saved corresponds to
           the fallback values (all frames flagged as stage movement)

    For flags 80 and 81 the function returns before saving any stage data.

    Parameters
    ----------
    masked_file : str
        HDF5 file with `/mask`, `/xml_info` and `/stage_log`.
    skeletons_file : str
        HDF5 file with `/timestamp/raw`; it is modified.
    """
    fps = read_fps(skeletons_file)

    with tables.File(skeletons_file, 'r+') as fid:
        # create the group, or delete the data from a previous analysis.
        # (the original tested the truth value of the string, so the group
        # was never created)
        if '/stage_movement' not in fid:
            g_stage_movement = fid.create_group('/', 'stage_movement')
        else:
            g_stage_movement = fid.get_node('/stage_movement')

        for field in ['stage_vec', 'is_stage_move', 'frame_diffs']:
            if field in g_stage_movement:
                fid.remove_node(g_stage_movement, field)

        g_stage_movement._v_attrs['has_finished'] = 0

        video_timestamp_ind = fid.get_node('/timestamp/raw')[:]
        # a nan in the last position can be tolerated
        if video_timestamp_ind.size > 1 and np.isnan(video_timestamp_ind[-1]):
            video_timestamp_ind[-1] = video_timestamp_ind[-2]

        if np.any(np.isnan(video_timestamp_ind)):
            exit_flag = 80
            warnings.warn(
                'The timestamp is corrupt or do not exist.\n No stage correction processed. Exiting with has_finished flag %i.'
                % exit_flag)
            # turn on the has_finished flag and exit
            g_stage_movement._v_attrs['has_finished'] = exit_flag
            return

        # `np.int` was removed in NumPy 1.24
        video_timestamp_ind = video_timestamp_ind.astype(int)

    # Open the information file and read the tracking delay time.
    # (help from segworm findStageMovement)
    # 2. The info file contains the tracking delay. This delay represents the
    # minimum time between stage movements and, conversely, the maximum time it
    # takes for a stage movement to complete. If the delay is too small, the
    # stage movements become chaotic. We load the value for the delay.
    with tables.File(masked_file, 'r') as fid:
        xml_info = fid.get_node('/xml_info').read().decode()
        g_mask = fid.get_node('/mask')
        # Read the scale conversions, needed to convert pixels into microns
        pixelPerMicronX = 1 / g_mask._v_attrs['pixels2microns_x']
        pixelPerMicronY = 1 / g_mask._v_attrs['pixels2microns_y']

    with pd.HDFStore(masked_file, 'r') as fid:
        stage_log = fid['/stage_log']

    # plain text search of the <delay> tag (value in milliseconds)
    delay_str = xml_info.partition('<delay>')[-1].partition('</delay>')[0]
    delay_time = float(delay_str) / 1000
    delay_frames = np.ceil(delay_time * fps)

    # RMS of both scales. `^` is the MATLAB power operator; in Python it is a
    # bitwise xor and fails with floats.
    normScale = np.sqrt((pixelPerMicronX**2 + pixelPerMicronY**2) / 2)
    pixelPerMicronScale = normScale * np.array(
        (np.sign(pixelPerMicronX), np.sign(pixelPerMicronY)))

    # Compute the rotation matrix.
    angle = np.arctan(pixelPerMicronY / pixelPerMicronX)
    if angle > 0:
        angle = np.pi / 4 - angle
    else:
        angle = np.pi / 4 + angle

    cosAngle = np.cos(angle)
    sinAngle = np.sin(angle)
    rotation_matrix = np.array(((cosAngle, -sinAngle), (sinAngle, cosAngle)))

    # Ev's code uses the full vectors without dropping frames
    # 1. video2Diff differentiates a video frame by frame and outputs the
    # differential variance. We load these frame differences.
    frame_diffs_d = getFrameDiffVar(masked_file)

    # Read the media times and locations from the log file.
    # (help from segworm findStageMovement)
    # 3. The log file contains the initial stage location at media time 0 as
    # well as the subsequent media times and locations per stage movement. Our
    # algorithm attempts to match the frame differences in the video (see step
    # 1) to the media times in this log file. Therefore, we load these media
    # times and stage locations.
    mediaTimes = stage_log['stage_time'].values
    locations = stage_log[['stage_x', 'stage_y']].values

    # The shift makes everything a bit more complicated. The first frame has
    # to be removed before resizing the array considering the dropped frames.
    if video_timestamp_ind.size > frame_diffs_d.size + 1:
        # one extra frame at the end of the timestamp (two with respect to
        # the frame_diff) is tolerated
        video_timestamp_ind = video_timestamp_ind[:frame_diffs_d.size + 1]

    frame_diffs = np.full(int(np.max(video_timestamp_ind)), np.nan)
    # shift data; the `- 1` drops the first frame as in alignStageMotion,
    # since there is one frame difference less than frames
    dd = video_timestamp_ind - np.min(video_timestamp_ind) - 1
    dd = dd[dd >= 0]

    if frame_diffs_d.size != dd.size:
        exit_flag = 81
        warnings.warn(
            'Number of timestamps do not match the number read movie frames.\n No stage correction processed. Exiting with has_finished flag %i.'
            % exit_flag)
        # turn on the has_finished flag and exit
        with tables.File(skeletons_file, 'r+') as fid:
            fid.get_node('/stage_movement')._v_attrs['has_finished'] = exit_flag
        return

    # dropped frames stay as nan
    frame_diffs[dd] = frame_diffs_d

    # try to run the alignment and use fallback data if it fails
    try:
        is_stage_move, movesI, stage_locations = \
            findStageMovement(frame_diffs, mediaTimes, locations, delay_frames, fps)
        exit_flag = 1
    except Exception:
        exit_flag = 82
        warnings.warn('Returning all nan stage vector. Exiting with has_finished flag {}'.format(exit_flag))

        with tables.File(skeletons_file, 'r+') as fid:
            fid.get_node('/stage_movement')._v_attrs['has_finished'] = exit_flag

        # fallback: every frame is flagged as a stage movement
        is_stage_move = np.ones(frame_diffs.size + 1)
        stage_locations = []
        movesI = []

    stage_vec_d, is_stage_move_d = shift2video_ref(is_stage_move, movesI,
                                                   stage_locations,
                                                   video_timestamp_ind)

    # save stage data into the skeletons.hdf5
    with tables.File(skeletons_file, 'r+') as fid:
        g_stage_movement = fid.get_node('/stage_movement')
        # arrays are created through the File object; Group has no create_carray
        fid.create_carray(g_stage_movement, 'frame_diffs', obj=frame_diffs_d)
        fid.create_carray(g_stage_movement, 'stage_vec', obj=stage_vec_d)
        fid.create_carray(g_stage_movement, 'is_stage_move', obj=is_stage_move_d)

        g_stage_movement._v_attrs['fps'] = fps
        g_stage_movement._v_attrs['delay_frames'] = delay_frames
        g_stage_movement._v_attrs['microns_per_pixel_scale'] = pixelPerMicronScale
        g_stage_movement._v_attrs['rotation_matrix'] = rotation_matrix
        # keep the failure flag (82) instead of always overwriting it with 1
        g_stage_movement._v_attrs['has_finished'] = exit_flag

    print_flush('Finished.')