def preeditimage(input_file, output_dir, params):
    """
    Segment the specified grayscale image, and save the binary image to file.

    First, clean the image by removing the background and filtering it, then
    find the edges and threshold the result to convert it to a binary image.
    Extract and verify the data from this image.

    args:
        input_file (path): path to the raw image file
        output_dir (path): output directory to save file
        params (dict): input parameters

    """
    # Do not overwrite existing output
    output_file = os.path.join(output_dir, os.path.basename(input_file))
    if os.path.isfile(output_file):
        img = imread(output_file)
    else:
        # Segment the grayscale image and save to file
        img = segment.main(imread(input_file), params['segment'])
        imsave(output_file, img)
    print ' - segment: ' + time.asctime()

    # Do not overwrite existing output
    output_file2 = os.path.splitext(output_file)[0] + '.pickle'
    if os.path.isfile(output_file2):
        return

    # Extract properties from the labeled image and save as a DataFrame
    data = extract.preedit(img, params['extract'])
    columns = ('Area', 'BoundingBox', 'Centroid', 'EdgeSpline', 'FourierFit',
               'Length', 'MidSpline', 'Perimeter', 'StalkedPole', 'SwarmerPole')
    f = read.getframenum(input_file, params['segment']['pattern'])
    if data:
        # Make MultiIndex with frame and label info
        j = [f] * len(data)
        k = [v['Label'] for v in data]
    else:
        # Create empty DataFrame
        data = [dict.fromkeys(columns, np.nan)]
        j = [f]
        k = [-1]
    index = MultiIndex.from_arrays((j, k), names=('Frame', 'Label'))
    df = DataFrame(data, columns=columns, index=index)
    verify.preedit(df, params['verify'])
    df.to_pickle(output_file2)
    print ' - extract: ' + time.asctime()

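# Illustrative only: a sketch of how preeditimage might be invoked for one
# raw frame. The paths and parameter contents below are hypothetical
# placeholders; the real dictionaries are built by preeditmovie() from the
# experiment's parameter file.
#
#   params = {'segment': {'pattern': 'phase-*.tif'},   # hypothetical values
#             'extract': {},
#             'verify': {}}
#   preeditimage('raw/pos01/phase-0000.tif', 'analyses/pos01/temp', params)
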
def load_posn(self):
    """
    Load data for the current position.

    """
    self.posn_dir = os.path.join(self.analyses_dir, self.posns[self.posn_idx])
    b = os.path.join(self.posn_dir, 'blocks')

    # Read in values for the current position
    self.data_file = os.path.join(self.posn_dir, 'edits.pickle')
    self.data = read_pickle(self.data_file)

    # Read in values from the log file
    log_file = os.path.join(self.posn_dir, 'log.pickle')
    log_data = pickle.load(open(log_file, 'rb'))
    self.img_shape = log_data['image']['phase']['shape']
    self.img_dtype = log_data['image']['phase']['dtype']
    self.pumps = log_data['pumps']

    self.TraceList = list(self.data['Trace'])
    self.SavedList = [v for i, v in self.data['Trace'].iteritems()
                      if self.data['Saved'][i]]
    self.num_traces = len(self.TraceList)
    if self.num_traces < 1:
        return
    self.num_frames = len(self.data.ix[0]['Label'])
    self.frames = np.arange(self.num_frames)

    self.time_phase = log_data['phase'][:self.num_frames]
    self.time = self.time_phase / 60
    if 'fluor' in log_data:
        max_time = np.max(self.time_phase)
        num_frames_fluor = np.argmin(np.abs(log_data['fluor'] - max_time)) + 1
        self.time_fluor = log_data['fluor'][:num_frames_fluor]

    # Unzip phase-contrast image files and read in names of image files
    # (remember the absolute working directory so it can be restored)
    old_dir = os.path.abspath(os.curdir)
    self.files = [''] * self.num_frames
    for v in read.listdirs(b, read.PATTERN['blockdir']):
        # Extract all .tif images in the input directory
        os.chdir(os.path.join(b, v))
        zipfile.ZipFile('PhaseSegment.zip').extractall()
        for f in read.listfiles('PhaseSegment', read.PATTERN['phasetif']):
            i = read.getframenum(f, read.PATTERN['phasetif'])
            if i < self.num_frames:
                self.files[i] = os.path.join(b, v, 'PhaseSegment', f)
        os.chdir(old_dir)

def preeditmovie(expt_raw_data_dir, expt_analyses_dir, positions, params):
    """
    Automated steps to perform prior to editing.

    """
    expt = os.path.basename(expt_analyses_dir)
    g = params['general']

    # First load or create log files for each position
    log.main(expt_raw_data_dir, expt_analyses_dir, positions, g['write_mode'])

    # Execute each position in succession
    for p in positions:
        # Update the terminal display
        read.updatelog(expt, p, 'preedit')
        print 'start position ' + p + ': ' + time.asctime()

        posn_raw_data_dir = os.path.join(expt_raw_data_dir, p)
        posn_analyses_dir = os.path.join(expt_analyses_dir, p)

        # Segmented files will be saved to a temporary directory
        temp_dir = os.path.join(posn_analyses_dir, 'temp')
        if g['write_mode'] == 0:
            read.rmkdir(temp_dir)
        else:
            read.cmkdir(temp_dir)

        # Pad with default parameters, and find frames to process
        frame_start, frame_stop = float('inf'), 0.
        for mode in MODES:
            print '---mode', mode
            d = params[mode]

            # Pad with default parameters as necessary
            d = eval('%s.workflow.fillparams(d)' % mode)

            # Find all .tif images of specified type in the given directory
            d['segment']['file_list'] = []
            for f in read.listfiles(posn_raw_data_dir, d['segment']['pattern']):
                j = read.getframenum(f, d['segment']['pattern'])
                if g['frame_range'][0] <= j < g['frame_range'][1]:
                    frame_start = min(frame_start, j)
                    frame_stop = max(frame_stop, j)
                    d['segment']['file_list'].append(f)
        frame_stop += 1

        # Create arguments for parallel processing
        args = [(posn_raw_data_dir, temp_dir, MODES, copy.deepcopy(params))
                for _ in range(g['num_procs'])]
        file_list = sorted(args[0][3]['phase']['segment']['file_list'])

        # # debug: select only a few files -BK
        # print 'initial frame stop', frame_stop
        # frame_stop = 500
        # file_list = file_list[:frame_stop]
        # # debug: select only a few files -BK

        inds = partition_indices(file_list, g['num_procs'])
        for (sta_ind, end_ind), arg in zip(inds, args):
            arg[3]['phase']['segment']['file_list'] = file_list[sta_ind:end_ind]

        # Process each block of frames in parallel
        parallel.main(preeditblock, args, g['num_procs'])
        print 'extract: ' + time.asctime()

        # Archive the output files into .zip files, then delete each .tif
        num_tifs = frame_stop - frame_start
        num_digits = int(np.ceil(np.log10(num_tifs + 1)))

        # Create new set of directories with pre-specified block size
        frames = range(frame_start, frame_stop - 1, g['block_size'])
        frames.append(frame_stop)
        block_frames = zip(frames[:-1], frames[1:])

        # Make directories to hold files, named according to frames
        read.cmkdir(os.path.join(posn_analyses_dir, 'blocks'))
        block_dirs = []
        for j1, j2 in block_frames:
            strs = [str(v).zfill(num_digits) for v in (j1, j2)]
            v = os.path.join(posn_analyses_dir, 'blocks',
                             'frame{}-{}'.format(*strs))
            os.mkdir(v)
            block_dirs.append(v)

        for m in MODES:
            # The segmented .tif files will be stored in a .zip file
            zip_name = m.capitalize() + 'Segment'
            [read.cmkdir(os.path.join(v, zip_name)) for v in block_dirs]

            # Find all segmented .tif images and transfer to the new directories
            d = params[m]
            for f in read.listfiles(temp_dir, d['segment']['pattern']):
                j = read.getframenum(f, d['segment']['pattern'])
                for i, (j1, j2) in enumerate(block_frames):
                    if j1 <= j < j2:
                        old_name = os.path.join(temp_dir, f)
                        zip_dir = os.path.join(block_dirs[i], zip_name)
                        shutil.move(old_name, zip_dir)

            # Zip each directory of segmented .tif files
            old_dir = os.path.abspath(os.curdir)
            for v in block_dirs:
                os.chdir(v)
                archive_util.make_zipfile(zip_name, zip_name)
                shutil.rmtree(zip_name)
                os.chdir(old_dir)

            # Make temporary directories for data outputs
            dat_name = m.capitalize() + 'Data'
            [read.cmkdir(os.path.join(v, dat_name)) for v in block_dirs]

            # Find all analyzed .pickle files and transfer to the new directories
            f, e = os.path.splitext(d['segment']['pattern'])
            dat_pattern = (f + '.pickle' + e[4:])
            for f in read.listfiles(temp_dir, dat_pattern):
                j = read.getframenum(f, dat_pattern)
                for i, (j1, j2) in enumerate(block_frames):
                    if j1 <= j < j2:
                        # Transfer each frame to the correct block
                        old_name = os.path.join(temp_dir, f)
                        dat_dir = os.path.join(block_dirs[i], dat_name)
                        shutil.move(old_name, dat_dir)

            # Concatenate each set of files into a DataFrame for each parameter
            for block_dir in block_dirs:
                dat_dir = os.path.join(block_dir, dat_name)
                data = []
                for u in os.listdir(dat_dir):
                    dat_file = os.path.join(dat_dir, u)
                    try:
                        d = read_pickle(dat_file)
                    except:
                        # Skip files that cannot be unpickled
                        continue
                    data.append(d)
                df = concat(data)
                df = df.reindex(sorted(df.index))
                for c in df.columns:
                    df[c].to_pickle(os.path.join(block_dir, c + '.pickle'))
                shutil.rmtree(dat_dir)
        print 'shuffle: ' + time.asctime()

        # Delete all temporary files
        shutil.rmtree(temp_dir)
        '''
        block_dirs = [os.path.join(posn_analyses_dir, 'blocks', v) for v in
                      os.listdir(os.path.join(posn_analyses_dir, 'blocks'))
                      if 'frame' in v]
        '''

        # Track the blocks in parallel
        args = []
        for v in block_dirs:
            output_file = os.path.join(v, 'Trace.pickle')
            if os.path.isfile(output_file):
                os.remove(output_file)
            args.append((v, output_file, params['phase']['track']))
        parallel.main(trackblock, args, g['num_procs'])
        print 'track: ' + time.asctime()

        # Stitch independently-tracked trajectories together
        stitchblocks(block_dirs, params['phase']['track'])
        print 'stitch: ' + time.asctime()

        # Collate the data for manual editing
        output_file = os.path.join(posn_analyses_dir, 'edits.pickle')
        collateblocks(block_dirs, output_file, params['phase']['collate'])
        print 'collate: ' + time.asctime()

        # Update the experiment log file
        read.updatelog(expt, p, 'preedit', expt_analyses_dir)
        print 'final: ' + time.asctime()

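# Note: partition_indices() above is defined elsewhere in this package; its
# call site implies it splits a list into num_procs roughly equal contiguous
# chunks and returns (start, stop) index pairs. A minimal sketch of that
# assumed behavior, kept as a comment so it does not shadow the real
# implementation:
#
#   def partition_indices(items, num_parts):
#       n = len(items)
#       bounds = [(i * n) // num_parts for i in range(num_parts + 1)]
#       return zip(bounds[:-1], bounds[1:])
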