# Imports assumed by this excerpt (a sketch: the helper modules read, log,
# segment, extract, verify, parallel, and archive_util, as well as MODES,
# partition_indices, preeditblock, trackblock, stitchblocks, and
# collateblocks, are defined elsewhere in this package; imread/imsave may
# come from scipy.misc or skimage.io).
import copy
import os
import pickle
import shutil
import time
import zipfile

import numpy as np
from pandas import DataFrame, MultiIndex, concat, read_pickle


def preeditimage(input_file, output_dir, params):
    """
    Segment the specified grayscale image, and save the binary image to file.
    First clean the image by removing the background and filtering it, then
    find the edges and threshold them to produce a binary image. Finally,
    extract and verify the data from this image.

    Args:
        input_file (str): path to the raw grayscale image
        output_dir (str): directory in which to save the output
        params (dict): input parameters

    """

    # Do not overwrite existing output
    output_file = os.path.join(output_dir, os.path.basename(input_file))
    if os.path.isfile(output_file):
        img = imread(output_file)
    else:
        # Segment the grayscale image and save to file
        img = segment.main(imread(input_file), params['segment'])
        imsave(output_file, img)

    print ' - segment: ' + time.asctime()

    # Do not overwrite existing output
    output_file2 = os.path.splitext(output_file)[0] + '.pickle'
    if os.path.isfile(output_file2):
        return

    # Extract properties from the labeled image and save as a DataFrame
    data = extract.preedit(img, params['extract'])
    columns = ('Area', 'BoundingBox', 'Centroid', 'EdgeSpline', 'FourierFit',
               'Length', 'MidSpline', 'Perimeter', 'StalkedPole',
               'SwarmerPole')

    f = read.getframenum(input_file, params['segment']['pattern'])
    if data:
        # Make MultiIndex with frame and label info
        j = [f] * len(data)
        k = [v['Label'] for v in data]
    else:
        # Create empty DataFrame
        data = [dict.fromkeys(columns, np.nan)]
        j = [f]
        k = [-1]
    index = MultiIndex.from_arrays((j, k), names=('Frame', 'Label'))
    df = DataFrame(data, columns=columns, index=index)
    verify.preedit(df, params['verify'])
    df.to_pickle(output_file2)

    print ' - extract: ' + time.asctime()
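
# Example usage for preeditimage (a sketch; the file names and parameter
# values below are hypothetical, and the real 'segment'/'extract'/'verify'
# sub-dicts are normally filled in by each mode's workflow.fillparams):
#
#     params = {'segment': {'pattern': r'phase-(\d+)\.tif$'},
#               'extract': {},
#               'verify': {}}
#     preeditimage('raw/Pos01/phase-0001.tif', 'analyses/Pos01/temp', params)
#
# This writes the binary segmentation image alongside a .pickle file holding
# a DataFrame indexed by (Frame, Label).
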
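
# A minimal sketch of the class context assumed by load_posn below; the
# actual editor class (with analyses_dir, posns, and posn_idx attributes,
# among others) is defined elsewhere in this package:
#
#     class Editor(object):
#         def __init__(self, analyses_dir, posns):
#             self.analyses_dir = analyses_dir  # experiment analyses dir
#             self.posns = posns                # list of position names
#             self.posn_idx = 0                 # index of current position
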
    def load_posn(self):
        """
        Load data for the current position.

        """
        self.posn_dir = os.path.join(self.analyses_dir,
                                     self.posns[self.posn_idx])
        b = os.path.join(self.posn_dir, 'blocks')

        # Read in values for the current position
        self.data_file = os.path.join(self.posn_dir, 'edits.pickle')
        self.data = read_pickle(self.data_file)

        # Read in values from the log file
        log_file = os.path.join(self.posn_dir, 'log.pickle')
        log_data = pickle.load(open(log_file, 'rb'))
        self.img_shape = log_data['image']['phase']['shape']
        self.img_dtype = log_data['image']['phase']['dtype']
        self.pumps = log_data['pumps']
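        # log.pickle is assumed to hold a nested dict shaped roughly like
        # (keys taken from the accesses in this method; values illustrative):
        #
        #     {'image': {'phase': {'shape': (512, 512), 'dtype': 'uint16'}},
        #      'phase': <1-D array of phase frame times>,
        #      'fluor': <1-D array of fluorescence frame times>,  # optional
        #      'pumps': <pump metadata>}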

        self.TraceList = list(self.data['Trace'])
        self.SavedList = [
            v for i, v in self.data['Trace'].iteritems()
            if self.data['Saved'][i]
        ]
        self.num_traces = len(self.TraceList)
        if self.num_traces < 1:
            return
        self.num_frames = len(self.data.ix[0]['Label'])
        self.frames = np.arange(self.num_frames)
        self.time_phase = log_data['phase'][:self.num_frames]
        self.time = self.time_phase / 60.0  # float division, even for int arrays
        if 'fluor' in log_data:
            max_time = np.max(self.time_phase)
            num_frames_fluor = np.argmin(
                np.abs(log_data['fluor'] - max_time)) + 1
            self.time_fluor = log_data['fluor'][:num_frames_fluor]

        # Unzip phase-contrast image files and read in names of image files
        old_dir = os.path.abspath(os.curdir)  # os.curdir alone is just '.'
        self.files = [''] * self.num_frames
        for v in read.listdirs(b, read.PATTERN['blockdir']):
            # Extract all .tif images in the input directory
            os.chdir(os.path.join(b, v))
            zipfile.ZipFile('PhaseSegment.zip').extractall()
            for f in read.listfiles('PhaseSegment', read.PATTERN['phasetif']):
                i = read.getframenum(f, read.PATTERN['phasetif'])
                if i < self.num_frames:
                    self.files[i] = os.path.join(b, v, 'PhaseSegment', f)
            os.chdir(old_dir)
def preeditmovie(expt_raw_data_dir, expt_analyses_dir, positions, params):
    """
    Automated steps to perform prior to editing.

    """
    expt = os.path.basename(expt_analyses_dir)
    g = params['general']

    # First load or create log files for each position
    log.main(expt_raw_data_dir, expt_analyses_dir, positions, g['write_mode'])

    # Execute each position in succession
    for p in positions:
        # Update the terminal display
        read.updatelog(expt, p, 'preedit')
        print 'start position ' + p + ': ' + time.asctime()

        posn_raw_data_dir = os.path.join(expt_raw_data_dir, p)
        posn_analyses_dir = os.path.join(expt_analyses_dir, p)

        # Segmented files will be saved to a temporary directory
        temp_dir = os.path.join(posn_analyses_dir, 'temp')
        if g['write_mode'] == 0:
            read.rmkdir(temp_dir)
        else:
            read.cmkdir(temp_dir)

        # Pad with default parameters, and find frames to process
        frame_start, frame_stop = float('inf'), 0  # ints; used by range() below
        for mode in MODES:
            print '---mode', mode
            d = params[mode]

            # Pad with default parameters as necessary
            d = globals()[mode].workflow.fillparams(d)

            # Find all .tif images of specified type in the given directory
            d['segment']['file_list'] = []
            for f in read.listfiles(posn_raw_data_dir, d['segment']['pattern']):
                j = read.getframenum(f, d['segment']['pattern'])
                if g['frame_range'][0] <= j < g['frame_range'][1]:
                    frame_start = min(frame_start, j)
                    frame_stop = max(frame_stop, j)
                    d['segment']['file_list'].append(f)

        # Convert frame_stop from the last frame index into an exclusive
        # bound, once all modes have been scanned (incrementing inside the
        # mode loop would over-count when more than one mode is processed)
        frame_stop += 1

        # Create arguments for parallel processing
        args = [(posn_raw_data_dir, temp_dir, MODES, copy.deepcopy(params))
                for _ in range(g['num_procs'])]
        file_list = sorted(args[0][3]['phase']['segment']['file_list'])

        inds = partition_indices(file_list, g['num_procs'])
        for (sta_ind, end_ind), arg in zip(inds, args):
            arg[3]['phase']['segment']['file_list'] = file_list[sta_ind:end_ind]
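
        # partition_indices (a helper defined elsewhere) is assumed to split
        # file_list into num_procs contiguous (start, end) index pairs; for
        # example, 10 files over 3 processes might yield
        # [(0, 4), (4, 7), (7, 10)].
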
        # Process each block of frames in parallel
        parallel.main(preeditblock, args, g['num_procs'])
        print 'extract: ' + time.asctime()

        # Archive the output files into .zip files, then delete each .tif
        num_tifs = frame_stop - frame_start
        num_digits = int(np.ceil(np.log10(num_tifs + 1)))

        # Create new set of directories with pre-specified block size
        frames = range(frame_start, frame_stop-1, g['block_size'])
        frames.append(frame_stop)
        block_frames = zip(frames[:-1], frames[1:])

        # Make directories to hold files, named according to frames
        read.cmkdir(os.path.join(posn_analyses_dir, 'blocks'))
        block_dirs = []
        for j1, j2 in block_frames:
            strs = [str(v).zfill(num_digits) for v in (j1, j2)]
            v = os.path.join(posn_analyses_dir, 'blocks',
                             'frame{}-{}'.format(*strs))
            os.mkdir(v)
            block_dirs.append(v)
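
        # For example, with frame_start=0, frame_stop=150, and block_size=50
        # (illustrative values), this creates blocks/frame000-050,
        # blocks/frame050-100, and blocks/frame100-150.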

        for m in MODES:
            # The segmented .tif files will be stored in a .zip file
            zip_name = m.capitalize() + 'Segment'
            for v in block_dirs:
                read.cmkdir(os.path.join(v, zip_name))

            # Find all segmented .tif images and transfer to the new directories
            d = params[m]
            for f in read.listfiles(temp_dir, d['segment']['pattern']):
                j = read.getframenum(f, d['segment']['pattern'])
                for i, (j1, j2) in enumerate(block_frames):
                    if j1 <= j < j2:
                        old_name = os.path.join(temp_dir, f)
                        zip_dir = os.path.join(block_dirs[i], zip_name)
                        shutil.move(old_name, zip_dir)

            # Zip each directory of segmented .tif files
            old_dir = os.path.abspath(os.curdir)
            for v in block_dirs:
                os.chdir(v)
                archive_util.make_zipfile(zip_name, zip_name)
                shutil.rmtree(zip_name)
                os.chdir(old_dir)

            # Make temporary directories for data outputs
            dat_name = m.capitalize() + 'Data'
            for v in block_dirs:
                read.cmkdir(os.path.join(v, dat_name))

            # Find all analyzed .pickle files and transfer to the new directories
            base, ext = os.path.splitext(d['segment']['pattern'])
            dat_pattern = base + '.pickle' + ext[4:]
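            # For example, a pattern like r'phase-(\d+)\.tif$' becomes
            # r'phase-(\d+)\.pickle$'; the [4:] slice assumes a three-letter
            # image extension such as .tif.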
            for f in read.listfiles(temp_dir, dat_pattern):
                j = read.getframenum(f, dat_pattern)
                for i, (j1, j2) in enumerate(block_frames):
                    if j1 <= j < j2:
                        # Transfer each frame to the correct block
                        old_name = os.path.join(temp_dir, f)
                        dat_dir = os.path.join(block_dirs[i], dat_name)
                        shutil.move(old_name, dat_dir)

            # Concatenate each set of files into a DataFrame for each parameter
            for block_dir in block_dirs:
                dat_dir = os.path.join(block_dir, dat_name)
                data = []
                for u in os.listdir(dat_dir):
                    dat_file = os.path.join(dat_dir, u)
                    try:
                        d = read_pickle(dat_file)
                    except Exception:
                        # Skip unreadable files rather than appending a stale
                        # value left over from the previous iteration
                        continue
                    data.append(d)
                df = concat(data)
                df = df.reindex(sorted(df.index))
                for c in df.columns:
                    df[c].to_pickle(os.path.join(block_dir, c + '.pickle'))
                shutil.rmtree(dat_dir)
        print 'shuffle: ' + time.asctime()

        # Delete all temporary files
        shutil.rmtree(temp_dir)

        # Track the blocks in parallel
        args = []
        for v in block_dirs:
            output_file = os.path.join(v, 'Trace.pickle')
            if os.path.isfile(output_file):
                os.remove(output_file)
            args.append((v, output_file, params['phase']['track']))
        parallel.main(trackblock, args, g['num_procs'])
        print 'track: ' + time.asctime()

        # Stitch independently-tracked trajectories together
        stitchblocks(block_dirs, params['phase']['track'])
        print 'stitch: ' + time.asctime()

        # Collate the data for manual editing
        output_file = os.path.join(posn_analyses_dir, 'edits.pickle')
        collateblocks(block_dirs, output_file, params['phase']['collate'])
        print 'collate: ' + time.asctime()
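
        # At this point each block directory holds one .pickle per extracted
        # property plus Trace.pickle, and edits.pickle holds the collated
        # traces that load_posn above reads back in for manual editing.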

        # Update the experiment log file
        read.updatelog(expt, p, 'preedit', expt_analyses_dir)
        print 'final: ' + time.asctime()