Example #1
def read_img(input_dir):
    """
    Read image metadata. Search for both phase-contrast and fluorescence files.

    args:
        input_dir (path): raw data experiment directory

    """
    results = {
        k: {
            'include': False,
            'shape': (0, 0),
            'dtype': None
        }
        for k in ('phase', 'fluor')
    }

    # Only look in the directory of the first position
    p = read.listdirs(input_dir, read.PATTERN['posndir']).next()

    # Try to read metadata about phase and fluor files (need one image each)
    for k in ('phase', 'fluor'):
        pattern = read.PATTERN['%stif' % k]
        for v in read.listfiles(os.path.join(input_dir, p), pattern):
            if os.path.splitext(v)[1] == '.tif':
                # One image per mode is enough to record shape and dtype
                img = imread(os.path.join(input_dir, p, v))
                results[k]['include'] = True
                results[k]['shape'] = img.shape
                results[k]['dtype'] = img.dtype
                break
    return results
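A minimal usage sketch for the function above (the directory path is hypothetical; `read` is this project's I/O helper module and `imread` would come from an image library such as scipy or skimage):

# Hedged usage sketch: assumes an experiment directory whose position
# subdirectories contain phase*.tif and/or fluor*.tif images.
meta = read_img('/data/expt01')  # hypothetical path
for mode in ('phase', 'fluor'):
    if meta[mode]['include']:
        print mode, meta[mode]['shape'], meta[mode]['dtype']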
Example #2
    def load_posn(self):
        """
        Load data for the current position.

        """
        self.posn_dir = os.path.join(self.analyses_dir,
                                     self.posns[self.posn_idx])
        b = os.path.join(self.posn_dir, 'blocks')

        # Read in values for the current position
        self.data_file = os.path.join(self.posn_dir, 'edits.pickle')
        self.data = read_pickle(self.data_file)

        # Read in values from the log file
        log_file = os.path.join(self.posn_dir, 'log.pickle')
        with open(log_file, 'rb') as f:
            log_data = pickle.load(f)
        self.img_shape = log_data['image']['phase']['shape']
        self.img_dtype = log_data['image']['phase']['dtype']
        self.pumps = log_data['pumps']

        self.TraceList = list(self.data['Trace'])
        self.SavedList = [
            v for i, v in self.data['Trace'].iteritems()
            if self.data['Saved'][i]
        ]
        self.num_traces = len(self.TraceList)
        if self.num_traces < 1:
            return
        self.num_frames = len(self.data.ix[0]['Label'])
        self.frames = np.arange(self.num_frames)
        self.time_phase = log_data['phase'][:self.num_frames]
        self.time = self.time_phase / 60
        if log_data.has_key('fluor'):
            max_time = np.max(self.time_phase)
            num_frames_fluor = np.argmin(
                np.abs(log_data['fluor'] - max_time)) + 1
            self.time_fluor = log_data['fluor'][:num_frames_fluor]

        # Unzip phase-contrast image files and read in names of image files
        # Remember the absolute working directory so we can return to it
        old_dir = os.path.abspath(os.curdir)
        self.files = [''] * self.num_frames
        for v in read.listdirs(b, read.PATTERN['blockdir']):
            # Extract all .tif images in the input directory
            os.chdir(os.path.join(b, v))
            zipfile.ZipFile('PhaseSegment.zip').extractall()
            for f in read.listfiles('PhaseSegment', read.PATTERN['phasetif']):
                i = read.getframenum(f, read.PATTERN['phasetif'])
                if i < self.num_frames:
                    self.files[i] = os.path.join(b, v, 'PhaseSegment', f)
            os.chdir(old_dir)
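The unzip step above round-trips through os.chdir; as an alternative sketch (standard zipfile API, hypothetical helper name), the archive can be extracted without ever changing the working directory:

import os
import zipfile

def extract_block(block_dir):
    # Hypothetical helper: extract PhaseSegment.zip inside block_dir
    # using extractall's `path` argument instead of os.chdir.
    archive = os.path.join(block_dir, 'PhaseSegment.zip')
    with zipfile.ZipFile(archive) as zf:
        zf.extractall(path=block_dir)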
Example #3
def preeditmovie(expt_raw_data_dir, expt_analyses_dir, positions, params):
    """
    Automated steps to perform prior to editing.

    """
    expt = os.path.basename(expt_analyses_dir)
    g = params['general']

    # First load or create log files for each position
    log.main(expt_raw_data_dir, expt_analyses_dir, positions, g['write_mode'])

    # Execute each position in succession
    for p in positions:
        # Update the terminal display
        read.updatelog(expt, p, 'preedit')
        print 'start position ' + p + ': ' + time.asctime()

        posn_raw_data_dir = os.path.join(expt_raw_data_dir, p)
        posn_analyses_dir = os.path.join(expt_analyses_dir, p)

        # Segmented files will be saved to a temporary directory
        temp_dir = os.path.join(posn_analyses_dir, 'temp')
        if g['write_mode'] == 0:
            read.rmkdir(temp_dir)
        else:
            read.cmkdir(temp_dir)

        # Pad with default parameters, and find frames to process
        frame_start, frame_stop = float('inf'), 0
        for mode in MODES:
            print '---mode', mode
            d = params[mode]

            # Pad with default parameters as necessary
            d = eval('%s.workflow.fillparams(d)' % mode)

            # Find all .tif images of specified type in the given directory
            d['segment']['file_list'] = []
            for f in read.listfiles(posn_raw_data_dir, d['segment']['pattern']):
                j = read.getframenum(f, d['segment']['pattern'])
                if g['frame_range'][0] <= j < g['frame_range'][1]:
                    frame_start = min(frame_start, j)
                    frame_stop = max(frame_stop, j)
                    d['segment']['file_list'].append(f)
        # Make the upper bound exclusive (once, after scanning all modes)
        frame_stop += 1


        # Create arguments for parallel processing
        args = [(posn_raw_data_dir, temp_dir,
                 MODES, copy.deepcopy(params)) for _ in range(g['num_procs'])]
        file_list = sorted(args[0][3]['phase']['segment']['file_list'])

        # # debug: select only a few files -BK
        # print 'initial frame stop', frame_stop
        # frame_stop = 500
        # file_list = file_list[:frame_stop]
        # # debug: select only a few files -BK

        inds = partition_indices(file_list, g['num_procs'])
        for (sta_ind, end_ind), arg in zip(inds, args):
            arg[3]['phase']['segment']['file_list'] = file_list[sta_ind:end_ind]


        # Process each block of frames in parallel
        parallel.main(preeditblock, args, g['num_procs'])
        print 'extract: ' + time.asctime()


        # Archive the output files into .zip files, then delete each .tif
        num_tifs = frame_stop - frame_start
        num_digits = int(np.ceil(np.log10(num_tifs + 1)))

        # Create new set of directories with pre-specified block size
        frames = range(frame_start, frame_stop-1, g['block_size'])
        frames.append(frame_stop)
        block_frames = zip(frames[:-1], frames[1:])

        # Make directories to hold files, named according to frames
        read.cmkdir(os.path.join(posn_analyses_dir, 'blocks'))
        block_dirs = []
        for j1, j2 in block_frames:
            strs = [str(v).zfill(num_digits) for v in (j1, j2)]
            v = os.path.join(posn_analyses_dir, 'blocks',
                             'frame{}-{}'.format(*strs))
            os.mkdir(v)
            block_dirs.append(v)

        for m in MODES:
            # The segmented .tif files will be stored in a .zip file
            zip_name = m.capitalize() + 'Segment'
            for v in block_dirs:
                read.cmkdir(os.path.join(v, zip_name))

            # Find all segmented .tif images and transfer to the new directories
            d = params[m]
            for f in read.listfiles(temp_dir, d['segment']['pattern']):
                j = read.getframenum(f, d['segment']['pattern'])
                for i, (j1, j2) in enumerate(block_frames):
                    if j1 <= j < j2:
                        old_name = os.path.join(temp_dir, f)
                        zip_dir = os.path.join(block_dirs[i], zip_name)
                        shutil.move(old_name, zip_dir)

            # Zip each directory of segmented .tif files
            old_dir = os.path.abspath(os.curdir)
            for v in block_dirs:
                os.chdir(v)
                archive_util.make_zipfile(zip_name, zip_name)
                shutil.rmtree(zip_name)
                os.chdir(old_dir)

            # Make temporary directories for data outputs
            dat_name = m.capitalize() + 'Data'
            for v in block_dirs:
                read.cmkdir(os.path.join(v, dat_name))

            # Find all analyzed .pickle files and transfer to the new directories
            f, e = os.path.splitext(d['segment']['pattern'])
            dat_pattern = (f + '.pickle' + e[4:])
            for f in read.listfiles(temp_dir, dat_pattern):
                j = read.getframenum(f, dat_pattern)
                for i, (j1, j2) in enumerate(block_frames):
                    if j1 <= j < j2:
                        # Transfer each frame to the correct block
                        old_name = os.path.join(temp_dir, f)
                        dat_dir = os.path.join(block_dirs[i], dat_name)
                        shutil.move(old_name, dat_dir)

            # Concatenate each set of files into a DataFrame for each parameter
            for block_dir in block_dirs:
                dat_dir = os.path.join(block_dir, dat_name)
                data = []
                for u in os.listdir(dat_dir):
                    dat_file = os.path.join(dat_dir, u)
                    try:
                        d = read_pickle(dat_file)
                    except Exception:
                        # Skip unreadable files instead of appending stale data
                        continue
                    data.append(d)
                df = concat(data)
                df = df.reindex(sorted(df.index))
                for c in df.columns:
                    df[c].to_pickle(os.path.join(block_dir, c + '.pickle'))
                shutil.rmtree(dat_dir)
        print 'shuffle: ' + time.asctime()

        # Delete all temporary files
        shutil.rmtree(temp_dir)
        '''
        block_dirs = [os.path.join(posn_analyses_dir, 'blocks', v) for v in
                      os.listdir(os.path.join(posn_analyses_dir, 'blocks'))
                      if 'frame' in v]
        '''
        # Track the blocks in parallel
        args = []
        for v in block_dirs:
            output_file = os.path.join(v, 'Trace.pickle')
            if os.path.isfile(output_file):
                os.remove(output_file)
            args.append((v, output_file, params['phase']['track']))
        parallel.main(trackblock, args, g['num_procs'])
        print 'track: ' + time.asctime()

        # Stitch independently-tracked trajectories together
        stitchblocks(block_dirs, params['phase']['track'])
        print 'stitch: ' + time.asctime()

        # Collate the data for manual editing
        output_file = os.path.join(posn_analyses_dir, 'edits.pickle')
        collateblocks(block_dirs, output_file, params['phase']['collate'])
        print 'collate: ' + time.asctime()

        # Update the experiment log file
        read.updatelog(expt, p, 'preedit', expt_analyses_dir)
        print 'final: ' + time.asctime()
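partition_indices is not defined in this listing; a plausible sketch (an assumption, the project's version may differ) that splits the file list into num_procs nearly equal contiguous slices:

def partition_indices(file_list, num_procs):
    # Hypothetical stand-in for the project's partition_indices:
    # return (start, end) index pairs covering file_list in contiguous,
    # nearly equal chunks, one per worker process.
    base, extra = divmod(len(file_list), num_procs)
    inds, start = [], 0
    for i in range(num_procs):
        end = start + base + (1 if i < extra else 0)
        inds.append((start, end))
        start = end
    return inds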
Example #4
def read_pumps(input_dir, log_dict):
    """
    Read in metadata about pump start/stop times, flow rates and units; also 
    save the solution used in each pump into these results.

    args:
        input_dir (path): raw data experiment directory
        log_dict (dict): log file parameter-value pairs (from read_log), with 
            updated fields corresponding to "Start Date" (datetime) and "Total 
            Time" in seconds (float)

    """

    # Read in pump information
    imported = []
    for p in read.listfiles(input_dir, r'^pump[\d]+\.txt$'):
        df = read_table(os.path.join(input_dir, p), header=None,
                        names=('DateTime', 'Rate'), index_col='DateTime')
        ts = [datetime.strptime(v, DATETIME_FORMAT) for v in df.index.values]
        df.index = [(v - log_dict['Start Date']).total_seconds() for v in ts]

        df['Units'] = ''
        for k, v in df['Rate'].iteritems():
            m = re.match(r'([0-9.]*)([A-Za-z]*)', v)
            df['Rate'].ix[k] = float(m.group(1))
            df['Units'].ix[k] = m.group(2)
        imported.append(df)

    # Find the solutions in the pumps
    soln_dict = {}
    for k, v in log_dict.iteritems():
        m = re.match('Pump ([0-9]+) Solution', k)
        if m:
            soln_dict[int(m.group(1))] = v
    soln_keys = sorted(soln_dict.keys())

    # Reformat according to on vs. off
    results = []
    for i, df in enumerate(imported):
        d = {'Time': [], 'Rate': [], 'Units': '', 'Solution': ''}
        d['Solution'] = soln_dict[soln_keys[i]]
        for j, t1 in enumerate(df.index):
            r = df['Rate'].ix[t1]
            u = df['Units'].ix[t1]
            if j+1 < len(df.index):
                t2 = df.index[j+1]
            else:
                t2 = log_dict['Total Time']
            d['Rate'].append(r)
            if not d['Units']:
                # Convert pump unit codes to legible values (in TeX format)
                v, t = u[:2], u[2:]
                if v == 'UL':
                    v = u'\u03bcL'
                elif v == 'ML':
                    v = u'mL'
                if t == 'M':
                    t = '/min'
                elif t == 'H':
                    t = '/hr'
                d['Units'] = v + t
            d['Time'].append([t1, t2])
        results.append(d)
    return results
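As a quick sanity check of the unit-code conversion above (codes chosen for illustration): 'ULM' should render as u'\u03bcL/min' and 'MLH' as u'mL/hr':

# Hedged check of the pump unit-code mapping implemented above.
for code, expected in (('ULM', u'\u03bcL/min'), ('MLH', u'mL/hr')):
    v, t = code[:2], code[2:]
    v = {'UL': u'\u03bcL', 'ML': u'mL'}[v]
    t = {'M': '/min', 'H': '/hr'}[t]
    assert v + t == expected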