Ejemplo n.º 1
0
def make_dataset_item(filename,
                      metadata=None,
                      base_dir=None,
                      data_dir=None,
                      out_dir=None):
    """ Create an item for dataset 
    
    filename: the item `filename` attribute
    metadata: the item `metadata` attribute - default: empty Mapping object
    base_dir: base directory of others - default: filename dir
    data_dir: directory of filename - default: base_dir 
    out_dir:  directory for output  - default: base_dir
    
    item data file has the following set:
     - attribute 'filename': the given value
     - attribute '__key__':  the filename with data_dir & extension removed
     - item file (used by dump&load):        out_dir/__key__+'.namespace'
     - __storage__ (for external attribute): out_dir/__key__+'_{}'
     
    returns the constructed dataset item
    """
    import os

    if metadata is None: metadata = _Mapping()
    if base_dir is None: base_dir = os.path.dirname(filename)
    if data_dir is None: data_dir = base_dir
    if out_dir is None: out_dir = base_dir

    # item key: filename relative to data_dir, without extension
    # NOTE: strip removes os.sep from *both* ends of the relative path
    file_base = filename[len(data_dir):].strip(os.sep)
    key = os.path.splitext(file_base)[0]

    # file associated to the item for dump&load + FileStorage (set below)
    item_file = os.path.join(out_dir, key) + '.namespace'

    item = _Mapping(filename=filename, metadata=metadata, __key__=key)
    item.__loader_attributes__ = ['filename', 'metadata']
    item.set_file(item_file, storage=True)

    return item
Ejemplo n.º 2
0
def _load_ini_file(ini_file):
    """
    Return the ini file content as a hierarchy of Mapping objects
    
    Handles:
     - multilevel key names (dot-separated)
     - inheritance from a *previously parsed* section, using '[child:parent]'
    
    Example::
    
        [section1]
        a=1
        [section2:section1]
        subsection.value=42
        
        returns a Mapping containing:
        section1
          a=1
        section2
          a=1
          subsection
            value=42
    """
    import ConfigParser as cfg
    parser = cfg.ConfigParser()
    parser.read(ini_file)

    content = _Mapping()
    for section in parser.sections():
        # section may be 'name' or 'name:parent'
        parts = section.split(':')
        name = parts[0]
        parent_name = parts[1] if len(parts) > 1 else ''

        # start from a copy of the parent section, if any
        inherited = content[parent_name] if parent_name else {}
        entry = _Mapping(**inherited)
        content[name] = entry

        for key, value in parser.items(section):
            _add_multilevel_key_value(entry, key, _param_eval(value))

    return content
Ejemplo n.º 3
0
    def match_plants(self, max_distance=None):
        """
        Find a 1-to-1 matching between ref & cmp trees from geometric distance of the seeds.
        
        max_distance: if given, maximum seed-to-seed distance allowed for a
                      match (forwarded to `direct_matching` as `max_d`)
        
        It sets a `mapping` (Mapping) attribute on this object, containing:
         - plant: a dictionary of (ref-plant-id:cmp-plant-id) pairs
         - plant_missed_ref: list of ref plant ids left unmatched
         - plant_missed_cmp: list of cmp plant ids left unmatched
        
        This method loads the ref&cmp trees - call `clear()` to unload them.
        """
        def distance_matrix(x1,y1,x2,y2):
            # pairwise euclidean distance: entry (i,j) is the distance
            # between point i of set 1 and point j of set 2
            x1 = x1.reshape(-1,1)
            y1 = y1.reshape(-1,1)
            x2 = x2.reshape(1,-1)
            y2 = y2.reshape(1,-1)
            return ((x1-x2)**2 + (y1-y2)**2)**.5

        # match root plant w.r.t seed position 
        # ------------------------------------
        def seed_position(t):
            """
            Return (plant-id, x, y) arrays of the seeds of tree `t`
            """
            # select seed segments and their plant label
            mask  = t.segment.seed>0
            nseed = t.segment.node[mask]
            lseed = t.segment.seed[mask]
            mask  = nseed.all(axis=1)  # remove bg segment
            nseed = nseed[mask]
            lseed = lseed[mask]
            
            # mean position of the seed nodes, per plant id
            pid = np.unique(lseed)
            x = nd.mean(t.node.x()[nseed],labels=lseed.reshape(-1,1),index=pid)
            y = nd.mean(t.node.y()[nseed],labels=lseed.reshape(-1,1),index=pid)
            
            return pid,x,y
            
        rpid, rx, ry = seed_position(self.get('ref'))
        cpid, cx, cy = seed_position(self.get('cmp'))
        
        # 1-to-1 matching minimizing seed distance
        d = distance_matrix(rx,ry,cx,cy)
        ##s1 = set(zip(range(d.shape[0]),np.argmin(d,axis=1)))
        ##s2 = set(zip(np.argmin(d,axis=0),range(d.shape[1])))
        ##match = s1.intersection(s2)
        match,r_unmatch,c_unmatch = direct_matching(d,max_d=max_distance)
        
        # `match` holds (ref-index,cmp-index) pairs: convert back to plant ids
        self.mapping = _Mapping()
        self.mapping.plant = dict((rpid[p1],cpid[p2]) for p1,p2 in match)
        self.mapping.plant_missed_ref = [rpid[i] for i in r_unmatch]
        self.mapping.plant_missed_cmp = [cpid[i] for i in c_unmatch]
Ejemplo n.º 4
0
def compute_tree_stat(tree, stat_names='all', mask=None, save=True):
    """ Compute the statistics listed in `stat_names` on `tree`

    tree:       the tree to process; its `stat` attribute is (re)set
    stat_names: 'all', or a list of names to select from `stat_list`
    mask:       optional filter function forwarded to each statistic
    save:       if True, dump the tree once the statistics are stored

    Return the input tree (with its new `stat` attribute)
    """
    if stat_names == 'all':
        selected = stat_list
    else:
        selected = dict((name, stat_list[name]) for name in stat_names)

    computed = _Mapping()
    for name, stat_fct in selected.iteritems():
        computed[name] = stat_fct(tree, mask=mask)
    tree.stat = computed

    if save:
        tree.dump()

    return tree
Ejemplo n.º 5
0
def _plot_tc(tc,x,y,plant_id, title,xlabel,ylabel, split=False, merge_unique=False, content='scatter', legend=str, print_fct=None, cla=True):
    """
    Actual plotting done by `plot_stat` and `plot_compare`
    
    tc:       sequence of tree-comparison items; their `metadata` attribute
              provides the grouping labels when `split` is given
    x,y:      the data to plot
    plant_id: stored on the axe (with x,y,tc) for the interactive callback
    title, xlabel, ylabel: axe decoration text
    split:    None to plot all data together; a dot-separated metadata
              attribute name (or a list of such names) to group data by label
              NOTE(review): the default `False` falls into the split branch
              and would fail when iterated - callers appear to pass None or
              an actual value; to confirm
    merge_unique: for 'scatter' content, merge identical (x,y) points and
              scale marker size by their multiplicity
    content:  either 'scatter' or 'box'
    legend:   a function that convert label into string
    print_fct: optional callable, called with (title, x, y) of plotted data
    cla:      if True, clear the current axe first
    """
    import matplotlib.pyplot as plt
    from matplotlib import pylab
    from matplotlib.backends import backend, interactive_bk
    
    if cla:
        plt.cla()
    
    if split is None:
        # single, unlabeled point cloud
        plt.plot(x, y, '.')
        if print_fct: print_fct(title,x,y)
    else:
        # build one label per tc item from the requested metadata attribute(s)
        if isinstance(split,basestring):
            split = ['metadata'] + split.split('.')
            label = [reduce(getattr, [t]+split) for t in tc]
            label = np.array(label)
            label_set = np.unique(label)
        else:
            # several metadata fields: each label is a row of values
            label=[]
            for spl in split:
                spl = ['metadata'] + spl.split('.')
                label.append([reduce(getattr, [t]+spl) for t in tc])
            from rhizoscan.ndarray import unique_rows
            label = np.array(label).T
            label_set = unique_rows(label)
        
        if content=='scatter':
            # one scatter call per label value, cycling through the colors
            color = ['b','g','r','c','m','y','k']
            for i,lab in enumerate(label_set):
                lab_mask = label==lab
                if lab_mask.ndim>1: lab_mask = lab_mask.all(axis=-1)
                yi = y[lab_mask]
                xi = x[lab_mask]
                if merge_unique:
                    # merge duplicated (x,y) points; marker size encodes count
                    pos = np.concatenate([xi[:,None],yi[:,None]],axis=-1)
                    pos = np.ascontiguousarray(pos).view([('x',pos.dtype),('y',pos.dtype)])
                    v,s = np.unique(pos, return_inverse=1)
                    size = np.bincount(s)
                    xi,yi  = v['x'],v['y']
                else:
                    size = 2
                label_str = legend(lab)
                colori = color[i%len(color)]
                plt.scatter(xi, yi, s=8*size, c=colori, edgecolors='none', label=label_str)
                if print_fct: print_fct(title+' - '+label_str, xi,yi)
            plt.legend(loc=0) 
        else:  # if content=='box':
            # one box per label value
            boxes = []
            names = []
            for i,lab in enumerate(label_set):
                lab_mask = label==lab
                if lab_mask.ndim>1: lab_mask = lab_mask.all(axis=-1)
                #xi = x[lab_mask]
                boxes.append(y[lab_mask])
                names.append(legend(lab))
            bp = plt.boxplot(boxes)
            for f in  bp['fliers']: f.remove()  # hide outlier markers
            plt.xticks(range(1,len(names)+1),names)
            
        
    ax = plt.gca()
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    
    # on interactive backends, attach the plotted data to the axe and connect
    # (once per figure) a button-press callback for tree selection
    if backend in interactive_bk:
        ax.tree_data = _Mapping(title=title,x=x,y=y,tc=tc, plant_id=plant_id)
        flag = '_ROOT_MEASUREMENT_CB'
        if not hasattr(plt.gcf(),flag):
            cid = pylab.connect('button_press_event', _plot_axe_selected_tree)
            setattr(plt.gcf(),flag,cid)
        
    return ax
Ejemplo n.º 6
0
 def add_comparison(self,name,value):
     """ Store `value` under `name` in this object's 'comparison' Mapping

     The 'comparison' entry is created (as an empty _Mapping) if missing.
     """
     comparison = self.setdefault('comparison', _Mapping())
     comparison[name] = value
Ejemplo n.º 7
0
 def assert_mapping(subm, k):
     """ Ensure `subm[k]` exists and is a Mapping-like object, and return it

     If `k` is missing from `subm`, or `subm[k]` has no `iteritems` method,
     it is replaced by an empty _Mapping.
     """
     # `in` replaces the deprecated dict.has_key
     if k not in subm or not hasattr(subm[k], 'iteritems'):
         subm[k] = _Mapping()
     return subm[k]
Ejemplo n.º 8
0
def make_dataset(ini_file,
                 base_dir=None,
                 data_dir=None,
                 out_dir=None,
                 verbose=False):
    """
    Return a list of dataset item following parsing rules found in `ini_file`
    
    :Inputs:
      - `ini_file`: 
          file with ini-formated content indicating the dataset to be loaded
          See the 'ini format' section for details
      - `base_dir`:
          Starting directories for inputs and outputs (see 'directories' below)
          If not given, use the directory of given `ini_file`
      - `data_dir`:
          Directories to look for data inputs         (see 'ini format' below)
          If not given, use the value in the ini file, or `base_dir`
          If it is not an absolute path, preppend it with `base_dir`
      - `out_dir`:
          Directories to set output into              (see 'directories' below)
          If not given, use the value in the ini file, or `base_dir`
          If it is not an absolute path, preppend it with `base_dir`
      - `verbose`:
          If >0, print some message on loaded dataset
    
    :Outputs:
      - A list of `Mapping` object, one for each file found with suitable output
        files configure (nothing is saved). Each contains the attributes:
          - 'filename': the found file name
          - 'metadata': constructed from the ini file
          - '__key__':  an id key made from 'filename' with `data_dir` removed 
      - The list of files found but which could not be parsed
      - The base output directory to all item (see 'directories' below)
    
    :directories:
        All output files and directories are set following the values of given 
        `base_dir`, `data_dir`, and `out_dir`
        
        The associated file of output `Mapping` items are set to:
          "[out_dir]/[item-end].namespace"
           
        The output items have their FileStorage set (see Mapping doc) to:
          "[out_dir]/[item-end]_{}"
          
        Where `item-end` is the remaining part of the filename of found items 
        after removing `data_dir` from the start and the file extension.
        
        See datastruture.Data and Mapping documentations for details on `Data` 
        associated file and `Mapping` FileStorage
        
    :ini format:
        ##todo
    """
    import os
    from os.path import join as pjoin
    from os.path import splitext, dirname, exists
    import re
    from time import strptime
    from glob import glob

    from rhizoscan.misc.path import abspath

    if not exists(ini_file):
        raise TypeError('input "ini_file" does not exist')

    # load content of ini file
    ini = _load_ini_file(ini_file)

    if verbose > 2:
        print 'loaded ini:'
        print ini.multilines_str(tab=1)

    # directory variable
    if base_dir is None:
        base_dir = dirname(abspath(ini_file))

    if data_dir is None:
        data_dir = ini['PARSING'].get('data_dir')
        if not data_dir:
            data_dir = base_dir
    data_dir = abspath(data_dir, base_dir)

    if out_dir is None:
        out_dir = ini['PARSING'].get('out_dir')
        if not out_dir:
            out_dir = base_dir
    out_dir = abspath(out_dir, base_dir)

    # find all files that fit pattern given in ini_file
    # -------------------------------------------------
    # list all suitable files
    file_pattern = ini['PARSING']['pattern']
    file_pattern = file_pattern.replace('\\', '/')  # for windows
    file_pattern = re.split('[\[\]]', file_pattern)

    glob_pattern = pjoin(data_dir, '*'.join(file_pattern[::2]))
    file_list = sorted(glob(glob_pattern))

    if verbose:
        print 'glob:', glob_pattern
        if verbose > 1:
            print '   ' + '\n   '.join(file_list)

    # prepare metatata parser
    # -----------------------
    # meta data list and regular expression to parse file names
    group_re = dict(int='([0-9]*)', float='([-+]?[0-9]*\.?[0-9]+)')
    ##meta_parser = re.compile('(.*)'.join([fp.replace('*','.*') for fp in file_pattern[::2]]))
    meta_list = [m.split(':') for m in file_pattern[1::2]]
    meta_list = [m if len(m) > 1 else m + ['str'] for m in meta_list]
    meta_parser = file_pattern[:]
    meta_parser[1::2] = [
        group_re.get(mtype, '(.*)') for name, mtype in meta_list
    ]
    meta_parser = re.compile(''.join(meta_parser))
    meta_list = [_Mapping(name=name, type=mtype) for name, mtype in meta_list]
    date_pattern = ini['PARSING'].get('date', '')  ## to remove?

    types = dict(int=int, float=float, str=str)
    for m in meta_list:
        if m.type == 'date':  ## to remove?
            m.eval = lambda s: strptime(s, date_pattern)
        elif m.type == "$":
            default = m.name + '_default'
            m.eval = lambda name: ini.get(name, default=default)
        else:
            try:
                m.eval = types[m.type]
            except KeyError:
                raise KeyError('unrecognized parsing type %s for field %s' %
                               (m.type, m.name))

    default_meta = ini.get('metadata', {})
    for k, v in default_meta.iteritems():
        default_meta[k] = _param_eval(v)

    # if grouping
    if ini['PARSING'].has_key('group'):
        g = ini['PARSING']['group']  ## eval... value is a dict
        dlist = [dirname(fi) for fi in file_list]
        fenum = [int(di == dlist[i])
                 for i, di in enumerate(dlist[1:])]  # diff of dlist
        fenum = _np.array(
            reduce(lambda L, y: L + [(L[-1] + y) * y],
                   [[0]] + fenum))  # ind of file in resp. dir.
        group = _np.zeros(fenum.max() + 1,
                          dtype='|S' +
                          str(max([len(gi) for gi in g.itervalues()])))
        for start in sorted(g.keys()):
            group[start:] = [g[start]] * (len(group) - start)
        group = group[fenum]
        if verbose:
            print 'group:', g
            if verbose > 1:
                print '   detected:', group
    else:
        group = None

    if verbose:
        print 'metadata:', meta_parser.pattern,
        print '> ' + ', '.join((m.name + ':' + m.type for m in meta_list))

    # get global variable
    global_attr = ini.get('global', {})

    # parse all image files, set metadata and remove invalid
    # ------------------------------------------------------
    img_list = Dataset()
    invalid = []
    rm_len = len(
        data_dir)  ## imply images are in base_dir. is there a more general way
    for ind, f in enumerate(file_list):
        try:
            if rm_len > 0: subf = f[rm_len + 1:]
            else: subf = f
            subf = subf.replace('\\', '/')  # for windows
            fkey = splitext(subf)[0]
            out_file = pjoin(out_dir, fkey) + '.namespace'

            meta_value = meta_parser.match(subf).groups()
            if verbose > 1:
                print '   ' + str(meta_value) + ' from ' + subf + str(rm_len)
            meta = _Mapping(**default_meta)
            if group is not None:
                meta.update(ini.get(group[ind], default=[]))
            for i, value in enumerate(meta_value):
                field = meta_list[i].name
                value = meta_list[i].eval(value)
                if field == '$':
                    meta.update(value)
                else:
                    _add_multilevel_key_value(meta, field, value)

            ds_item = _Mapping(filename=f,
                               metadata=meta,
                               __key__=fkey,
                               **global_attr)
            ds_item.__loader_attributes__ = ['filename', 'metadata']
            ds_item.set_file(out_file, storage=True)
            img_list.append(ds_item)
        except Exception as e:
            invalid.append((type(e).__name__, e.message, f))

    return img_list, invalid, out_dir