def make_dataset_item(filename, metadata=None, base_dir=None, data_dir=None, out_dir=None):
    """ Create an item for dataset

    filename: the item `filename` attribute
    metadata: the item `metadata` attribute - default: empty Mapping object
    base_dir: base directory of others     - default: filename dir
    data_dir: directory of filename        - default: base_dir
    out_dir:  directory for output         - default: base_dir

    item data file has the following set:
     - attribute 'filename': the given value
     - attribute '__key__': the filename with data_dir & extension removed
     - item file (used by dump&load): out_dir/__key__+'.namespace'
     - __storage__ (for external attribute): out_dir/__key__+'_{}'

    returns the constructed dataset item
    """
    import os

    if metadata is None:
        metadata = _Mapping()
    if base_dir is None:
        base_dir = os.path.dirname(filename)
    if data_dir is None:
        data_dir = base_dir
    if out_dir is None:
        out_dir = base_dir

    # __key__ is the filename relative to data_dir, extension removed
    # (removed dead code: an initial split of `filename` into dir/base/ext
    #  that was immediately overwritten, and a `map_store` pattern that was
    #  computed but never used)
    file_base = filename[len(data_dir):].strip(os.sep)
    key = os.path.splitext(file_base)[0]
    item_file = os.path.join(out_dir, key) + '.namespace'

    item = _Mapping(filename=filename, metadata=metadata, __key__=key)
    item.__loader_attributes__ = ['filename', 'metadata']
    item.set_file(item_file, storage=True)

    return item
def _load_ini_file(ini_file):
    """ Parse `ini_file` into a hierarchy of Mapping objects

    Supported features:
     - multilevel key names (dotted keys become nested Mappings)
     - inheritance of a *previous* section via the `[child:parent]` syntax

    Example::

        [section1]
        a=1
        [section2:section1]
        subsection.value=42

    returns a Mapping containing:
        section1
            a=1
        section2
            a=1
            subsection
                value=42
    """
    import ConfigParser as cfg

    parser = cfg.ConfigParser()
    parser.read(ini_file)

    result = _Mapping()
    for section in parser.sections():
        # `[name:parent]` -> inherit content of previously parsed `parent`
        name, parent_name = (section + ':').split(':')[:2]
        if len(parent_name):
            base = result[parent_name]
        else:
            base = {}
        result[name] = _Mapping(**base)
        for key, raw_value in parser.items(section):
            _add_multilevel_key_value(result[name], key, _param_eval(raw_value))

    return result
def match_plants(self, max_distance=None):
    """ Find a 1-to-1 matching between ref & cmp trees from geometric
    distance of the seeds.

    Sets `self.mapping` (a Mapping) with the attributes:
      - plant:            dict of (ref-plant-id: cmp-plant-id) pairs
      - plant_missed_ref: list of unmatched ref plant ids
      - plant_missed_cmp: list of unmatched cmp plant ids

    This method loads the ref&cmp trees - call `clear()` to unload them.
    """
    def pairwise_distance(xa, ya, xb, yb):
        # (len(a), len(b)) matrix of euclidean distances
        dx = xa.reshape(-1, 1) - xb.reshape(1, -1)
        dy = ya.reshape(-1, 1) - yb.reshape(1, -1)
        return (dx**2 + dy**2)**.5

    # seed position of each root plant
    # --------------------------------
    def seed_position(tree):
        """ output: plant-id, x, y """
        seed_mask = tree.segment.seed > 0
        seg_nodes = tree.segment.node[seed_mask]
        seg_label = tree.segment.seed[seed_mask]

        keep = seg_nodes.all(axis=1)        # remove bg segment
        seg_nodes = seg_nodes[keep]
        seg_label = seg_label[keep]

        plant_ids = np.unique(seg_label)
        labels = seg_label.reshape(-1, 1)
        sx = nd.mean(tree.node.x()[seg_nodes], labels=labels, index=plant_ids)
        sy = nd.mean(tree.node.y()[seg_nodes], labels=labels, index=plant_ids)
        return plant_ids, sx, sy

    rpid, rx, ry = seed_position(self.get('ref'))
    cpid, cx, cy = seed_position(self.get('cmp'))

    dist = pairwise_distance(rx, ry, cx, cy)
    match, r_unmatch, c_unmatch = direct_matching(dist, max_d=max_distance)

    self.mapping = _Mapping()
    self.mapping.plant = dict((rpid[i], cpid[j]) for i, j in match)
    self.mapping.plant_missed_ref = [rpid[i] for i in r_unmatch]
    self.mapping.plant_missed_cmp = [cpid[j] for j in c_unmatch]
def compute_tree_stat(tree, stat_names='all', mask=None, save=True):
    """ Compute the statistics listed in `stat_names` on `tree`,
    using optional `mask` filter function.

    Statistics are looked up in the module-level `stat_list` registry;
    results are stored in `tree.stat` and, if `save`, `tree.dump()` is
    called before returning the tree.
    """
    if stat_names == 'all':
        selected = stat_list
    else:
        selected = dict((name, stat_list[name]) for name in stat_names)

    measures = _Mapping()
    for name, compute in selected.iteritems():
        measures[name] = compute(tree, mask=mask)

    tree.stat = measures
    if save:
        tree.dump()
    return tree
def _plot_tc(tc, x, y, plant_id, title, xlabel, ylabel, split=False,
             merge_unique=False, content='scatter', legend=str,
             print_fct=None, cla=True):
    """ actual plotting done by `plot_stat` and `plot_compare`

    split:   None/False for a single series, or an attribute path (string)
             or list of attribute paths into each tree's metadata used to
             group the data points
    legend:  a function that converts a label into string
    content: either 'scatter' or 'box'
    """
    import matplotlib.pyplot as plt
    from matplotlib import pylab
    from matplotlib.backends import backend, interactive_bk

    if cla:
        plt.cla()

    # BUG FIX: the default is `split=False` but only `None` was treated as
    # "no split", so calling with the default crashed on `for spl in split`
    if split is None or split is False:
        plt.plot(x, y, '.')
        if print_fct:
            print_fct(title, x, y)
    else:
        # build one label per tree from the requested metadata attribute(s)
        if isinstance(split, basestring):
            split = ['metadata'] + split.split('.')
            label = [reduce(getattr, [t] + split) for t in tc]
            label = np.array(label)
            label_set = np.unique(label)
        else:
            label = []
            for spl in split:
                spl = ['metadata'] + spl.split('.')
                label.append([reduce(getattr, [t] + spl) for t in tc])
            from rhizoscan.ndarray import unique_rows
            label = np.array(label).T
            label_set = unique_rows(label)

        if content == 'scatter':
            color = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
            for i, lab in enumerate(label_set):
                lab_mask = label == lab
                if lab_mask.ndim > 1:
                    lab_mask = lab_mask.all(axis=-1)
                yi = y[lab_mask]
                xi = x[lab_mask]
                if merge_unique:
                    # merge identical (x,y) points; marker size encodes count
                    pos = np.concatenate([xi[:, None], yi[:, None]], axis=-1)
                    pos = np.ascontiguousarray(pos).view([('x', pos.dtype), ('y', pos.dtype)])
                    v, s = np.unique(pos, return_inverse=1)
                    size = np.bincount(s)
                    xi, yi = v['x'], v['y']
                else:
                    size = 2
                label_str = legend(lab)
                colori = color[i % len(color)]
                plt.scatter(xi, yi, s=8 * size, c=colori, edgecolors='none', label=label_str)
                if print_fct:
                    print_fct(title + ' - ' + label_str, xi, yi)
            plt.legend(loc=0)
        else:  # if content=='box':
            boxes = []
            names = []
            for i, lab in enumerate(label_set):
                lab_mask = label == lab
                if lab_mask.ndim > 1:
                    lab_mask = lab_mask.all(axis=-1)
                boxes.append(y[lab_mask])
                names.append(legend(lab))
            bp = plt.boxplot(boxes)
            for f in bp['fliers']:
                f.remove()
            plt.xticks(range(1, len(names) + 1), names)

    ax = plt.gca()
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)

    # on interactive backends, attach plotted data to the axe and register
    # (once per figure) the click callback used to inspect selected trees
    if backend in interactive_bk:
        ax.tree_data = _Mapping(title=title, x=x, y=y, tc=tc, plant_id=plant_id)
        flag = '_ROOT_MEASUREMENT_CB'
        if not hasattr(plt.gcf(), flag):
            cid = pylab.connect('button_press_event', _plot_axe_selected_tree)
            setattr(plt.gcf(), flag, cid)

    return ax
def add_comparison(self, name, value):
    """ Store `value` under self['comparison'][name], creating the
    'comparison' Mapping if it does not exist yet.
    """
    # avoid `setdefault('comparison', _Mapping())`: setdefault evaluates
    # its default argument eagerly, constructing a throw-away _Mapping on
    # every call even when the key already exists
    if 'comparison' not in self:
        self['comparison'] = _Mapping()
    self['comparison'][name] = value
def assert_mapping(subm, k):
    """ Ensure `subm[k]` exists and is mapping-like (has `iteritems`);
    replace it with a fresh _Mapping otherwise, and return it.
    """
    # `k not in subm` replaces dict.has_key(), which is removed in
    # Python 3 and discouraged since Python 2.2
    if k not in subm or not hasattr(subm[k], 'iteritems'):
        subm[k] = _Mapping()
    return subm[k]
def make_dataset(ini_file, base_dir=None, data_dir=None, out_dir=None, verbose=False):
    """ Return a list of dataset item following parsing rules found in `ini_file`

    :Inputs:
      - `ini_file`:
          file with ini-formated content indicating the dataset to be loaded
          See the 'ini format' section for details
      - `base_dir`:
          Starting directories for inputs and outputs (see 'directories' below)
          If not given, use the directory of given `ini_file`
      - `data_dir`:
          Directories to look for data inputs (see 'ini format' below)
          If not given, use the value in the ini file, or `base_dir`
          If it is not an absolute path, prepend it with `base_dir`
      - `out_dir`:
          Directories to set output into (see 'directories' below)
          If not given, use the value in the ini file, or `base_dir`
          If it is not an absolute path, prepend it with `base_dir`
      - `verbose`:
          If >0, print some message on loaded dataset

    :Outputs:
      - A list of `Mapping` object, one for each file found with suitable
        output files configure (nothing is saved). Each contains the attributes:
          - 'filename': the found file name
          - 'metadata': constructed from the ini file
          - '__key__':  an id key made from 'filename' with `data_dir` removed
      - The list of files found but which could not be parsed
      - The base output directory to all item (see 'directories' below)

    :directories:
        All output files and directories are set following the values of
        given `base_dir`, `data_dir`, and `out_dir`

        The associated file of output `Mapping` items are set to:
          "[out_dir]/[item-end].namespace"

        The output items have their FileStorage set (see Mapping doc) to:
          "[out_dir]/[item-end]_{}"

        Where `item-end` is the remaining part of the filename of found items
        after removing `data_dir` from the start and the file extension.

        See datastruture.Data and Mapping documentations for details on `Data`
        associated file and `Mapping` FileStorage

    :ini format:
        ##todo
    """
    import os
    from os.path import join as pjoin
    from os.path import splitext, dirname, exists
    import re
    from time import strptime
    from glob import glob
    from rhizoscan.misc.path import abspath

    if not exists(ini_file):
        raise TypeError('input "ini_file" does not exist')

    # load content of ini file
    ini = _load_ini_file(ini_file)
    if verbose > 2:
        print 'loaded ini:'
        print ini.multilines_str(tab=1)

    # directory variable: resolve data_dir/out_dir from argument, then ini
    # file, then base_dir; relative paths are made absolute w.r.t. base_dir
    if base_dir is None:
        base_dir = dirname(abspath(ini_file))
    if data_dir is None:
        data_dir = ini['PARSING'].get('data_dir')
    if not data_dir:
        data_dir = base_dir
    data_dir = abspath(data_dir, base_dir)
    if out_dir is None:
        out_dir = ini['PARSING'].get('out_dir')
    if not out_dir:
        out_dir = base_dir
    out_dir = abspath(out_dir, base_dir)

    # find all files that fit pattern given in ini_file
    # -------------------------------------------------
    # list all suitable files
    # the pattern alternates literal parts and [field:type] capture parts;
    # capture parts become '*' for globbing
    file_pattern = ini['PARSING']['pattern']
    file_pattern = file_pattern.replace('\\', '/')  # for windows
    file_pattern = re.split('[\[\]]', file_pattern)

    glob_pattern = pjoin(data_dir, '*'.join(file_pattern[::2]))
    file_list = sorted(glob(glob_pattern))

    if verbose:
        print 'glob:', glob_pattern
        if verbose > 1:
            print ' ' + '\n '.join(file_list)

    # prepare metatata parser
    # -----------------------
    # meta data list and regular expression to parse file names
    # even slices of file_pattern are literals, odd slices are the
    # [name:type] fields; each field becomes a regex capture group
    group_re = dict(int='([0-9]*)', float='([-+]?[0-9]*\.?[0-9]+)')
    ##meta_parser = re.compile('(.*)'.join([fp.replace('*','.*') for fp in file_pattern[::2]]))
    meta_list = [m.split(':') for m in file_pattern[1::2]]
    meta_list = [m if len(m) > 1 else m + ['str'] for m in meta_list]  # default type: str
    meta_parser = file_pattern[:]
    meta_parser[1::2] = [group_re.get(mtype, '(.*)') for name, mtype in meta_list]
    meta_parser = re.compile(''.join(meta_parser))
    meta_list = [_Mapping(name=name, type=mtype) for name, mtype in meta_list]
    date_pattern = ini['PARSING'].get('date', '')  ## to remove?

    # attach an `eval` converter to each metadata field according to its type
    types = dict(int=int, float=float, str=str)
    for m in meta_list:
        if m.type == 'date':  ## to remove?
            m.eval = lambda s: strptime(s, date_pattern)
        elif m.type == "$":
            # '$' fields look up an ini section named by the parsed value
            default = m.name + '_default'
            m.eval = lambda name: ini.get(name, default=default)
        else:
            try:
                m.eval = types[m.type]
            except KeyError:
                raise KeyError('unrecognized parsing type %s for field %s' % (m.type, m.name))

    default_meta = ini.get('metadata', {})
    for k, v in default_meta.iteritems():
        default_meta[k] = _param_eval(v)

    # if grouping
    # [PARSING] group maps a start index (per directory) to a group name
    if ini['PARSING'].has_key('group'):
        g = ini['PARSING']['group']  ## eval... value is a dict
        dlist = [dirname(fi) for fi in file_list]
        # fenum[i]=1 iff file i is in the same directory as file i-1
        fenum = [int(di == dlist[i]) for i, di in enumerate(dlist[1:])]  # diff of dlist
        fenum = _np.array(
            reduce(lambda L, y: L + [(L[-1] + y) * y], [[0]] + fenum))  # ind of file in resp. dir.
        group = _np.zeros(fenum.max() + 1,
                          dtype='|S' + str(max([len(gi) for gi in g.itervalues()])))
        for start in sorted(g.keys()):
            group[start:] = [g[start]] * (len(group) - start)
        group = group[fenum]
        if verbose:
            print 'group:', g
            if verbose > 1:
                print ' detected:', group
    else:
        group = None

    if verbose:
        print 'metadata:', meta_parser.pattern,
        print '> ' + ', '.join((m.name + ':' + m.type for m in meta_list))

    # get global variable
    global_attr = ini.get('global', {})

    # parse all image files, set metadata and remove invalid
    # ------------------------------------------------------
    img_list = Dataset()
    invalid = []
    rm_len = len(data_dir)  ## imply images are in base_dir. is there a more general way
    for ind, f in enumerate(file_list):
        try:
            # __key__ = filename relative to data_dir, extension removed
            if rm_len > 0:
                subf = f[rm_len + 1:]
            else:
                subf = f
            subf = subf.replace('\\', '/')  # for windows
            fkey = splitext(subf)[0]
            out_file = pjoin(out_dir, fkey) + '.namespace'
            meta_value = meta_parser.match(subf).groups()
            if verbose > 1:
                print ' ' + str(meta_value) + ' from ' + subf + str(rm_len)
            # metadata: defaults, then group section, then parsed fields
            meta = _Mapping(**default_meta)
            if group is not None:
                meta.update(ini.get(group[ind], default=[]))
            for i, value in enumerate(meta_value):
                field = meta_list[i].name
                value = meta_list[i].eval(value)
                if field == '$':
                    # '$' fields merge a whole ini section into the metadata
                    meta.update(value)
                else:
                    _add_multilevel_key_value(meta, field, value)
            ds_item = _Mapping(filename=f, metadata=meta, __key__=fkey, **global_attr)
            ds_item.__loader_attributes__ = ['filename', 'metadata']
            ds_item.set_file(out_file, storage=True)
            img_list.append(ds_item)
        except Exception as e:
            # file found by glob but not parsable: record and continue
            invalid.append((type(e).__name__, e.message, f))

    return img_list, invalid, out_dir