def handle(self, tbl, opt):
    if opt.diff is None:
        return
    if os.path.isdir(opt.diff):
        trylocs = [os.path.join(opt.diff, os.path.basename(opt.input))]
        if not os.path.isabs(opt.input) and os.path.dirname(opt.input):
            trylocs.append(os.path.join(opt.diff, opt.input))
        for other in trylocs:
            if os.path.exists(other):
                break
    else:
        other = opt.diff
    # prepare labels, giving preference to basenames if unique
    filename = [other, opt.input]
    label = [os.path.basename(x) for x in filename]
    if label[0] == label[1]:
        label = filename
    ref = UniTable().fromfile(other)
    out = ref.diff(tbl, label1=label[0], label2=label[1], dump_sample=opt.diff_dump)
    if out:
        print '***** files differ: %s %s' % tuple(filename)
        print out
        print
        # allow multi-file processing
        #sys.exit(1)
        return False
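# Usage sketch (not part of the original module): driving the same table-level
# diff directly. UniTable().fromfile() and .diff() are the calls used by the
# handler above; the file names here are hypothetical, and dump_sample is
# omitted on the assumption that it is optional.
def demo_diff(path_a='baseline.csv', path_b='current.csv'):
    ref = UniTable().fromfile(path_a)
    new = UniTable().fromfile(path_b)
    report = ref.diff(new, label1=path_a, label2=path_b)
    if report:
        print report    # a non-empty report means the two tables differ
    return report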
def run_benchmark_iter(self, sizes=None):
    '''return a table of runtimes for all functions for a range of data sizes'''
    group = self.group
    if sizes is None:
        sizes = group.bench_sizes
    rawdata = self.make_data(group.nin, group.itypes, max(sizes))
    out = group._prep_testdata(*rawdata)
    if isinstance(out, tuple):
        alldata, kwargs = out
    else:
        alldata, kwargs = out, {}
    names = [name for (name, func) in self.testfuncs]
    perfseen = UniTable(keys=['_n_'] + names)
    skiplist = []
    for i, size in enumerate(sizes):
        perf = {'_n_': size}
        reference = None
        for name, func in self.testfuncs:
            if name in skiplist:
                # this function already exceeded the time budget at a smaller size
                perf[name] = self.NA
                continue
            timings, results = self.run_tests(func, alldata, [size], kwargs=kwargs)
            rate = timings[0]
            perf[name] = rate
            if self.too_long(rate=rate):
                skiplist.append(name)
            if reference is None:
                reference = results
            elif results is not None:
                # check that each implementation agrees with the first (reference) result
                self.compare_data(reference, results, name)
        perfseen.append(perf)
        # yield a snapshot after each size so callers can report progress incrementally
        yield perfseen.copy()
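# Usage sketch (hypothetical 'bench' argument, assumed to be an instance of the
# benchmarking class this method belongs to): the generator yields a growing
# copy of the performance table after every size, so progress can be reported
# incrementally instead of waiting for a one-shot run_benchmark() to finish.
def report_benchmark_progress(bench, sizes=(10, 100, 1000)):
    for snapshot in bench.run_benchmark_iter(sizes=list(sizes)):
        print snapshot    # each snapshot contains one more '_n_' row than the last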
def __init__(self, rules=None, *args, **kwargs):
    UniTable.__init__(self, *args, **kwargs)
    if not isinstance(rules, Rules):
        rules = Rules(rules)
    self._rules = rules
    self._rulearg = self.as_mapping()
    self._made = {}
    self._inprog = []
def handle(self, tbl, opt):
    if opt.add_index is None:
        return
    out = UniTable()
    out[opt.add_index] = range(len(tbl))
    out.update(tbl)
    return out
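# Sketch of the idiom used by the add_index handler above, with hypothetical
# data and column name: fill a fresh UniTable with the index column first and
# then update() it with the source table, presumably so the index leads the
# field order.
def demo_add_index():
    tbl = UniTable(keys=['value'])
    tbl['value'] = ['a', 'b', 'c']
    out = UniTable()
    out['_index_'] = range(len(tbl))
    out.update(tbl)
    return out    # expected columns: _index_, value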
def run_benchmark(self, sizes=None):
    '''return a table of runtimes for all functions for a range of data sizes'''
    group = self.group
    if sizes is None:
        sizes = group.bench_sizes
    performance = UniTable()
    performance['_n_'] = sizes
    rawdata = self.make_data(group.nin, group.itypes, max(sizes))
    out = group._prep_testdata(*rawdata)
    if isinstance(out, tuple):
        alldata, kwargs = out
    else:
        alldata, kwargs = out, {}
    reference = None
    for (name, func) in self.testfuncs:
        timings, results = self.run_tests(func, alldata, sizes, kwargs=kwargs)
        if len(timings) == len(performance):
            performance[name] = timings
        else:
            # run_tests stopped early: pad with NA so every column keeps the same length
            missing = len(performance) - len(timings)
            performance[name] = list(timings) + [self.NA] * missing
            results = list(results) + [None] * missing
        if reference is None:
            reference = results
        elif results is not None:
            self.compare_data(reference, results, name)
    return performance
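# A minimal standalone sketch of the NA-padding above, with hypothetical numbers
# and float('nan') standing in for self.NA: when run_tests() stops early because
# a function became too slow, the remaining sizes are filled with a placeholder
# so every column keeps the same length as the '_n_' column.
def pad_timings(timings, nsizes, na=float('nan')):
    missing = nsizes - len(timings)
    return list(timings) + [na] * missing

# pad_timings([0.01, 0.12, 1.4], 5) -> [0.01, 0.12, 1.4, nan, nan]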
def __init__(self, filename=None, keys=[]):
    self.keys = keys
    self.data = data = {'': 0}  # try to pre-assign empty string value
    self.filename = filename
    if filename and os.path.exists(filename):
        from unitable import UniTable
        tbl = UniTable().fromfile(filename)
        for i, value in it.izip(tbl['index'], tbl['data']):
            data[value] = i
        del tbl
def fld_summary(tbl):
    out = UniTable()
    keys = tbl.keys()
    out['@'] = range(1, len(keys) + 1)
    out['field'] = keys
    out['typecode'] = [re.sub(r'^a(.*)', r'Char(\1)', fmt) for fmt in tbl.get_type_codes()]
    out['size'] = [col.itemsize() for col in tbl.values()]
    nmasked = [count_masked(col) for col in tbl.values()]
    if sum(nmasked):
        out['masked'] = [str(n) for n in nmasked]
        masked = out['masked']
        masked[masked == '0'] = ''
    return out
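# Usage sketch (hypothetical columns and values): fld_summary() returns a new
# UniTable with one row per field of the input, so the result can be printed or
# written out like any other table. It relies on count_masked() and the column
# itemsize()/get_type_codes() calls, which are assumed to be available at runtime.
def demo_fld_summary():
    tbl = UniTable(keys=['id', 'name'])
    tbl['id'] = [1, 2, 3]
    tbl['name'] = ['ann', 'bob', 'cal']
    return fld_summary(tbl)    # columns: @, field, typecode, size (and masked, if any)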
def flush(self):
    if self.filename and len(self.data) > 1:
        from unitable import UniTable
        tbl = UniTable(keys=['index', 'data'])
        tmp = self.data.items()
        tbl['index'] = [x[1] for x in tmp]
        tbl['data'] = [x[0] for x in tmp]
        del tmp
        tbl.sort_on('index')
        tbl.to_csv_file(self.filename)
        del tbl
def handle(tbl, opt):
    if opt.expand is None:
        return
    cntkey = opt.expand
    keys = tbl.keys()[:]
    try:
        keys.remove(cntkey)
    except ValueError:
        log.warning('cannot expand table, has no count column: %r', cntkey)
        return
    ntbl = UniTable(keys=keys)
    xcnt = tbl[cntkey]
    for key in keys:
        oval = tbl[key]
        nval = []
        # repeat each value according to the count column
        for cnt, val in it.izip(xcnt, oval):
            for i in range(cnt):
                nval.append(val)
        ntbl[key] = nval
    return ntbl
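# A minimal standalone sketch of the expansion rule applied by the handler
# above, using plain lists and hypothetical values: each value is repeated
# according to its count, and the count column itself is dropped from the result.
def expand_by_count(values, counts):
    expanded = []
    for cnt, val in zip(counts, values):
        expanded.extend([val] * cnt)
    return expanded

# expand_by_count(['a', 'b', 'c'], [2, 0, 3]) -> ['a', 'a', 'c', 'c', 'c']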