def run_benchmark_iter(self, sizes=None):
    '''return a table of runtimes for all functions for a range of data sizes'''
    group = self.group
    if sizes is None:
        sizes = group.bench_sizes
    # build the largest data set once; each size slices from it via run_tests
    raw = self.make_data(group.nin, group.itypes, max(sizes))
    prepped = group._prep_testdata(*raw)
    if isinstance(prepped, tuple):
        alldata, kwargs = prepped
    else:
        alldata, kwargs = prepped, {}
    names = [name for name, _func in self.testfuncs]
    table = UniTable(keys=['_n_'] + names)
    skipped = []   # functions that have exceeded the time budget
    for size in sizes:
        row = {'_n_': size}
        reference = None
        for name, func in self.testfuncs:
            if name in skipped:
                row[name] = self.NA
                continue
            timings, results = self.run_tests(func, alldata, [size], kwargs=kwargs)
            rate = timings[0]
            row[name] = rate
            # stop timing this function at larger sizes once it gets too slow
            if self.too_long(rate=rate):
                skipped.append(name)
            # first function's output is the reference; others are checked against it
            if reference is None:
                reference = results
            elif results is not None:
                self.compare_data(reference, results, name)
        table.append(row)
        # yield a snapshot so callers can display progress incrementally
        yield table.copy()
def __init__(self, filename, target, inputs=None, threshold=1.0e-9):
    '''build a model from a file or UniTable and record its self-test accuracy'''
    CountsBase.__init__(self, threshold=threshold)
    # accept either an already-loaded UniTable or a filename to read
    if isinstance(filename, UniTable):
        data = filename
    else:
        data = UniTable().fromfile(filename)
    self.model_build(data, target, inputs)
    # verify the model against its own training data
    verify = UniTable()
    self.verify_result = verify
    verify['orig'] = data[self.target]
    verify['pred'] = self.model_predict(data)
    verify['agree'] = verify['orig'] == verify['pred']
    # fraction of records where prediction matches the original target
    self.accuracy = float(verify['agree'].sum()) / len(data)
def run_benchmark(self, sizes=None):
    '''return a table of runtimes for all functions for a range of data sizes'''
    group = self.group
    if sizes is None:
        sizes = group.bench_sizes
    table = UniTable()
    table['_n_'] = sizes
    # build the largest data set once; run_tests times each requested size
    raw = self.make_data(group.nin, group.itypes, max(sizes))
    prepped = group._prep_testdata(*raw)
    if isinstance(prepped, tuple):
        alldata, kwargs = prepped
    else:
        alldata, kwargs = prepped, {}
    reference = None
    for name, func in self.testfuncs:
        timings, results = self.run_tests(func, alldata, sizes, kwargs=kwargs)
        # a function may bail out early; pad its columns so rows line up
        if len(timings) != len(table):
            shortfall = len(table) - len(timings)
            timings = list(timings) + [self.NA] * shortfall
            results = list(results) + [None] * shortfall
        table[name] = timings
        # first function's output is the reference; others are checked against it
        if reference is None:
            reference = results
        elif results is not None:
            self.compare_data(reference, results, name)
    return table
def top_ten(filenames): # track values for each field seen_fields = {} total_recs = 0 # read each file in turn for filename in filenames: tbl = UniTable() tbl.fromfile(filename) keys = tbl.keys()[:] if '_count_' in keys: total_recs += tbl['_count_'].sum() keys.remove('_count_') else: total_recs += len(tbl) tbl['_count_'] = 1 # read each column in turn for key in keys: seen_values = seen_fields.setdefault(key, {}) # iterate over counts and values for cnt, value in izip(tbl['_count_'], tbl[key]): try: seen_values[value] += cnt except KeyError: seen_values[value] = cnt # report results for key, seen_values in seen_fields.items(): # find top ten top_cnts = sorted(seen_values.values()) cutoff = top_cnts[-10:][0] tmp = sorted([cnt, value] for (value, cnt) in seen_values.items() if cnt >= cutoff) top = reversed(tmp[-10:]) # report print 'Field:', key for (cnt, value) in top: percent = 100.0 * cnt / float(total_recs) print '\t(%8.5f%%) %r' % (percent, value)
def top_ten(filenames): # track values for each field seen_fields = {} total_recs = 0 # read each file in turn for filename in filenames: tbl = UniTable() tbl.fromfile(filename) keys = tbl.keys()[:] if '_count_' in keys: total_recs += tbl['_count_'].sum() keys.remove('_count_') else: total_recs += len(tbl) tbl['_count_'] = 1 # read each column in turn for key in keys: seen_values = seen_fields.setdefault(key,{}) # iterate over counts and values for cnt,value in izip(tbl['_count_'],tbl[key]): try: seen_values[value] += cnt except KeyError: seen_values[value] = cnt # report results for key,seen_values in seen_fields.items(): # find top ten top_cnts = sorted(seen_values.values()) cutoff = top_cnts[-10:][0] tmp = sorted([cnt,value] for (value,cnt) in seen_values.items() if cnt >= cutoff) top = reversed(tmp[-10:]) # report print 'Field:', key for (cnt,value) in top: percent = 100.0*cnt/float(total_recs) print '\t(%8.5f%%) %r' % (percent,value)
def __init__(self, filename=None, keys=None):
    '''initialize the value->index mapping, preloading from filename if it exists

    filename -- optional CSV-style file with 'index' and 'data' columns
    keys     -- optional list of keys stored on the instance (defaults to a
                fresh empty list)
    '''
    # BUGFIX: the original default was keys=[] and the list was stored directly
    # on the instance, so all instances created without an explicit keys
    # argument shared (and could mutate) one list object.
    self.keys = [] if keys is None else keys
    self.data = data = {'': 0}   # try to pre-assign empty string value
    self.filename = filename
    if filename and os.path.exists(filename):
        from augustus.kernel.unitable import UniTable
        tbl = UniTable().fromfile(filename)
        for i, value in it.izip(tbl['index'], tbl['data']):
            data[value] = i
        del tbl
def _make_tbl(self, cfunc, ccfunc):
    '''render a table: a header row per tval plus one row per (ikey, ival) pair'''
    tbl = UniTable()
    pairs = list(self.iter_ikv())
    # leading '' entry aligns the field/value labels with the header row
    tbl['__fld__'] = [''] + [k for (k, _v) in pairs]
    tbl['__val__'] = [''] + [v for (_k, v) in pairs]
    for tval in self.all_tval():
        header = cfunc(tval)
        column = [ccfunc(tval, k, v) for (k, v) in pairs]
        tbl[str(tval)] = [header] + column
    return str(tbl)
def test1(self):
    '''a record's positional access must follow the caller-specified key order'''
    keyorder = ('a', 'ts', 'values')
    data = {
        'a': (1, 2, 3),
        'ts': (34567, 35678, 34657),
        'values': (5.4, 2.2, 9.9),
    }
    table = UniTable(keys=keyorder, **data)
    first_row = table[0]
    assert first_row[0] == 1
def receive_unitable(self):
    '''read one UniTable from self.handle, honoring header/separator/fixed-width options

    Returns the UniTable on success, or None if reading/parsing fails.
    '''
    _csvargs = {}
    if self.types is not None:
        _csvargs['types'] = self.types
    try:
        if self.header is None:
            if self.sep is None:
                #No special treatment needed
                if ((len(_csvargs) == 0) and (self.ffConvert is None)):
                    return UniTable().fromfile(self.handle)
                elif (self.ffConvert is not None):
                    # Jonathan's clean solution:
                    fields = self.ffConvert.fields
                    return UniTable().from_fixed_width_file(
                        self.handle, fields)
                else:
                    return UniTable().from_csv_file(
                        self.handle, **_csvargs)
            else:
                return UniTable().from_csv_file(self.handle,
                    insep=self.sep, **_csvargs)
        else:
            if self.sep is None:
                return UniTable().from_csv_file(self.handle,
                    header=self.header, **_csvargs)
            else:
                return UniTable().from_csv_file(self.handle,
                    header=self.header, insep=self.sep,
                    **_csvargs)
    except Exception:
        # BUGFIX: was a bare 'except:', which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed so those propagate while any ordinary
        # read/parse failure is still reported as None
        return None
def flush(self):
    '''persist the in-memory value->index mapping to self.filename as CSV'''
    # nothing to write without a filename, or if only the pre-assigned
    # empty-string entry is present
    if not self.filename or len(self.data) <= 1:
        return
    from augustus.kernel.unitable import UniTable
    tbl = UniTable(keys=['index', 'data'])
    pairs = self.data.items()
    tbl['index'] = [idx for (value, idx) in pairs]
    tbl['data'] = [value for (value, idx) in pairs]
    del pairs
    # rows come out ordered by assigned index
    tbl.sort_on('index')
    tbl.to_csv_file(self.filename)
    del tbl
def receive_unitable(self):
    '''read one UniTable from self.handle, supporting size-framed and file-framed input

    Returns the UniTable on success, or None if any exception occurs while
    reading.  NOTE(review): on the first size-framed read this method also
    mutates self.sep and self.header (sniffed from the data), so later calls
    take the header-already-known branch — the read is stateful by design,
    presumably one table per frame; confirm against the caller.
    '''
    # collect optional csv keyword arguments from configured attributes
    _csvargs={}
    if self.types is not None:
        _csvargs['types'] = self.types
    if self.sep is not None:
        _csvargs['insep'] = self.sep
    if self.header is not None:
        _csvargs['header'] = self.header
    try:
        if self.header is None:
            if self.sep is None:
                #No special treatment needed
                if ((len(_csvargs) == 0) and (self.ffConvert is None)):
                    u = UniTable()
                    if self.framing != 'EOF':
                        #New size-framed stream
                        u.fromfile(self.handle, bufferFramed=True, chunksize=self.chunksize)
                        # remember the sniffed delimiter and header for subsequent frames
                        d = u.get_csv_dialect()
                        self.sep = d.delimiter
                        self.header = self.sep.join(u.keys())
                    else:
                        # Traditional file-framed:
                        u.fromfile(self.handle, bufferFramed=False, chunksize=self.chunksize)
                    return u
                elif self.ffConvert is not None:
                    # Jonathan's clean solution:
                    # fixed-width input: field specs come from the converter
                    fields = self.ffConvert.fields
                    return UniTable().from_fixed_width_file(self.handle, fields)
                else:
                    return UniTable().from_csv_file(self.handle,**_csvargs)
            else:
                # separator known but header not yet seen
                u = UniTable()
                if self.framing != 'EOF':
                    #New size-framed stream
                    u.fromfile(self.handle, bufferFramed=True, insep = self.sep, chunksize=self.chunksize, **_csvargs)
                    # remember the header for subsequent frames
                    self.header = self.sep.join(u.keys())
                else:
                    # Traditional file-framed:
                    u.fromfile(self.handle, insep = self.sep, bufferFramed=False, chunksize=self.chunksize, **_csvargs)
                return u
                #return UniTable().from_csv_file(self.handle, insep = self.sep, **_csvargs)
        else:
            if self.framing != 'EOF':
                # A header exists so a prior read has been made.
                if self.sep is None:
                    return UniTable().from_csv_file(self.handle, bufferFramed=True, chunksize=self.chunksize, header = self.header, **_csvargs)
                else:
                    return UniTable().from_csv_file(self.handle, bufferFramed=True, chunksize=self.chunksize, header = self.header, insep = self.sep, **_csvargs)
            else:
                return UniTable().from_csv_file(self.handle, bufferFramed=False, **_csvargs)
    except Exception, inst:
        # any failure during the read is reported as None
        #print "Exception is: {0}".format(type(inst))
        #print inst.args
        #print inst
        return None