Example #1
 def run_benchmark_iter(self,sizes=None):
   '''return a table of runtimes for all functions for a range of data sizes'''
   group = self.group
   if sizes is None:
     sizes = group.bench_sizes
   rawdata = self.make_data(group.nin,group.itypes,max(sizes))
   out = group._prep_testdata(*rawdata)
   if isinstance(out,tuple):
     alldata,kwargs = out
   else:
     alldata,kwargs = out,{}
   names = [name for (name,func) in self.testfuncs]
   perfseen = UniTable(keys=['_n_']+names)
   skiplist = []
   for i,size in enumerate(sizes):
     perf = {'_n_':size}
     reference = None
     for name,func in self.testfuncs:
       if name in skiplist:
         perf[name] = self.NA
         continue
       timings,results = self.run_tests(func,alldata,[size],kwargs=kwargs)
       rate = timings[0]
       perf[name] = rate
       if self.too_long(rate=rate):
         skiplist.append(name)
       if reference is None:
         reference = results
       elif results is not None:
         self.compare_data(reference,results,name)
     perfseen.append(perf)
     yield perfseen.copy()
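
A minimal driver sketch, assuming `bench` is an instance of the enclosing benchmark class (constructed elsewhere in the suite) and that the size list is made up; the generator yields a fresh copy of the growing table after each size, so partial results can be shown while slower functions are still being timed.

    # hypothetical consumer of the generator above
    for partial in bench.run_benchmark_iter(sizes=[1000, 10000, 100000]):
        print(partial)   # a UniTable with one '_n_' row per completed size
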
Example #2
    def __init__(self, filename, target, inputs=None, threshold=1.0e-9):
        CountsBase.__init__(self, threshold=threshold)
        if isinstance(filename, UniTable):
            data = filename
        else:
            data = UniTable().fromfile(filename)
        self.model_build(data, target, inputs)

        self.verify_result = verify = UniTable()
        verify['orig'] = data[self.target]
        verify['pred'] = self.model_predict(data)
        verify['agree'] = verify['orig'] == verify['pred']
        self.accuracy = float(verify['agree'].sum()) / len(data)
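
A usage sketch, assuming a concrete CountsBase subclass (the name `MyCountsModel`, the file name, and the column name are all made up) that supplies model_build and model_predict; note the constructor also accepts an existing UniTable in place of a filename.

    # hypothetical instantiation of the class whose __init__ is shown above
    model = MyCountsModel('train.csv', target='label')
    print(model.accuracy)                # fraction of rows where pred == orig
    print(model.verify_result['agree'])  # per-row boolean column built in __init__
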
Example #3
 def run_benchmark(self,sizes=None):
   '''return a table of runtimes for all functions for a range of data sizes'''
   group = self.group
   if sizes is None:
     sizes = group.bench_sizes
   performance = UniTable()
   performance['_n_'] = sizes
   rawdata = self.make_data(group.nin,group.itypes,max(sizes))
   out = group._prep_testdata(*rawdata)
   if isinstance(out,tuple):
     alldata,kwargs = out
   else:
     alldata,kwargs = out,{}
   reference = None
   for (name,func) in self.testfuncs:
     timings,results = self.run_tests(func,alldata,sizes,kwargs=kwargs)
     if len(timings) == len(performance):
       performance[name] = timings
     else:
       missing = len(performance)-len(timings)
       performance[name] = list(timings) + [self.NA]*missing
       results = list(results) + [None]*missing
     if reference is None:
       reference = results
     elif results is not None:
       self.compare_data(reference,results,name)
   return performance
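
A usage sketch mirroring the generator variant above (again assuming a benchmark instance `bench`); this method blocks until every function has been timed at every size and returns the finished table in one piece.

    # hypothetical call; the column names besides '_n_' come from self.testfuncs
    perf = bench.run_benchmark(sizes=[1000, 10000])
    print(perf.keys())   # ['_n_', <one key per test function>]
    print(perf)
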
Example #4
def top_ten(filenames):

    # track values for each field
    seen_fields = {}
    total_recs = 0

    # read each file in turn
    for filename in filenames:
        tbl = UniTable()
        tbl.fromfile(filename)

        keys = tbl.keys()[:]
        if '_count_' in keys:
            total_recs += tbl['_count_'].sum()
            keys.remove('_count_')
        else:
            total_recs += len(tbl)
            tbl['_count_'] = 1

        # read each column in turn
        for key in keys:
            seen_values = seen_fields.setdefault(key, {})

            # iterate over counts and values
            for cnt, value in izip(tbl['_count_'], tbl[key]):
                try:
                    seen_values[value] += cnt
                except KeyError:
                    seen_values[value] = cnt

    # report results
    for key, seen_values in seen_fields.items():

        # find top ten
        top_cnts = sorted(seen_values.values())
        cutoff = top_cnts[-10:][0]
        tmp = sorted([cnt, value] for (value, cnt) in seen_values.items()
                     if cnt >= cutoff)
        top = reversed(tmp[-10:])

        # report
        print 'Field:', key
        for (cnt, value) in top:
            percent = 100.0 * cnt / float(total_recs)
            print '\t(%8.5f%%) %r' % (percent, value)
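
The function assumes module-level imports of `izip` and `UniTable`, e.g. `from itertools import izip` (Python 2) and `from augustus.kernel.unitable import UniTable` (the import path used in later examples). A usage sketch with made-up file names:

    # prints, for each field, the ten most frequent values and their share of all records
    top_ten(['part1.csv', 'part2.csv'])
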
Example #5
 def __init__(self,filename=None,keys=[]):
   self.keys = keys
   self.data = data = {'':0}  # try to pre-assign empty string value
   self.filename = filename
   if filename and os.path.exists(filename):
     from augustus.kernel.unitable import UniTable
     tbl = UniTable().fromfile(filename)
     for i,value in it.izip(tbl['index'],tbl['data']):
       data[value] = i
     del tbl
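
This constructor assumes `import os` and `import itertools as it` at module level; it rebuilds an in-memory value-to-index map from a UniTable file written by the matching flush() method shown in a later example. A sketch with a made-up class name for the owner of this __init__:

    # hypothetical owning class and file name
    cache = ValueIndexCache('value_index.csv')
    print(cache.data[''])   # the empty string is pre-assigned index 0
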
Example #6
 def _make_tbl(self, cfunc, ccfunc):
     out = UniTable()
     ikvlist = list(self.iter_ikv())
     out['__fld__'] = [''] + [ikv[0] for ikv in ikvlist]
     out['__val__'] = [''] + [ikv[1] for ikv in ikvlist]
     for tval in self.all_tval():
         value = cfunc(tval)
         ikv_vals = [ccfunc(tval, ikey, ival) for (ikey, ival) in ikvlist]
         out[str(tval)] = [value] + ikv_vals
     return str(out)
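
The method builds its result column by column: each UniTable column assignment takes a list of equal length, and str() on the finished table yields the formatted text that gets returned. A standalone sketch of that pattern with made-up data:

    # toy cross-tab assembled the same way _make_tbl assembles its output
    out = UniTable()
    out['__fld__'] = ['', 'color', 'color']
    out['__val__'] = ['', 'red', 'blue']
    out['0'] = [10, 4, 6]   # one column per target value, first cell from cfunc
    print(str(out))
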
Example #7
 def test1(self):
     data = {
         'a': (1, 2, 3),
         'ts': (34567, 35678, 34657),
         'values': (5.4, 2.2, 9.9)
     }
     keyorder = ('a', 'ts', 'values')
     t = UniTable(keys=keyorder, **data)
     rec = t[0]
     assert rec[0] == 1
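
The same constructor pattern, extended slightly (as an illustrative sketch, not part of the original test) to show column access alongside the record access the assertion exercises:

    t = UniTable(keys=('a', 'ts', 'values'),
                 a=(1, 2, 3), ts=(34567, 35678, 34657), values=(5.4, 2.2, 9.9))
    print(t['ts'])   # whole column by name
    rec = t[1]       # single record by position
    print(rec[2])    # third field of the second record: 2.2
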
Example #8
 def receive_unitable(self):
     _csvargs = {}
     if self.types is not None:
         _csvargs['types'] = self.types
     try:
         if self.header is None:
             if self.sep is None:
                 #No special treatment needed
                 if ((len(_csvargs) == 0) and (self.ffConvert is None)):
                     return UniTable().fromfile(self.handle)
                 elif (self.ffConvert is not None):
                     # Jonathan's clean solution:
                     fields = self.ffConvert.fields
                     return UniTable().from_fixed_width_file(
                         self.handle, fields)
                 else:
                     return UniTable().from_csv_file(
                         self.handle, **_csvargs)
             else:
                 return UniTable().from_csv_file(self.handle,
                                                 insep=self.sep,
                                                 **_csvargs)
         else:
             if self.sep is None:
                 return UniTable().from_csv_file(self.handle,
                                                 header=self.header,
                                                 **_csvargs)
             else:
                 return UniTable().from_csv_file(self.handle,
                                                 header=self.header,
                                                 insep=self.sep,
                                                 **_csvargs)
     except:
         return None
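
Stripped of the branching, the method reduces to one of three UniTable entry points, depending on how much the caller already knows about the stream. A sketch, with `fh` standing in for an open file handle and `fields` for a fixed-width layout (both assumptions):

    # three alternative entry points; pick whichever matches the input
    tbl = UniTable().fromfile(fh)                                  # let UniTable detect the format
    tbl = UniTable().from_csv_file(fh, insep='|', header='a,b,c')  # explicit separator and header line
    tbl = UniTable().from_fixed_width_file(fh, fields)             # fixed-width records
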
Example #9
 def flush(self):
   if self.filename and len(self.data) > 1:
     from augustus.kernel.unitable import UniTable
     tbl = UniTable(keys=['index','data'])
     tmp = self.data.items()
     tbl['index'] = [x[1] for x in tmp]
     tbl['data'] = [x[0] for x in tmp]
     del tmp
     tbl.sort_on('index')
     tbl.to_csv_file(self.filename)
     del tbl
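
flush() is the write-side counterpart of the __init__ shown earlier: it turns the in-memory {value: index} dict back into a two-column UniTable and writes it out as CSV. A standalone round trip using only calls that appear in these two methods (the file name is an assumption):

    tbl = UniTable(keys=['index', 'data'])
    tbl['index'] = [0, 1, 2]
    tbl['data'] = ['', 'red', 'blue']
    tbl.sort_on('index')
    tbl.to_csv_file('value_index.csv')
    back = UniTable().fromfile('value_index.csv')
    print(back.keys())   # ['index', 'data']
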
Example #10
 def receive_unitable(self):
   _csvargs={}
   if self.types is not None:
     _csvargs['types'] = self.types
   if self.sep is not None:
     _csvargs['insep'] = self.sep
   if self.header is not None:
     _csvargs['header'] = self.header
   try:
     if self.header is None:
       if self.sep is None:
         #No special treatment needed
         if ((len(_csvargs) == 0) and (self.ffConvert is None)):
           u = UniTable()
           if self.framing != 'EOF':
             #New size-framed stream
             u.fromfile(self.handle, bufferFramed=True, chunksize=self.chunksize)
             d = u.get_csv_dialect()
             self.sep = d.delimiter
             self.header = self.sep.join(u.keys())
           else:
             # Traditional file-framed:
             u.fromfile(self.handle, bufferFramed=False, chunksize=self.chunksize)
           return u
         elif self.ffConvert is not None:  
           # Jonathan's clean solution:
           fields = self.ffConvert.fields
           return UniTable().from_fixed_width_file(self.handle, fields)          
         else:
           return UniTable().from_csv_file(self.handle,**_csvargs)
       else:
         u = UniTable()
         if self.framing != 'EOF':
            # New size-framed stream; insep was already folded into _csvargs above
            u.fromfile(self.handle, bufferFramed=True, chunksize=self.chunksize, **_csvargs)
            self.header = self.sep.join(u.keys())
         else:
            # Traditional file-framed read; insep comes in via _csvargs
            u.fromfile(self.handle, bufferFramed=False, chunksize=self.chunksize, **_csvargs)
         return u
         #return UniTable().from_csv_file(self.handle, insep = self.sep, **_csvargs)
     else:
       if self.framing != 'EOF':
          # A header exists, so a prior read has been made; header (and insep,
          # when set) were already folded into _csvargs above.
          return UniTable().from_csv_file(self.handle, bufferFramed=True, chunksize=self.chunksize, **_csvargs)
       else:
           return UniTable().from_csv_file(self.handle, bufferFramed=False, **_csvargs)
   except Exception, inst:
     #print "Exception is: {0}".format(type(inst))
     #print inst.args
     #print inst
     return None
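
The size-framed branch is what distinguishes this version from the earlier receive_unitable example: it reads with bufferFramed=True and then recovers the separator and header from the table itself so later chunks can be parsed consistently. That branch in isolation, assuming an open stream `fh` and a chunk size `CHUNK`:

    u = UniTable()
    u.fromfile(fh, bufferFramed=True, chunksize=CHUNK)   # size-framed stream read
    sep = u.get_csv_dialect().delimiter                  # separator that was actually used
    header = sep.join(u.keys())                          # header line reused on later reads
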