def diffkeys_explore(self, key1, key2, dump_sample=0):
    '''Return a list of observation strings describing how two fields differ.

    The two fields are fetched with ``self.fields(key1, key2)`` and compared
    elementwise.  An empty list is returned when no element compares unequal.

    key1, key2  -- keys of the two fields to compare
    dump_sample -- when true, a temporary table holding the positions of the
                   differing elements and both fields' values at those
                   positions is built, resized to ``dump_sample`` if it is
                   longer, and its ``str()`` lines are appended to the result
    '''
    report = []
    columns = self.fields(key1, key2)
    differs = columns[0] != columns[1]
    n_diff = differs.sum()
    if n_diff == 0:
        # nothing compares unequal, so there is nothing to describe
        return report

    formats = [get_format(col) for col in columns]
    kinds = [fmt[0] for fmt in formats]
    has_alpha = 'a' in kinds

    # are all values within floating point fuzz?
    if not has_alpha and na.allclose(*columns):
        report.append('no differences using allclose()')
        # NOTE(review): execution falls through to the summary below even
        # after this note is added -- confirm that is intended.

    # count and percentage of elements that compare unequal
    total = len(self)
    report.append('%s values differ (%1.2f%% of %s)'
                  % (n_diff, 100.0 * n_diff / total, total))
    if formats[0] != formats[1]:
        report.append('field types differ %s' % str(tuple(formats)))

    # skip detail if any field is alpha type
    if has_alpha:
        return report

    # differences as different types?
    for typestr in ('Int', 'Bool'):
        casted = [col.astype(typestr) for col in columns]
        if na.allclose(*casted):
            report.append('field values match as type(%r)' % typestr)

    # extract differences and examine in greater detail
    diffs = [any_compress(differs, col) for col in columns]
    nonzero = [(d != 0) for d in diffs]
    if (nonzero[0] != nonzero[1]).sum() == 0:
        # all zeros match, compare the nonzero values
        kept = [any_compress(nonzero[0], d) for d in diffs]
        ratio = kept[1].astype('Float') / kept[0].astype('Float')
        factor = ratio.mean()
        if na.allclose(ratio, factor):
            report.append(
                'field values differ by constant factor: %f' % factor)

    # statistics of (first - second) over the differing elements only
    delta = diffs[0] - diffs[1]
    lo, hi = delta.min(), delta.max()
    report.append('difference mean=%f range=%f (%s to %s)'
                  % (delta.mean(), hi - lo, lo, hi))

    if dump_sample:
        sample = UniTable()
        sample['_idx_'] = na.nonzero(differs)[0]
        for key in (key1, key2):
            sample[key] = self.field(key)[differs]
        if len(sample) > dump_sample:
            sample.resize(dump_sample)
        report.extend(str(sample).split('\n'))
    return report
def diffkeys_explore(self, key1, key2, dump_sample=0):
    '''Compare two fields and return a list of difference observations.

    Both fields come from ``self.fields(key1, key2)``.  The result is a list
    of strings; it is empty when the elementwise ``!=`` comparison finds no
    differing element.

    key1, key2  -- keys of the fields being compared
    dump_sample -- if true, the ``str()`` rendering of a table of the
                   differing positions and values (resized to
                   ``dump_sample`` when it is longer) is split on newlines
                   and appended to the result
    '''
    findings = []
    vals = self.fields(key1, key2)
    mismatch = vals[0] != vals[1]
    mismatch_count = mismatch.sum()
    if mismatch_count == 0:
        return findings

    tcodes = [get_format(v) for v in vals]
    # first character of each format code; 'a' marks an alpha field
    any_alpha = bool([tc[0] for tc in tcodes].count('a'))

    # are all values within floating point fuzz?
    if not any_alpha:
        if na.allclose(*vals):
            # NOTE(review): no early return follows this note in the
            # original code, so the summary below is still emitted --
            # confirm that is intended.
            findings.append('no differences using allclose()')

    # how many elements differ, and what share of the table that is
    nrows = len(self)
    percent = 100.0 * mismatch_count / nrows
    findings.append('%s values differ (%1.2f%% of %s)'
                    % (mismatch_count, percent, nrows))
    if tcodes[0] != tcodes[1]:
        findings.append('field types differ %s' % str(tuple(tcodes)))

    # skip detail if any field is alpha type
    if any_alpha:
        return findings

    # differences as different types?
    for typestr in ('Int', 'Bool'):
        converted = [v.astype(typestr) for v in vals]
        if na.allclose(*converted):
            findings.append('field values match as type(%r)' % typestr)

    # extract differences and examine in greater detail
    dvals = [any_compress(mismatch, v) for v in vals]
    is_nonzero = [(dv != 0) for dv in dvals]
    zeros_disagree = (is_nonzero[0] != is_nonzero[1]).sum()
    if zeros_disagree == 0:
        # all zeros match, compare the nonzero values
        nz = [any_compress(is_nonzero[0], dv) for dv in dvals]
        numerator = nz[1].astype('Float')
        denominator = nz[0].astype('Float')
        ratio = numerator / denominator
        factor = ratio.mean()
        if na.allclose(ratio, factor):
            findings.append(
                'field values differ by constant factor: %f' % factor)

    # summary statistics of first-minus-second over the differing elements
    delta = dvals[0] - dvals[1]
    dmin = delta.min()
    dmax = delta.max()
    findings.append('difference mean=%f range=%f (%s to %s)'
                    % (delta.mean(), dmax - dmin, dmin, dmax))

    if not dump_sample:
        return findings

    # build a small table of the differing positions and both values
    tmp = UniTable()
    positions = na.nonzero(mismatch)[0]
    tmp['_idx_'] = positions
    for key in (key1, key2):
        tmp[key] = self.field(key)[mismatch]
    if len(tmp) > dump_sample:
        tmp.resize(dump_sample)
    findings.extend(str(tmp).split('\n'))
    return findings
def compress(self, mask):
    '''Return a new table containing only rows where mask is nonzero.

    A fresh table is obtained from ``self._new_hook()``; every key in
    ``self._keys`` is filled with ``any_compress(mask, ...)`` applied to the
    matching entry of ``self._data``; ``_postinit_hook()`` is then called on
    the new table before it is returned.
    '''
    result = self._new_hook()
    for name in self._keys:
        result[name] = any_compress(mask, self._data[name])
    result._postinit_hook()
    return result
def compress(self, mask):
    '''Build and return a new table holding only rows where mask is nonzero.

    mask -- passed unchanged to ``any_compress`` together with each column

    The new table comes from ``self._new_hook()`` and is finalised with its
    ``_postinit_hook()`` once all keys in ``self._keys`` have been copied.
    '''
    subset = self._new_hook()
    for key in self._keys:
        column = self._data[key]
        # filter this column through the mask and store it under the same key
        subset[key] = any_compress(mask, column)
    subset._postinit_hook()
    return subset