def header(self, cells):
    """Register the columns named in a header row.

    Every name in ``cells`` that does not contain ``?`` is kept: its
    position in ``cells`` is appended to ``self._use`` and its text to
    ``self.name``. A kept name containing ``$``, ``<`` or ``>`` gets the
    result of ``Num().nums([])`` stored in ``self.nums``; any other kept
    name gets the result of ``Sym().syms([])`` stored in ``self.syms``.
    Then, first match wins: ``<`` sets ``self.w[c] = -1``, ``>`` sets
    ``self.w[c] = 1``, ``!`` sets ``self._class = c``, and anything else
    is appended to ``self.indeps``.

    Args:
        cells: iterable of column-name strings.

    Returns:
        ``self``, or a new ``Rows()`` when ``self`` is falsy.
    """
    self = self or Rows()
    # The loop below appends to ``indeps``; the original reset ``indep``
    # (no trailing "s"), so the list actually used was never cleared here.
    self.indeps = []
    for c0, x in enumerate(cells):
        if "?" in x:
            continue  # names containing "?" are ignored entirely
        # c is the index among kept columns, c0 the index in the raw row.
        c = len(self._use)
        self._use.append(c0)
        self.name.append(x)
        if "$" in x or "<" in x or ">" in x:
            n1 = Num()
            self.nums[c] = n1.nums([])
        else:
            s1 = Sym()
            self.syms[c] = s1.syms([])
        if "<" in x:
            self.w[c] = -1
        elif ">" in x:
            self.w[c] = 1
        elif "!" in x:
            self._class = c
        else:
            self.indeps.append(c)
    return self
def testing_Sym():
    """Feed nine 'y' and five 'n' symbols to a Sym and check its entropy.

    Prints the raw ``symEnt()`` value, then asserts that it rounds to
    0.9403 at four decimal places.
    """
    labels = ['y'] * 9 + ['n'] * 5
    s1 = Sym().syms(labels)
    print(s1.symEnt())
    result = round(s1.symEnt(), 4)
    assert result == 0.9403
def baseSym():
    """Print the counts and entropy of a 9 x 'y' / 5 x 'n' sample.

    Asserts that ``symEnt()`` rounds to 0.9403 at four decimal places.
    """
    sample = ['y'] * 9 + ['n'] * 5
    s = Sym().syms(sample)
    print(s.counts)
    shown = round(s.symEnt(), 4)
    print("Entropy: " + str(shown))
    checked = round(s.symEnt(), 4)
    assert checked == 0.9403
def __init__(i, a):
    """Build one column object per header text in ``a``.

    Each text becomes a ``Skip`` when ``isSkip(txt)`` holds, otherwise a
    ``Num`` when ``isNum(txt)`` holds, otherwise a ``Sym``. Every column
    is stored in ``i.all``. Columns that are not skipped are also filed
    under ``i.x`` / ``i.y`` when ``isX`` / ``isY`` hold, and the column
    for which ``isKlass`` holds is remembered as ``i.klass``.
    """
    i.x = []
    i.y = []
    i.all = []
    i.klass = None
    for at, txt in enumerate(a):
        if isSkip(txt):
            column = Skip(at=at, txt=txt)
        elif isNum(txt):
            column = Num(at=at, txt=txt)
        else:
            column = Sym(at=at, txt=txt)
        i.all.append(column)
        if isSkip(txt):
            # Skipped columns live only in ``i.all``.
            continue
        if isX(txt):
            i.x.append(column)
        if isY(txt):
            i.y.append(column)
        if isKlass(txt):
            i.klass = column
def testSample():
    """Bulk-add nine 'y' and five 'n' symbols and check the entropy.

    Prints the items and the entropy (four decimals), then asserts that
    ``symEnt()`` is within 1% (relative) of 0.9403.
    """
    syms = ['y'] * 9 + ['n'] * 5
    s = Sym()
    s.bulkAdd(syms)
    print("Items = ", syms)
    formatted = '%.4f' % (s.symEnt())
    print("Entropy = ", formatted)
    assert math.isclose(s.symEnt(), 0.9403, rel_tol=0.01)
def header(self, cells):
    """Register the columns named in a header row and return ``self``.

    Names containing ``?`` are ignored. For every other name, its
    position in ``cells`` is appended to ``self._use`` and its text to
    ``self.name``; a name containing any of ``<``, ``>``, ``$`` gets a
    new ``Num`` in ``self.nums``, otherwise a new ``Sym`` in
    ``self.syms``. First match wins for the role: ``<`` gives weight -1,
    ``>`` gives weight 1, ``!`` marks the class column, and anything
    else is appended to ``self.indeps``.
    """
    for position, label in enumerate(cells):
        if "?" in label:
            continue
        # slot indexes kept columns only; position indexes the raw row.
        slot = len(self._use)
        self._use.append(position)
        self.name.append(label)
        is_numeric = any(mark in label for mark in "<>$")
        if is_numeric:
            self.nums[slot] = Num()
        else:
            self.syms[slot] = Sym()
        if "<" in label:
            self.w[slot] = -1
        elif ">" in label:
            self.w[slot] = 1
        elif "!" in label:
            self._class = slot
        else:
            self.indeps.append(slot)
    return self
def header(self, cells):
    """Register the columns named in a header row.

    A name is ignored only when it starts with ``?``. For every other
    name, its position in ``cells`` is appended to ``self._use`` and its
    text to ``self.name``; a name containing any of ``<``, ``>``, ``$``
    gets ``Num([])`` in ``self.nums``, otherwise ``Sym([])`` in
    ``self.syms``. First match wins for the role: ``<`` gives weight -1,
    ``>`` gives weight 1, ``!`` marks the class column, and anything
    else is appended to ``self.indeps``. Nothing is returned.
    """
    for position, label in enumerate(cells):
        if re.match(r'^\?', label):
            continue
        slot = len(self._use)
        self._use.append(position)
        self.name.append(label)
        if re.search('[<>$]', label):
            self.nums[slot] = Num([])
        else:
            self.syms[slot] = Sym([])
        # Role assignment: each guard ends the iteration once it applies.
        if re.search('<', label):
            self.w[slot] = -1
            continue
        if re.search('>', label):
            self.w[slot] = 1
            continue
        if re.search('!', label):
            self._class = slot
            continue
        self.indeps.append(slot)
def header(self, cells):
    """Register the columns named in a header row and return ``self``.

    Names containing ``?`` are ignored. For every other name, its
    position in ``cells`` is appended to ``self._use`` and its text to
    ``self.name``; a name containing any of ``<``, ``>``, ``$`` gets
    ``Num([])`` in ``self.nums``, otherwise ``Sym([])`` in ``self.syms``.
    First match wins for the role: ``<`` gives weight -1, ``>`` gives
    weight 1, ``!`` sets ``self.Class``, and anything else is appended
    to ``self.indeps``.

    Args:
        cells: iterable of column-name strings.

    Returns:
        ``self``.
    """
    for c0, x in enumerate(cells):
        if '?' in x:
            continue
        # c indexes kept columns only; c0 indexes the raw row.
        c = len(self._use)
        self._use.append(c0)
        self.name.append(x)
        if re.search('[<>$]', x):
            self.nums[c] = Num([])
        else:
            self.syms[c] = Sym([])
        # Use re.search here too. The original used re.match, which only
        # matches at the start of the name, so a name such as "$<cost" was
        # typed as Num above yet got no weight and fell into ``indeps``.
        if re.search('<', x):
            self.w[c] = -1
        elif re.search('>', x):
            self.w[c] = 1
        elif re.search('!', x):
            self.Class = c
        else:
            self.indeps.append(c)
    return self
def header(data, cells):
    """Register the columns named in a header row on ``data``.

    For each name, ``data._use[i]`` records whether column ``i`` is kept.
    Names containing ``?`` are marked unused and otherwise ignored. A
    kept name is appended to ``data.name``; if it contains any of ``<``,
    ``>``, ``$`` a new ``Num`` is stored in ``data.nums[i]``, otherwise a
    new ``Sym`` in ``data.syms[i]``. First match wins for the role: ``<``
    gives weight -1, ``>`` gives weight 1, ``!`` sets ``data.clss``, and
    anything else sets ``data.indeps[i] = True``.

    Args:
        data: object holding ``_use``, ``name``, ``nums``, ``syms``,
            ``w`` and ``indeps``; they are assigned by index ``i``.
        cells: iterable of column-name strings.

    Returns:
        None.
    """
    for i, x in enumerate(cells):
        # The original tested for the literal substring "%?", which a
        # plain "?"-marked name never contains, so nothing was skipped.
        # NOTE(review): "%?" looks like a leftover Lua-pattern escape.
        if "?" in x:
            data._use[i] = False
            continue
        data._use[i] = True
        data.name.append(x)
        if re.search(r"[<>$]", x):
            data.nums[i] = Num()
        else:
            data.syms[i] = Sym()
        if re.search(r"<", x):
            data.w[i] = -1
        elif re.search(r">", x):
            data.w[i] = 1
        elif re.search(r"!", x):
            data.clss = i
        else:
            data.indeps[i] = True
def header(self, cells):
    """Register the columns named in a header row.

    Names containing ``?`` are ignored. For every other name, its
    position is appended to ``self.valid_cols`` and its text stored in
    ``self.names``. A name that starts with ``<``, ``>`` or ``$`` gets a
    new ``Num`` in ``self.nums``, otherwise a new ``Sym`` in
    ``self.syms``. By leading character: ``<`` gives weight -1, ``>``
    gives weight 1, ``!`` sets ``self.class_col``, and anything else is
    appended to ``self.indeps``. Nothing is returned.
    """
    for index, cell in enumerate(cells):
        if '?' in cell:
            continue
        self.valid_cols.append(index)
        self.names[index] = cell
        # All marks are checked at the start of the name only.
        if cell.startswith(('<', '>', '$')):
            self.nums[index] = Num()
        else:
            self.syms[index] = Sym()
        if cell.startswith('<'):
            self.weights[index] = -1
        elif cell.startswith('>'):
            self.weights[index] = 1
        elif cell.startswith('!'):
            self.class_col = index
        else:
            self.indeps.append(index)
def header(self, cells):
    """Sort the columns named in a header row into Sym and Num objects.

    Marker characters are looked for anywhere in a column name, not only
    at its start. Names containing ``?`` are ignored. For every other
    name, its position in ``cells`` is appended to ``self._use`` and its
    text to ``self.name``; a name containing ``$``, ``<`` or ``>`` gets
    ``Num([])`` in ``self.nums``, otherwise ``Sym([])`` in ``self.syms``.
    First match wins for the role: ``<`` gives weight -1, ``>`` gives
    weight 1, ``!`` sets ``self._class``, and anything else is appended
    to ``self.indeps``.

    Args:
        cells: iterable of column-name strings.

    Returns:
        ``self``, or a new ``Rows()`` when ``self`` is falsy.
    """
    self = self or Rows()
    # The loop below appends to ``indeps``; the original reset ``indep``
    # (no trailing "s"), so the list actually used was never cleared here.
    self.indeps = []
    for c0, x in enumerate(cells):
        if "?" in x:
            continue
        # c indexes kept columns only; c0 indexes the raw row.
        c = len(self._use)
        self._use.append(c0)
        self.name.append(x)
        # Column names containing $, < or > are numeric columns.
        if "$" in x or "<" in x or ">" in x:
            self.nums[c] = Num([])
        else:
            self.syms[c] = Sym([])
        if "<" in x:
            self.w[c] = -1
        elif ">" in x:
            self.w[c] = 1
        elif "!" in x:
            self._class = c
        else:
            self.indeps.append(c)
    return self
def sym_test():
    """Load nine 'y' and five 'n' symbols into a Sym and check its entropy.

    Prints the entropy between two blank lines, then asserts that
    ``close(sym.sym_ent(), 0.9403)`` holds.
    """
    sym_list = ['y'] * 9 + ['n'] * 5
    sym = Sym()
    sym.syms(sym_list)
    print()
    entropy_shown = sym.sym_ent()
    print('ENT', ':', entropy_shown)
    print()
    entropy_checked = sym.sym_ent()
    assert close(entropy_checked, 0.9403)
def __init__(i, down=-math.inf, up=math.inf):
    """Store the bounds ``down`` and ``up`` and attach a new ``Sym``.

    With no arguments the bounds are minus and plus infinity.
    """
    # Build the Sym first so nothing is assigned if its constructor fails,
    # which is what the original single tuple assignment also guaranteed.
    companion = Sym()
    i.down = down
    i.up = up
    i.also = companion
def testSym():
    """Check that a Sym built from 9 x 'y' and 5 x 'n' has entropy near 0.9403.

    Asserts a relative error below 1%, then prints the entropy.
    """
    sample = ['y'] * 9 + ['n'] * 5
    s = Sym(sample)
    relative_error = abs(s.symEnt() - 0.9403) / 0.9403
    assert relative_error < 0.01
    print(s.symEnt())
def sym_test():
    """Check that a Sym built from 9 x 'y' and 5 x 'n' has entropy near 0.9403.

    Prints the entropy, then asserts it is within 0.0001 of 0.9403.
    """
    sample = ['y'] * 9 + ['n'] * 5
    s = Sym(sample)
    print(s.sym_ent())
    deviation = abs(s.sym_ent() - 0.9403)
    assert deviation < 0.0001
def test_sym():
    """Check that a Sym built from 9 x 'y' and 5 x 'n' has entropy 0.9403.

    Prints the entropy, then asserts it rounds to 0.9403 at four
    decimal places.
    """
    sample = ['y'] * 9 + ['n'] * 5
    s = Sym(sample)
    print(s.symEnt())
    rounded = round(s.symEnt(), 4)
    assert rounded == 0.9403
# vim: filetype=python ts=2 sw=2 sts=2 et :
from sym import Sym

# Summarise the seven characters of "aaaabbc".
s = Sym(all="aaaabbc")

# "a" appears four times in that input.
assert s.seen["a"] == 4

# The reported spread must fall inside the band [1.378, 1.38].
assert 1.378 <= s.spread() <= 1.38
def __add__(i, x):
    """Add every item of ``x`` to this column's summary.

    ``x`` may be a single thing or a list of items; ``items(x)`` is what
    iterates it. Items equal to ``my.data.ignore`` are dropped. While
    ``i.has`` is falsy it is set to a ``Num`` when ``nump`` accepts the
    current item, otherwise to a ``Sym``. Each kept item is then handed
    to ``i.has`` through ``+``. Nothing is returned.
    """
    for item in items(x):
        if item != my.data.ignore:
            if not i.has:
                # Summary kind is decided by the first kept item.
                if nump(item):
                    i.has = Num()
                else:
                    i.has = Sym()
            # The result of "+" is discarded, as in the original.
            i.has + item
def testSym():
    """Check that a Sym built from 9 x 'y' and 5 x 'n' has entropy 0.9403.

    Asserts that ``symEnt()`` rounds to 0.9403 at four decimal places.
    """
    sample = ['y'] * 9 + ['n'] * 5
    sy = Sym(sample)
    rounded = round(sy.symEnt(), 4)
    assert rounded == 0.9403