def ReadQuantExamples(inFile):
    """ reads the examples from a .qdat file

      **Arguments**

        - inFile: a file object (only its _readline()_ method is used)

      **Returns**

        a 2-tuple containing:

          1) the names of the examples

          2) a list of lists containing the examples themselves

      **Raises**

        ValueError if a data field cannot be converted to an integer

      **Notes**

        - because this is reading a .qdat file, it is assumed that all
          variable values are integers

        - lines starting with '#' are skipped, as are lines that have
          fewer than two fields

    """
    # A run of spaces and/or tabs is ONE delimiter. The pattern must not be
    # able to match the empty string: since Python 3.7 re.split() also splits
    # on empty matches, which would break the line between every character.
    delimiter = re.compile(r'[ \t]+')
    names = []
    examples = []
    inLine = inFile.readline()
    while inLine:
        if not inLine.startswith('#'):
            # strip first so the newline and any leading/trailing blanks do
            # not turn into bogus (non-integer) fields
            fields = delimiter.split(inLine.strip())
            if len(fields) > 1:
                examples.append([int(field) for field in fields[1:]])
                names.append(fields[0])
        inLine = inFile.readline()
    return names, examples
def ReadQuantExamples(inFile):
    """ reads the examples from a .qdat file

      **Arguments**

        - inFile: a file object (only its _readline()_ method is used)

      **Returns**

        a 2-tuple containing:

          1) the names of the examples

          2) a list of lists containing the examples themselves

      **Raises**

        ValueError if a data field cannot be converted to an integer

      **Notes**

        - because this is reading a .qdat file, it is assumed that all
          variable values are integers

        - lines starting with '#' are skipped, as are lines that have
          fewer than two fields

    """
    # A run of spaces and/or tabs is ONE delimiter. The pattern must not be
    # able to match the empty string: since Python 3.7 re.split() also splits
    # on empty matches, which would break the line between every character.
    delimiter = re.compile(r'[ \t]+')
    names = []
    examples = []
    inLine = inFile.readline()
    while inLine:
        if not inLine.startswith('#'):
            # strip first so the newline and any leading/trailing blanks do
            # not turn into bogus (non-integer) fields
            fields = delimiter.split(inLine.strip())
            if len(fields) > 1:
                examples.append([int(field) for field in fields[1:]])
                names.append(fields[0])
        inLine = inFile.readline()
    return names, examples
def testMultSplit3(self):
    """ 4 possible results """
    d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 2), (1.4, 2), (1.6, 2), (2., 2), (2.1, 1),
         (2.1, 1), (2.1, 1), (2.2, 1), (2.3, 1), (3.0, 3), (3.1, 3), (3.2, 3), (3.3, 3)]
    varValues, resCodes = zip(*d)
    nPossibleRes = 4
    # ask for three bounds over the four result codes
    res = Quantize.FindVarMultQuantBounds(varValues, 3, resCodes, nPossibleRes)
    target = ([1.30, 2.05, 2.65], 1.97722)
    # unittest assertion methods are used instead of bare ``assert`` so the
    # checks still run under ``python -O``
    self.assertTrue(
        Quantize.feq(res[1], target[1], 1e-4),
        'InfoGain comparison failed: %s != %s' % (res[1], target[1]))
    self.assertEqual(
        min(map(lambda x, y: Quantize.feq(x, y, 1e-4), res[0], target[0])), 1,
        'split bound comparison failed: %s != %s' % (res[0], target[0]))
def CalcNPossibleUsingMap(data, order, qBounds, nQBounds=None):
    """ calculates the number of possible values for each variable in a data set

      **Arguments**

        - data: a list of examples

        - order: the ordering map between the variables in _data_ and _qBounds_
          (entry _i_ is the index into each example for variable _i_)

        - qBounds: the quantization bounds for the variables

        - nQBounds: (optional) a per-variable sequence; variables with a
          non-zero entry are not counted

      **Returns**

        a list with the number of possible values each variable takes on in the data set

      **Notes**

        - variables present in _qBounds_ will have their _nPossible_ number read
          from _qBounds_ (the result is the number of bounds plus one)

        - _nPossible_ for other numeric variables will be calculated (largest
          value seen plus one)

        - variables with a non-zero _nQBounds_ entry, and variables holding a
          non-integral or non-numeric value, are reported as 0

        - diagnostic output is printed to stdout

    """
    numericTypes = [int, float]
    try:
        # ``long`` only exists on Python 2
        numericTypes.append(long)  # noqa: F821
    except NameError:
        pass

    print('order:', order, len(order))
    print('qB:', qBounds)
    #print('nQB:',nQBounds, len(nQBounds))
    assert (qBounds and len(order) == len(qBounds)) or \
           (nQBounds and len(order) == len(nQBounds)), 'order/qBounds mismatch'
    nVars = len(order)
    nPossible = [-1] * nVars
    # must be a real list: entries are removed below and a Python 3 range
    # object has no remove()
    cols = list(range(nVars))
    for i in range(nVars):
        if nQBounds and nQBounds[i] != 0:
            nPossible[i] = -1
            cols.remove(i)
        elif len(qBounds[i]) > 0:
            nPossible[i] = len(qBounds[i])
            cols.remove(i)

    for example in data:
        # iterate over a copy, columns are dropped from _cols_ as they are
        # found to be uncountable
        for col in cols[:]:
            d = example[order[col]]
            # exact type match on purpose: bools and other subclasses are
            # treated as non-numeric
            if type(d) in numericTypes:
                if int(d) == d:
                    nPossible[col] = max(int(d), nPossible[col])
                else:
                    nPossible[col] = -1
                    cols.remove(col)
            else:
                print('bye bye col %d: %s' % (col, repr(d)))
                nPossible[col] = -1
                cols.remove(col)

    return [int(x) + 1 for x in nPossible]
def CalcNPossibleUsingMap(data, order, qBounds, nQBounds=None):
    """ calculates the number of possible values for each variable in a data set

      **Arguments**

        - data: a list of examples

        - order: the ordering map between the variables in _data_ and _qBounds_
          (entry _i_ is the index into each example for variable _i_)

        - qBounds: the quantization bounds for the variables

        - nQBounds: (optional) a per-variable sequence; variables with a
          non-zero entry are not counted

      **Returns**

        a list with the number of possible values each variable takes on in the data set

      **Notes**

        - variables present in _qBounds_ will have their _nPossible_ number read
          from _qBounds_ (the result is the number of bounds plus one)

        - _nPossible_ for other numeric variables will be calculated (largest
          value seen plus one)

        - variables with a non-zero _nQBounds_ entry, and variables holding a
          non-integral or non-numeric value, are reported as 0

        - diagnostic output is printed to stdout

    """
    numericTypes = [int, float]
    try:
        # ``long`` only exists on Python 2
        numericTypes.append(long)  # noqa: F821
    except NameError:
        pass

    print('order:', order, len(order))
    print('qB:', qBounds)
    #print('nQB:',nQBounds, len(nQBounds))
    assert (qBounds and len(order) == len(qBounds)) or \
           (nQBounds and len(order) == len(nQBounds)), 'order/qBounds mismatch'
    nVars = len(order)
    nPossible = [-1] * nVars
    # must be a real list: entries are removed below and a Python 3 range
    # object has no remove()
    cols = list(range(nVars))
    for i in range(nVars):
        if nQBounds and nQBounds[i] != 0:
            nPossible[i] = -1
            cols.remove(i)
        elif len(qBounds[i]) > 0:
            nPossible[i] = len(qBounds[i])
            cols.remove(i)

    for example in data:
        # iterate over a copy, columns are dropped from _cols_ as they are
        # found to be uncountable
        for col in cols[:]:
            d = example[order[col]]
            # exact type match on purpose: bools and other subclasses are
            # treated as non-numeric
            if type(d) in numericTypes:
                if int(d) == d:
                    nPossible[col] = max(int(d), nPossible[col])
                else:
                    nPossible[col] = -1
                    cols.remove(col)
            else:
                print('bye bye col %d: %s' % (col, repr(d)))
                nPossible[col] = -1
                cols.remove(col)

    return [int(x) + 1 for x in nPossible]
def testOneSplit3(self):
    """ optimal division not possible """
    d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 2), (1.4, 2), (1.6, 2), (2., 2), (2.1, 1),
         (2.2, 1), (2.3, 1)]
    varValues, resCodes = zip(*d)
    nPossibleRes = 3
    res = Quantize.FindVarQuantBound(varValues, resCodes, nPossibleRes)
    target = (1.3, 0.88129)
    # unittest assertion instead of a bare ``assert`` so the check still runs
    # under ``python -O``
    self.assertEqual(
        list(map(lambda x, y: Quantize.feq(x, y, 1e-4), res, target)), [1, 1],
        'result comparison failed: %s != %s' % (res, target))
def testOneSplit2_noise(self):
    # single split over two result codes, with a little label noise near 1.4
    samples = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 0), (1.4, 1), (1.6, 0), (2., 1), (2.1, 1),
               (2.2, 1), (2.3, 1)]
    values, codes = zip(*samples)
    found = Quantize.FindVarQuantBound(values, codes, 2)
    expected = (1.8, 0.60999)

    def _close(got, want):
        # element-wise float comparison with the tolerance used by this test
        return Quantize.feq(got, want, 1e-4)

    matches = list(map(_close, found, expected))
    failMsg = 'result comparison failed: %s != %s' % (found, expected)
    self.assertEqual(matches, [1, 1], failMsg)
def testOneSplit4(self):
    """ lots of duplicates """
    d = [(1., 0), (1.1, 0), (1.2, 0), (1.2, 1), (1.4, 0), (1.4, 0), (1.6, 0), (2., 1),
         (2.1, 1), (2.1, 1), (2.1, 1), (2.1, 1), (2.2, 1), (2.3, 1)]
    varValues, resCodes = zip(*d)
    nPossibleRes = 2
    res = Quantize.FindVarQuantBound(varValues, resCodes, nPossibleRes)
    target = (1.8, 0.68939)
    # unittest assertion instead of a bare ``assert`` so the check still runs
    # under ``python -O``
    self.assertEqual(
        list(map(lambda x, y: Quantize.feq(x, y, 1e-4), res, target)), [1, 1],
        'result comparison failed: %s != %s' % (res, target))
def testOneSplit5(self):
    """ same as testOneSplit1 data, but out of order """
    d = [(1., 0), (1.1, 0), (2.2, 1), (1.2, 0), (1.6, 0), (1.4, 0), (2., 1), (2.1, 1),
         (1.4, 0), (2.3, 1)]
    varValues, resCodes = zip(*d)
    nPossibleRes = 2
    res = Quantize.FindVarQuantBound(varValues, resCodes, nPossibleRes)
    target = (1.8, 0.97095)
    # unittest assertion instead of a bare ``assert`` so the check still runs
    # under ``python -O``
    self.assertEqual(
        list(map(lambda x, y: Quantize.feq(x, y, 1e-4), res, target)), [1, 1],
        'result comparison failed: %s != %s' % (res, target))
def testMultSplit2(self):
    """ same test as testMultSplit1, but out of order """
    d = [(1., 0), (2.1, 1), (1.1, 0), (1.2, 0), (1.4, 2), (1.6, 2), (2., 2), (1.4, 2),
         (2.1, 1), (2.2, 1), (2.1, 1), (2.3, 1)]
    varValues, resCodes = zip(*d)
    nPossibleRes = 3
    # ask for two bounds over the three result codes
    res = Quantize.FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes)
    target = ([1.3, 2.05], 1.55458)
    # unittest assertion methods are used instead of bare ``assert`` so the
    # checks still run under ``python -O``
    self.assertTrue(
        Quantize.feq(res[1], target[1], 1e-4),
        'InfoGain comparison failed: %s != %s' % (res[1], target[1]))
    self.assertEqual(
        min(map(lambda x, y: Quantize.feq(x, y, 1e-4), res[0], target[0])), 1,
        'split bound comparison failed: %s != %s' % (res[0], target[0]))
def testMultSplit5(self):
    """ dual valued, with an island, a bit noisy """
    d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 0), (1.6, 1), (2., 1), (2.1, 0),
         (2.1, 0), (2.1, 0), (2.2, 1), (2.3, 0)]
    varValues, resCodes = zip(*d)
    nPossibleRes = 2
    # ask for two bounds over the two result codes
    res = Quantize.FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes)
    target = ([1.3, 2.05], .34707)
    # unittest assertion methods are used instead of bare ``assert`` so the
    # checks still run under ``python -O``
    self.assertTrue(
        Quantize.feq(res[1], target[1], 1e-4),
        'InfoGain comparison failed: %s != %s' % (res[1], target[1]))
    self.assertEqual(
        min(map(lambda x, y: Quantize.feq(x, y, 1e-4), res[0], target[0])), 1,
        'split bound comparison failed: %s != %s' % (res[0], target[0]))
def testMultSplit3(self):
    """ 4 possible results """
    d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 2), (1.4, 2), (1.6, 2), (2., 2), (2.1, 1),
         (2.1, 1), (2.1, 1), (2.2, 1), (2.3, 1), (3.0, 3), (3.1, 3), (3.2, 3), (3.3, 3)]
    varValues, resCodes = zip(*d)
    nPossibleRes = 4
    # ask for three bounds over the four result codes
    res = Quantize.FindVarMultQuantBounds(varValues, 3, resCodes, nPossibleRes)
    target = ([1.30, 2.05, 2.65], 1.97722)
    # unittest assertion methods are used instead of bare ``assert`` so the
    # checks still run under ``python -O``
    self.assertTrue(
        Quantize.feq(res[1], target[1], 1e-4),
        'InfoGain comparison failed: %s != %s' % (res[1], target[1]))
    self.assertEqual(
        min(map(lambda x, y: Quantize.feq(x, y, 1e-4), res[0], target[0])), 1,
        'split bound comparison failed: %s != %s' % (res[0], target[0]))
def testMultSplit4_dualValued_island(self):
    # two result codes; the code-1 points form an island between code-0 runs
    samples = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 1), (1.6, 1), (2., 1), (2.1, 0),
               (2.1, 0), (2.1, 0), (2.2, 0), (2.3, 0)]
    values, codes = zip(*samples)
    found = Quantize.FindVarMultQuantBounds(values, 2, codes, 2)
    foundBounds, foundGain = found[0], found[1]
    wantBounds, wantGain = [1.3, 2.05], .91830

    def _close(got, want):
        # float comparison with the tolerance used throughout this test
        return Quantize.feq(got, want, 1e-4)

    # information gain first ...
    gainMsg = 'InfoGain comparison failed: %s != %s' % (foundGain, wantGain)
    self.assertTrue(_close(foundGain, wantGain), gainMsg)
    # ... then every split bound
    boundMsg = 'split bound comparison failed: %s != %s' % (foundBounds, wantBounds)
    worst = min(map(_close, foundBounds, wantBounds))
    self.assertEqual(worst, 1, boundMsg)
def testMultSplit1_simple_dual(self):
    # three result codes in sorted, cleanly separated runs; two bounds requested
    samples = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 2), (1.4, 2), (1.6, 2), (2., 2), (2.1, 1),
               (2.1, 1), (2.1, 1), (2.2, 1), (2.3, 1)]
    values, codes = zip(*samples)
    found = Quantize.FindVarMultQuantBounds(values, 2, codes, 3)
    foundBounds, foundGain = found[0], found[1]
    wantBounds, wantGain = [1.3, 2.05], 1.55458

    def _close(got, want):
        # float comparison with the tolerance used throughout this test
        return Quantize.feq(got, want, 1e-4)

    # split bounds first ...
    boundMsg = 'split bound comparison failed: %s != %s' % (foundBounds, wantBounds)
    worst = min(map(_close, foundBounds, wantBounds))
    self.assertEqual(worst, 1, boundMsg)
    # ... then the information gain
    gainMsg = 'InfoGain comparison failed: %s != %s' % (foundGain, wantGain)
    self.assertTrue(_close(foundGain, wantGain), gainMsg)
def WriteData(outFile, varNames, qBounds, examples):
    """ writes out a .qdat file

      **Arguments**

        - outFile: a file object (only its _write()_ method is used)

        - varNames: a list of variable names

        - qBounds: the list of quantization bounds (should be the same length
           as _varNames_)

        - examples: the data to be written

      **Raises**

        IndexError if _qBounds_ is shorter than _varNames_

      **Notes**

        - the header lines and the variable table are written as '#' comment
          lines; each example is then written as one space-separated line

    """
    outFile.write('# Quantized data from DataUtils\n')
    outFile.write('# ----------\n')
    outFile.write('# Variable Table\n')
    # qBounds is indexed (rather than zipped) so that a too-short list is
    # reported instead of silently truncating the variable table
    for i, varName in enumerate(varNames):
        outFile.write('# %s %s\n' % (varName, str(qBounds[i])))
    outFile.write('# ----------\n')
    for example in examples:
        outFile.write(' '.join(map(str, example)) + '\n')
def WriteData(outFile, varNames, qBounds, examples):
    """ writes out a .qdat file

      **Arguments**

        - outFile: a file object (only its _write()_ method is used)

        - varNames: a list of variable names

        - qBounds: the list of quantization bounds (should be the same length
           as _varNames_)

        - examples: the data to be written

      **Raises**

        IndexError if _qBounds_ is shorter than _varNames_

      **Notes**

        - the header lines and the variable table are written as '#' comment
          lines; each example is then written as one space-separated line

    """
    outFile.write('# Quantized data from DataUtils\n')
    outFile.write('# ----------\n')
    outFile.write('# Variable Table\n')
    # qBounds is indexed (rather than zipped) so that a too-short list is
    # reported instead of silently truncating the variable table
    for i, varName in enumerate(varNames):
        outFile.write('# %s %s\n' % (varName, str(qBounds[i])))
    outFile.write('# ----------\n')
    for example in examples:
        outFile.write(' '.join(map(str, example)) + '\n')
#hascQuantize=0
# Bind the two bound-search helpers: taken from cQuantize when hascQuantize
# is truthy, otherwise the _NewPy* implementations are used.
if not hascQuantize:
    _RecurseOnBounds = _NewPyRecurseOnBounds
    _FindStartPoints = _NewPyFindStartPoints
else:
    _RecurseOnBounds = cQuantize._RecurseOnBounds
    _FindStartPoints = cQuantize._FindStartPoints

if __name__ == '__main__':
    import sys
    # ad-hoc run when executed as a script; the constant condition means only
    # the first (two-bound) case is ever executed
    if 1:
        d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 0), (1.6, 1), (2., 1), (2.1, 0),
             (2.1, 0), (2.1, 0), (2.2, 1), (2.3, 0)]
        varValues = [pt[0] for pt in d]
        resCodes = [pt[1] for pt in d]
        nPossibleRes = 2
        res = FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes)
        print('RES:', res)
        # expected outcome, kept for reference only (it is not compared here)
        target = ([1.3, 2.05], .34707)
    else:
        d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 0), (1.6, 1), (2., 1), (2.1, 0),
             (2.1, 0), (2.1, 0), (2.2, 1), (2.3, 0)]
        varValues = [pt[0] for pt in d]
        resCodes = [pt[1] for pt in d]
        nPossibleRes = 2
        res = FindVarMultQuantBounds(varValues, 1, resCodes, nPossibleRes)
        print(res)
        #sys.exit(1)
        d = [(1.4, 1), (1.4, 0)]
return quantBounds, maxGain # hascQuantize=0 if hascQuantize: _RecurseOnBounds = cQuantize._RecurseOnBounds _FindStartPoints = cQuantize._FindStartPoints else: _RecurseOnBounds = _NewPyRecurseOnBounds _FindStartPoints = _NewPyFindStartPoints if __name__ == '__main__': if 1: d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 0), (1.6, 1), (2., 1), (2.1, 0), (2.1, 0), (2.1, 0), (2.2, 1), (2.3, 0)] varValues = list(map(lambda x: x[0], d)) resCodes = list(map(lambda x: x[1], d)) nPossibleRes = 2 res = FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes) print('RES:', res) target = ([1.3, 2.05], .34707) else: d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 0), (1.6, 1), (2., 1), (2.1, 0), (2.1, 0), (2.1, 0), (2.2, 1), (2.3, 0)] varValues = list(map(lambda x: x[0], d)) resCodes = list(map(lambda x: x[1], d)) nPossibleRes = 2 res = FindVarMultQuantBounds(varValues, 1, resCodes, nPossibleRes) print(res) # sys.exit(1) d = [(1.4, 1), (1.4, 0)]