def avgFscore(t,p):
	"""Return the average of fscore(t[i], p[i]) over every index i of t.

	t and p are indexed in lockstep for i in 0 .. len(t)-1, so p must be
	indexable at every position of t.
	Presumably t holds the true values and p the predictions -- TODO confirm
	against fscore's definition.

	Raises ZeroDivisionError when t is empty (division by len(t)).
	NOTE(review): under Python 2 the final `/` truncates if every fscore
	result is an int.
	"""
	count = len(t)
	total = 0
	for idx in xrange(count):
		# Accumulate the per-pair score.
		total += fscore(t[idx],p[idx])
	return total/count


labels = data.getLabelsArray()[:1000]
rawData = data.getDataArray()[:1000]
stPrimerov = len(labels)



bad = data.getBadAttributes(rawData,10)
rawData = data.filterArr(rawData,bad)

k = 10;
print "starting %d fold cross validation" % k
print "number of cases: %d" % len(rawData)
print "number of attributes: %d" % len(rawData[0])

#tolerance = [a/100.0 for a in range(40,50) if a%2 == 0]
#meja = [10,12,14,16,18,20]
#allTests = {}
#for tol in tolerance:
#	for mej in meja:
#		aaa = 0
#		allPred = []
#		for i in xrange(k):
# Example #2
# 0
import data
import math

# Load the training array (d) and the test array (t) from the project-local
# `data` module.
d = data.getDataArray()
t = data.getTestArray()

# The "bad" attributes are computed from the training array only; the same
# result is then used to filter both arrays.
# NOTE(review): presumably filterArr drops those columns so both sets keep
# the same attribute layout -- confirm in data.filterArr.
a10 = data.getBadAttributes(d,10)
d10 = data.filterArr(d,a10)
t10 = data.filterArr(t,a10)

# Binary view: 1 where the value is > 0, otherwise 0.
binD = [[int(x>0) for x in row] for row in d10]
binT = [[int(x>0) for x in row] for row in t10]

# Log view: int(ceil(log(x))) for positive values, 0 otherwise
# (natural logarithm; non-positive values are mapped to 0 to avoid a
# math domain error).
logD = [[int(math.ceil(math.log(x) if x > 0 else 0)) for x in row] for row in d10]
logT = [[int(math.ceil(math.log(x) if x > 0 else 0)) for x in row] for row in t10]

# Each output row is the filtered row followed by its binary and log views.
# The two sets are built independently, each over its own rows: the previous
# single loop over range(len(t10)) indexed the training rows with the test
# set's length, which truncated the training output (or raised IndexError)
# whenever the two sets differed in size.
newD = [list(raw)+list(b)+list(lg) for raw, b, lg in zip(d10, binD, logD)]
newT = [list(raw)+list(b)+list(lg) for raw, b, lg in zip(t10, binT, logT)]


# Write both augmented sets as tab-separated rows, one row per line, with no
# trailing newline. Every value is stringified and any "c" character is
# stripped from it.
# `with` closes each file even if the write fails; `open` replaces the
# legacy `file` builtin. `f` stays bound (closed) after each block.
# NOTE(review): "Trainging" in the first file name looks like a typo; it is
# kept unchanged because other scripts may read this exact path.
with open("plusBinLogTraingingData.csv","w") as f:
	f.write("\n".join(["\t".join([str(value).replace("c","") for value in row]) for row in newD ]))
with open("plusBinLogTestData.csv","w") as f:
	f.write("\n".join(["\t".join([str(value).replace("c","") for value in row]) for row in newT ]))