def fscore(t,p):
	"""Return the harmonic mean of precision(t, p) and recall(t, p).

	t and p are forwarded unchanged to precision() and recall()
	(presumably the true and the predicted labels -- confirm against
	those helpers).

	Returns 0 when precision + recall is not greater than zero, so the
	division is never attempted with a zero denominator.
	"""
	prec = precision(t,p)
	rcl = recall(t,p)
	denom = prec + rcl
	if denom > 0:
		return 2.0 * (prec*rcl) / denom
	return 0

def avgFscore(t,p):
	"""Return the mean of fscore(t[i], p[i]) over every index of t.

	t and p are indexable sequences; p must have at least len(t)
	elements (extra elements of p are ignored, a shorter p raises
	IndexError).

	Returns a float. An empty t yields 0.0 instead of raising
	ZeroDivisionError.
	"""
	count = len(t)
	if count == 0:
		# Nothing to average: report 0.0, the same value fscore uses
		# when the score is undefined.
		return 0.0
	# Float accumulator keeps the result a float even when every
	# individual score is the integer 0, and avoids shadowing sum().
	total = 0.0
	for i in range(count):
		total += fscore(t[i],p[i])
	return total / count


# Work on the first 1000 labels and the first 1000 data rows only.
labels = data.getLabelsArray()[:1000]
rawData = data.getDataArray()[:1000]
stPrimerov = len(labels)  # number of labelled examples in use

# Drop the attributes reported by getBadAttributes(rawData, 10).
bad = data.getBadAttributes(rawData, 10)
rawData = data.filterArr(rawData, bad)

# Number of cross-validation folds.
k = 10
print("starting %d fold cross validation" % k)
print("number of cases: %d" % len(rawData))
print("number of attributes: %d" % len(rawData[0]))

# Disabled experiment settings, kept for reference:
#tolerance = [a/100.0 for a in range(40,50) if a%2 == 0]
#meja = [10,12,14,16,18,20]
#allTests = {}
Exemple #2
0
def fscore(t,p):
	"""Return the harmonic mean of precision(t, p) and recall(t, p).

	t and p are forwarded unchanged to precision() and recall()
	(presumably the true and the predicted labels -- confirm against
	those helpers).

	Returns 0 when precision + recall is not greater than zero, so the
	division is never attempted with a zero denominator.
	"""
	prec = precision(t,p)
	rcl = recall(t,p)
	denom = prec + rcl
	if denom > 0:
		return 2.0 * (prec*rcl) / denom
	return 0

def avgFscore(t,p):
	"""Return the mean of fscore(t[i], p[i]) over every index of t.

	t and p are indexable sequences; p must have at least len(t)
	elements (extra elements of p are ignored, a shorter p raises
	IndexError).

	Returns a float. An empty t yields 0.0 instead of raising
	ZeroDivisionError.
	"""
	count = len(t)
	if count == 0:
		# Nothing to average: report 0.0, the same value fscore uses
		# when the score is undefined.
		return 0.0
	# Float accumulator keeps the result a float even when every
	# individual score is the integer 0, and avoids shadowing sum().
	total = 0.0
	for i in range(count):
		total += fscore(t[i],p[i])
	return total / count


# Load every label and every data row from the project data module.
labels = data.getLabelsArray()
rawData = data.getDataArray()
stPrimerov = len(labels)  # number of labelled examples in use

# Attribute filtering is switched off for this run:
#bad = data.getBadAttributes(rawData,10)
#rawData = data.filterArr(rawData,bad)

# Number of cross-validation folds.
k = 10
print("starting %d fold cross validation" % k)
print("number of cases: %d" % len(rawData))
print("number of attributes: %d" % len(rawData[0]))

# Disabled experiment settings, kept for reference:
#tolerance = [a/100.0 for a in range(40,50) if a%2 == 0]
#meja = [10,12,14,16,18,20]
#allTests = {}
Exemple #3
0
import matplotlib.pyplot as plot
import data
from sets import Set
from itertools import chain
from collections import Counter
from random import shuffle


# Unmodified labels and data rows.
rawL = data.getLabelsArray()
rawD = data.getDataArray()

# Three single-step variants, each derived directly from the raw set.
remLD, remLL = data.removeLeastCommonData(rawD, rawL, 5)
remMD, remML = data.removeMostCommonData(rawD, rawL, 20)
addD, addL = data.addFakeData(rawD, rawL, 50)

# Earlier ordering of the combined pipeline (remove first, add last), disabled:
#expD, expL = data.removeLeastCommonData(rawD, rawL,5)
#expD, expL = data.removeMostCommonData(expD, expL,20)
#expD, expL = data.addFakeData(expD, expL,80)

# Combined variant: add fake data first, then apply both removal steps
# to the result of the previous step.
expD, expL = data.addFakeData(rawD, rawL, 80)
expD, expL = data.removeLeastCommonData(expD, expL, 5)
expD, expL = data.removeMostCommonData(expD, expL, 20)

# Disabled histogram plots of the flattened label lists:
#plot.hist(list(chain(*addL)),bins=83)
#plot.hist(list(chain(*rawL)),bins=83)
#plot.hist(list(chain(*remLL)),bins=83)
#plot.hist(list(chain(*remML)),bins=83)
#plot.hist(list(chain(*expL)),bins=83)
#plot.xlabel("stevilo oznak")
#plot.ylabel("stevilo primerov")
#plot.show()
#plot.close()
Exemple #4
0
import data
import math

# Load the training rows and the test rows from the project data module.
d = data.getDataArray()
t = data.getTestArray()

# The attribute list computed on the training rows (argument 10) is
# applied to both the training and the test rows.
a10 = data.getBadAttributes(d,10)
d10 = data.filterArr(d,a10)
t10 = data.filterArr(t,a10)

# Binary features: 1 where the value is greater than zero, else 0.
binD = [[int(x>0) for x in i] for i in d10]
binT = [[int(x>0) for x in i] for i in t10]

# Log features: ceil(ln(x)) for values greater than zero, else 0.
logD = [[int(math.ceil(math.log(x) if x > 0 else 0)) for x in i] for i in d10]
logT = [[int(math.ceil(math.log(x) if x > 0 else 0)) for x in i] for i in t10]

# Each output row is the filtered row followed by its binary and its log
# features. Each set is walked over its OWN length: looping both over
# len(t10) would drop training rows (or raise IndexError) whenever the
# training and test sets differ in size.
newD = []
for i in range(len(d10)):
	newD.append(list(d10[i])+list(binD[i])+list(logD[i]))

newT = []
for i in range(len(t10)):
	newT.append(list(t10[i])+list(binT[i])+list(logT[i]))


# Write one tab-separated line per row, with every "c" character removed
# from the textual form of each value. The with-blocks flush and close
# both files, including on error.
with open("plusBinLogTraingingData.csv","w") as f:
	f.write("\n".join(["\t".join([str(x).replace("c","") for x in i]) for i in newD ]))
with open("plusBinLogTestData.csv","w") as f:
	f.write("\n".join(["\t".join([str(x).replace("c","") for x in i]) for i in newT ]))
def fscore(t,p):
	"""Return the harmonic mean of precision(t, p) and recall(t, p).

	t and p are forwarded unchanged to precision() and recall()
	(presumably the true and the predicted labels -- confirm against
	those helpers).

	Returns 0 when precision + recall is not greater than zero, so the
	division is never attempted with a zero denominator.
	"""
	prec = precision(t,p)
	rcl = recall(t,p)
	denom = prec + rcl
	if denom > 0:
		return 2.0 * (prec*rcl) / denom
	return 0

def avgFscore(t,p):
	"""Return the mean of fscore(t[i], p[i]) over every index of t.

	t and p are indexable sequences; p must have at least len(t)
	elements (extra elements of p are ignored, a shorter p raises
	IndexError).

	Returns a float. An empty t yields 0.0 instead of raising
	ZeroDivisionError.
	"""
	count = len(t)
	if count == 0:
		# Nothing to average: report 0.0, the same value fscore uses
		# when the score is undefined.
		return 0.0
	# Float accumulator keeps the result a float even when every
	# individual score is the integer 0, and avoids shadowing sum().
	total = 0.0
	for i in range(count):
		total += fscore(t[i],p[i])
	return total / count


# Load every label and every data row from the project data module.
labels = data.getLabelsArray()
rawData = data.getDataArray()
stPrimerov = len(labels)  # number of labelled examples in use

# Attribute filtering is switched off for this run:
#bad = data.getBadAttributes(rawData,10)
#rawData = data.filterArr(rawData,bad)

# Number of cross-validation folds.
k = 10
print("starting %d fold cross validation" % k)
print("number of cases: %d" % len(rawData))
print("number of attributes: %d" % len(rawData[0]))

# State initialised ahead of the fold loop that follows.
aaa = 0  # NOTE(review): purpose not visible in this part of the file
allPred = []
for i in xrange(k):