# NOTE(review): the commented-out lines below are the tail of a disabled
# evaluation loop (train/test split by slice, per-fold f-score, average over
# k). Its beginning is outside this view and the original indentation was
# lost, so the lines are kept flat exactly as found.
# trainD = rawData[:f]+rawData[t:]
# trainL = labels[:f]+labels[t:]
# testD = rawData[f:t]
# testL = labels[f:t]
#
# predictions = getKnnResults(trainD,trainL,testD,tol,mej)
# allPred += predictions
#
# avgf = avgFscore(testL,predictions)
# aaa += avgf
# print "%2d fscore : %.6f" % (i, avgf)
#
# print "povpreceno (%f,%d) : %.6f" % (tol,mej,aaa/k)
# allTests[mej+tol] = allPred

# Load the training labels/rows and the test rows from the project `data`
# module. NOTE(review): the meaning of the `True` flag is defined in `data`
# and is not visible here -- confirm before changing it.
labels = data.getLabelsArray(True)
rawData = data.getDataArray(True)
stPrimerov = len(labels)  # number of label rows; not used in the visible code
testData = data.getTestArray(True)

# Compute the predictions for every test row (one iterable of values per row,
# as required by the CSV serialisation below).
predictions = getPredictionsRows(rawData, labels, testData)

# Persist the raw predictions. The file is opened in a `with` block so the
# handle is always flushed and closed, even if pickling raises.
# The timestamp in the name keeps successive runs from overwriting each other.
with open("minidata/knn-sotedRes-straight-dist-%d.pickled" % time(), "w") as pickleOut:
    cPickle.dump(predictions, pickleOut)

# Write the predictions as CSV: one comma-separated line per prediction row,
# no trailing newline. `f` stays bound (closed) afterwards, as in the original.
with open("result%d.csv" % time(), "w") as f:
    f.write("\n".join([",".join([str(x) for x in i]) for i in predictions]))
import data
import math


def _binarize(rows):
    """Return a copy of *rows* where each value is 1 if it is > 0, else 0."""
    return [[int(x > 0) for x in row] for row in rows]


def _ceil_log(rows):
    """Return a copy of *rows* with each value replaced by ceil(ln(x)).

    Values that are not strictly positive map to 0, so the logarithm is
    never evaluated outside its domain.
    """
    return [[int(math.ceil(math.log(x) if x > 0 else 0)) for x in row]
            for row in rows]


def _join_columns(raw, binary, logged):
    """Concatenate, row by row, the raw, binary and log feature columns."""
    return [list(r) + list(b) + list(l) for r, b, l in zip(raw, binary, logged)]


def _format_tsv(rows):
    """Serialise *rows* as tab-separated lines without a trailing newline.

    Every "c" character is removed from the string form of each value, as
    the original script did. NOTE(review): presumably this strips a prefix
    on some values -- confirm against the data module.
    """
    return "\n".join(["\t".join([str(x).replace("c", "") for x in row])
                      for row in rows])


# Load the training and test tables from the project `data` module.
d = data.getDataArray()
t = data.getTestArray()

# NOTE(review): `getBadAttributes(d, 10)` / `filterArr` are defined in `data`;
# presumably they select and remove unwanted attributes -- confirm there.
# The same attribute selection (derived from the training data) is applied to
# both tables.
a10 = data.getBadAttributes(d, 10)
d10 = data.filterArr(d, a10)
t10 = data.filterArr(t, a10)

# Derived feature tables: presence indicator and ceil-log of each value.
binD = _binarize(d10)
binT = _binarize(t10)

logD = _ceil_log(d10)
logT = _ceil_log(t10)

# Build each augmented table over its OWN rows. The previous single loop ran
# over range(len(t10)) while indexing d10, which dropped training rows when
# the training set was longer than the test set and raised IndexError when it
# was shorter.
newD = _join_columns(d10, binD, logD)
newT = _join_columns(t10, binT, logT)

# Write both tables; `with` guarantees each handle is flushed and closed.
# The file names (including the spelling of the first one) are kept as-is
# because other scripts may read them.
with open("plusBinLogTraingingData.csv", "w") as f:
    f.write(_format_tsv(newD))

with open("plusBinLogTestData.csv", "w") as f:
    f.write(_format_tsv(newT))