def testComputGenderBucket(self):
    """Print each item returned by Statistics.computeGenderBucket.

    The whole result of FileUtil.readFile for the dataset workbook is handed
    to computeGenderBucket unchanged. Nothing is asserted; the output is
    meant to be inspected by eye.
    """
    reader = FileUtil()
    rows = reader.readFile("../resource/dataset.xls")
    stats = Statistics()
    for entry in stats.computeGenderBucket(rows):
        print(entry)
def testComputeNDBucketEnglish4(self):
    """Print the size, minimum and maximum of every bucket for column 18.

    The dataset is loaded, its first two rows are dropped (presumably
    header rows -- TODO confirm) and column index 18 is passed to
    Statistics.computeNDBuckets. The report is framed by an "ENGLISH 4"
    banner and a closing rule. Nothing is asserted.
    """
    reader = FileUtil()
    rows = reader.readFile("../resource/dataset.xls")
    stats = Statistics()
    table = numpy.array(rows[2:])
    buckets = stats.computeNDBuckets(table[:, 18])

    print("-------------------ENGLISH 4--------------------------")
    for number, members in enumerate(buckets):
        size = len(members)
        lowest = min(members)
        highest = max(members)
        print("".join([
            "bucket#", str(number),
            " total: ", str(size),
            " min: ", str(lowest),
            " max: ", str(highest),
        ]))
    print("-----------------------------------------------------")
def testComputeNDBucket(self):
    """Print how many values land in each bucket for column 7.

    The dataset is loaded, its first two rows are dropped (presumably
    header rows -- TODO confirm) and column index 7 is passed to
    Statistics.computeNDBuckets. Nothing is asserted.

    NOTE(review): a second method with this exact name is defined further
    down in this file (it buckets column 13). If both sit in the same
    class, the later definition replaces this one and this test never runs.
    """
    reader = FileUtil()
    rows = reader.readFile("../resource/dataset.xls")
    stats = Statistics()
    table = numpy.array(rows[2:])
    buckets = stats.computeNDBuckets(table[:, 7])

    for number, members in enumerate(buckets):
        print("".join(["bucket#", str(number), " total: ", str(len(members))]))
def testComputeND(self):
    """Print each value of column 7 beside its computeNormalDistribution result.

    The dataset is loaded, its first two rows are dropped (presumably
    header rows -- TODO confirm) and column index 7 is passed to
    Statistics.computeNormalDistribution. Each output line has the form
    "<value> ** <result at the same index>". Nothing is asserted.
    """
    reader = FileUtil()
    rows = reader.readFile("../resource/dataset.xls")
    stats = Statistics()
    table = numpy.array(rows[2:])
    distribution = stats.computeNormalDistribution(table[:, 7])

    # The result is looked up by position, exactly as the column is walked.
    for position, mark in enumerate(table[:, 7]):
        print(" ** ".join([str(mark), str(distribution[position])]))
def testComputeNDBucket(self):
    """Print the size, minimum and maximum of every bucket for column 13.

    The dataset is loaded, its first two rows are dropped (presumably
    header rows -- TODO confirm) and column index 13 is passed to
    Statistics.computeNDBuckets. The report is printed under a
    "Marks obtained by students in college" banner. Nothing is asserted.

    NOTE(review): an earlier method in this file carries the same name (it
    buckets column 7). If both belong to the same class, this definition
    replaces the earlier one, which then never runs.
    """
    reader = FileUtil()
    rows = reader.readFile("../resource/dataset.xls")
    stats = Statistics()
    table = numpy.array(rows[2:])
    buckets = stats.computeNDBuckets(table[:, 13])

    print("-------------------Marks obtained by students in college--------------------------")
    for number, members in enumerate(buckets):
        size = len(members)
        lowest = min(members)
        highest = max(members)
        print("".join([
            "bucket#", str(number),
            " total: ", str(size),
            " min: ", str(lowest),
            " max: ", str(highest),
        ]))
def testComputNameLength(self):
    """Print the indexed results of Statistics.computeNameLengthBucket.

    The dataset is loaded, its first two rows are dropped (presumably
    header rows -- TODO confirm) and the remaining rows are passed as one
    array to computeNameLengthBucket. Each result is printed as
    "<index> <value>" under an "Average length of names" banner. Nothing
    is asserted.
    """
    reader = FileUtil()
    rows = reader.readFile("../resource/dataset.xls")
    stats = Statistics()
    table = numpy.array(rows[2:])
    nameLengths = stats.computeNameLengthBucket(table)

    print("----------Average length of names-------------")
    for position, average in enumerate(nameLengths):
        print(" ".join([str(position), str(average)]))
def testCompareWithPerformance(self):
    """Print a per-bucket breakdown from computeNDBucketsWrtPrediction.

    The dataset is loaded, its first two rows are dropped (presumably
    header rows -- TODO confirm) and column indices 8 and 30 are passed to
    Statistics.computeNDBucketsWrtPrediction. The result is walked as a
    sequence of outer buckets, each holding a sequence of inner groups.
    For every outer bucket a "$performer:" line is printed, then for each
    inner group its share of the bucket's total element count and its
    size, minimum and maximum. Nothing is asserted.
    """
    reader = FileUtil()
    rows = reader.readFile("../resource/dataset.xls")
    stats = Statistics()
    table = numpy.array(rows[2:])
    outerBuckets = stats.computeNDBucketsWrtPrediction(table[:, 8], table[:, 30])

    for performer, groups in enumerate(outerBuckets):
        # Element count over all inner groups; denominator for the shares.
        memberCount = sum(len(group) for group in groups)
        print("$performer:" + str(performer))
        for grade, group in enumerate(groups):
            share = len(group) / float(memberCount)
            print("".join([
                "probability#", str(share),
                "-- P(", str(performer), "|", str(grade), ")",
            ]))
            print("".join([
                "grade#", str(grade),
                " total: ", str(len(group)),
                " min: ", str(min(group)),
                " max: ", str(max(group)),
            ]))
def computeFullDataGrades(self):
    """Grade every mapped dataset column on a 0-4 scale.

    Reads "../resource/dataset.xls" and drops the first two rows
    (presumably header rows -- TODO confirm). For each of the 19 feature
    slots, the dataset column is selected through ``self.atMap(slot)`` and
    passed to ``Statistics.computeNDBuckets``; the maximum of each of the
    first four buckets becomes a grade threshold for that slot.

    Every cell is then parsed with ``decimal.Decimal`` and graded by the
    highest threshold it reaches: 4 for the fourth threshold down to 1 for
    the first, and 0 when it is below all of them. A cell that Decimal
    cannot parse is replaced by its slot's second threshold before grading.

    Changes from the previous version:
      * Thresholds are now computed for all 19 slots. The old loop stopped
        at slot 17, leaving the last slot's thresholds at 0, so every
        non-negative value there was graded 4.
      * The grade matrix is returned instead of being discarded.
      * Unused locals were removed, one of which shadowed the builtin
        ``tuple``.
      * The exception is caught as ``decimal.InvalidOperation`` using
        syntax valid on both Python 2.6+ and Python 3.

    Returns:
        numpy array of shape (number of data rows, 19) holding the grades.
    """
    featureCount = 19
    thresholdCount = 4  # four thresholds give the five grades 0..4

    fileutil = FileUtil()
    data = fileutil.readFile("../resource/dataset.xls")
    statistics = Statistics()
    mat = numpy.array(data[2:])

    rmat = numpy.zeros((len(mat), featureCount))
    gradingCritera = numpy.zeros((featureCount, thresholdCount))

    # Derive the grade thresholds of every feature slot from its buckets.
    for i in range(featureCount):
        bucket = statistics.computeNDBuckets(mat[:, self.atMap(i)])
        for j in range(thresholdCount):
            gradingCritera[i][j] = max(bucket[j])

    for i in range(rmat.shape[0]):
        for j in range(rmat.shape[1]):
            thresholds = gradingCritera[j]
            try:
                x = decimal.Decimal(mat[i][self.atMap(j)])
            except decimal.InvalidOperation:
                # Unparseable cell: fall back to the slot's second threshold.
                x = thresholds[1]

            # Test the highest threshold first. rmat starts out as zeros,
            # so a value below every threshold keeps grade 0.
            for grade in range(thresholdCount, 0, -1):
                if x >= thresholds[grade - 1]:
                    rmat[i][j] = grade
                    break

    return rmat
def testReadFile(self):
    """Call FileUtil.readFile on the dataset workbook and discard the result.

    Nothing is asserted, so the test only fails if the read itself raises.
    A placeholder assertion that used to sit here was already disabled.
    """
    reader = FileUtil()
    reader.readFile("../resource/dataset.xls")