def testComputGenderBucket(self):
     """Print every entry of the gender bucket computed from the dataset.

     Loads ``../resource/dataset.xls`` through ``FileUtil.readFile``, hands
     the loaded data to ``Statistics.computeGenderBucket`` and prints each
     item of the result on its own line.  Nothing is asserted; the output is
     meant to be inspected manually.
     """
     rows = FileUtil().readFile("../resource/dataset.xls")
     buckets = Statistics().computeGenderBucket(rows)

     for entry in buckets:
         print(entry)
 def testComputeNDBucketEnglish4(self):
     """Print size, minimum and maximum of every bucket for column index 18.

     The dataset is read from ``../resource/dataset.xls``; its first two
     rows are dropped before the remainder is turned into a numpy array.
     Column index 18 is passed to ``Statistics.computeNDBuckets`` and one
     summary line per returned bucket is printed between an "ENGLISH 4"
     header and a closing rule.  Nothing is asserted.
     """
     rows = FileUtil().readFile("../resource/dataset.xls")
     stats = Statistics()
     # Skip the first two rows of the sheet before building the matrix.
     matrix = numpy.array(rows[2:len(rows)])
     buckets = stats.computeNDBuckets(matrix[:, 18])

     print("-------------------ENGLISH 4--------------------------")
     for number, members in enumerate(buckets):
         summary = "".join([
             "bucket#", str(number),
             " total: ", str(len(members)),
             " min: ", str(min(members)),
             " max: ", str(max(members)),
         ])
         print(summary)
     print("-----------------------------------------------------")
 def testComputeNDBucketColumn7(self):
     """Print the size of every bucket computed for column index 7.

     This method used to be called ``testComputeNDBucket``, but a second
     method with that exact name is defined further down in this file.
     Within one class body the later definition replaces the earlier one,
     so this check could never run.  It carries a distinct name so that
     both checks exist side by side.

     The dataset is read from ``../resource/dataset.xls``; its first two
     rows are dropped before the remainder is turned into a numpy array.
     Column index 7 is passed to ``Statistics.computeNDBuckets`` and one
     line with the bucket index and its element count is printed per
     bucket.  Nothing is asserted.
     """
     rows = FileUtil().readFile("../resource/dataset.xls")
     stats = Statistics()
     # Skip the first two rows of the sheet before building the matrix.
     matrix = numpy.array(rows[2:len(rows)])
     buckets = stats.computeNDBuckets(matrix[:, 7])

     for number, members in enumerate(buckets):
         print("bucket#" + str(number) + " total: " + str(len(members)))
 def testComputeND(self):
     """Print each value of column index 7 next to its computed counterpart.

     The dataset is read from ``../resource/dataset.xls``; its first two
     rows are dropped before the remainder is turned into a numpy array.
     Column index 7 is passed to ``Statistics.computeNormalDistribution``
     and, for every element of the column, a line of the form
     ``<value> ** <result at the same index>`` is printed.  Nothing is
     asserted.
     """
     rows = FileUtil().readFile("../resource/dataset.xls")
     stats = Statistics()
     # Skip the first two rows of the sheet before building the matrix.
     matrix = numpy.array(rows[2:len(rows)])
     computed = stats.computeNormalDistribution(matrix[:, 7])

     for position, mark in enumerate(matrix[:, 7]):
         pair = [str(mark), str(computed[position])]
         print(" ** ".join(pair))
 def testComputeNDBucket(self):
     """Print size, minimum and maximum of every bucket for column index 13.

     The dataset is read from ``../resource/dataset.xls``; its first two
     rows are dropped before the remainder is turned into a numpy array.
     Column index 13 is passed to ``Statistics.computeNDBuckets`` and,
     after a header line, one summary line per returned bucket is printed.
     Nothing is asserted.
     """
     rows = FileUtil().readFile("../resource/dataset.xls")
     stats = Statistics()
     # Skip the first two rows of the sheet before building the matrix.
     matrix = numpy.array(rows[2:len(rows)])
     buckets = stats.computeNDBuckets(matrix[:, 13])

     print("-------------------Marks obtained by students in college--------------------------")
     for number, members in enumerate(buckets):
         summary = "".join([
             "bucket#", str(number),
             " total: ", str(len(members)),
             " min: ", str(min(members)),
             " max: ", str(max(members)),
         ])
         print(summary)
    def testComputNameLength(self):
        """Print the values returned by ``computeNameLengthBucket``.

        The dataset is read from ``../resource/dataset.xls``; its first two
        rows are dropped before the remainder is turned into a numpy array.
        The whole array is passed to ``Statistics.computeNameLengthBucket``
        and, after a header line, every returned item is printed together
        with its index.  Nothing is asserted.
        """
        rows = FileUtil().readFile("../resource/dataset.xls")
        stats = Statistics()
        # Skip the first two rows of the sheet before building the matrix.
        matrix = numpy.array(rows[2:len(rows)])

        name_lengths = stats.computeNameLengthBucket(matrix)
        print("----------Average length of names-------------")
        for position, average in enumerate(name_lengths):
            print(" ".join([str(position), str(average)]))
 def testCompareWithPerformance(self):
     """Print, per outer bucket, the share and range of each inner group.

     The dataset is read from ``../resource/dataset.xls``; its first two
     rows are dropped before the remainder is turned into a numpy array.
     Column indices 8 and 30 are passed to
     ``Statistics.computeNDBucketsWrtPrediction``.  For every item of the
     result (labelled "performer" in the output) the sizes of its inner
     groups are summed, and for each inner group (labelled "grade") two
     lines are printed: the group's fraction of that sum, and its size,
     minimum and maximum.  Nothing is asserted.
     """
     rows = FileUtil().readFile("../resource/dataset.xls")
     stats = Statistics()
     # Skip the first two rows of the sheet before building the matrix.
     matrix = numpy.array(rows[2:len(rows)])
     grouped = stats.computeNDBucketsWrtPrediction(matrix[:, 8], matrix[:, 30])

     for performer, grades in enumerate(grouped):
         # Denominator for the fractions printed below.
         member_count = sum(len(members) for members in grades)

         print("$performer:" + str(performer))
         for grade, members in enumerate(grades):
             share = len(members) / float(member_count)
             print("probability#" + str(share) + "--  P(" + str(performer) + "|" + str(grade) + ")")
             print("grade#" + str(grade) + " total: " + str(len(members)) + " min: " + str(min(members)) + " max: " + str(max(members)))
# Example #8
# 0
 def computeFullDataGrades(self):
     fileutil = FileUtil()
     data = fileutil.readFile("../resource/dataset.xls")
     statistics = Statistics()
     mat = numpy.array(data[2:len(data)]);
     rmat = numpy.zeros((len(mat),19))
     
     
     tuple = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18]
     gradingCritera = numpy.zeros((19,4))
     gradingTuple = [0,1,2,3] #we can fit 5 values in this range
     #for i in range(0,18):
     #   gradingCritera.insert(i, gradingTuple)
         #for j in range(0,18):
             #gradingCritera[i].insert(j)
   
     #fetch all the values from data
     for i in range(0,18):
         bucket = statistics.computeNDBuckets(mat[:,self.atMap(i)])
         for j in range(0,4):
             gradingCritera[i][j] = max(bucket[j]) 
     
     for i in range(rmat.shape[0]):
         for j in range(rmat.shape[1]):
             try:
                 x = decimal.Decimal(mat[i][self.atMap(j)])
             except InvalidOperation, ex:
                 x = gradingCritera[j][1] # passing it average value
             #print "i:" + str(i) + " j:" + str(j)
             if (x>=gradingCritera[j][3]):
                 rmat[i][j] = 4
             elif (x>=gradingCritera[j][2]):
                 rmat[i][j] = 3
             elif (x>=gradingCritera[j][1]):
                 rmat[i][j] = 2
             elif (x>=gradingCritera[j][0]):
                 rmat[i][j] = 1
             else:
                 rmat[i][j] = 0