Beispiel #1
0
def main(args):
    """Prune outlying points from every cluster and recompute its centroid.

    ``args`` is indexed as ``args[centroid]`` while iterating over it, so it
    is presumably a dict mapping an ``"x,y"`` centroid string to an iterable
    of ``"x,y"`` point strings with integer coordinates -- verify against
    the caller.

    For each centroid the Euclidean distance of every point is computed,
    then the median distance and the median absolute deviation from it.
    A point is set aside only when its deviation is at least twice the
    median deviation AND it lies more than 100 units from the centroid.
    The remaining points are stored under a new ``"x,y"`` key built from
    the floored integer mean of their coordinates, and the resulting
    mapping is handed to ``clustering.main`` once, after all centroids have
    been processed.  Nothing is returned.
    """
    point2centroid = args
    point2centroidfinal = defaultdict(list)
    # Collected for inspection only; nothing in this function reads it back.
    point2centroidreject = defaultdict(list)

    for centroid in point2centroid:
        centroidX, centroidY = centroid.split(',')
        centroidX, centroidY = int(centroidX), int(centroidY)

        # Parse each point once and keep it next to its coordinates.
        parsed = []
        for point in point2centroid[centroid]:
            pointX, pointY = point.split(',')
            parsed.append((point, int(pointX), int(pointY)))
        if not parsed:
            # No points means no median and no mean to compute.
            continue

        # Each list below is its own object.  A chained ``a = b = []`` would
        # bind a single shared list, and appending deviations to it while
        # iterating the distances would never terminate.
        distlist = [((centroidX - x) ** 2 + (centroidY - y) ** 2) ** .5
                    for _, x, y in parsed]
        pmed = stats.medianscore(distlist)
        # Median absolute deviation, computed once over the whole cluster.
        med_pdev = stats.medianscore([abs(d - pmed) for d in distlist])

        kept = []
        sumX = sumY = 0
        for (point, x, y), dist in zip(parsed, distlist):
            # With a zero median deviation no point is treated as an outlier.
            is_outlier = med_pdev > 0 and abs(dist - pmed) / med_pdev >= 2
            if is_outlier and dist > 100:
                point2centroidreject[centroid].append(point)
            else:
                kept.append(point)
                sumX += x
                sumY += y

        # At least half of the deviations are <= med_pdev, so ``kept`` is
        # never empty here.  Floor division keeps the key an integer pair
        # that ``int()`` can parse again on a later pass.
        newCentroid = '%d,%d' % (sumX // len(kept), sumY // len(kept))
        # extend() merges clusters that land on the same new centroid and
        # keeps the cluster when the centroid did not move.
        point2centroidfinal[newCentroid].extend(kept)

    clustering.main(point2centroidfinal)
Beispiel #2
0
    def test_medianscore(self):
        """Check stats.medianscore against the list and array fixtures."""
        # The four stock fixtures (described as even-length data) must all
        # yield a median of 10.5.
        even_samples = (self.L, self.LF, self.A, self.AF)
        for sample in even_samples:
            self.EQ(stats.medianscore(sample), 10.5)

        # Appending 20 to the base list gives the odd-length case; it is
        # checked both as a plain list and converted through num_array().
        extended = self.L + [20]
        odd_samples = (extended, num_array(extended))
        for sample in odd_samples:
            self.EQ(stats.medianscore(sample), 11)
Beispiel #3
0
    def test_medianscore(self):
        """Verify the median score for even- and odd-length inputs."""
        expected_even = 10.5
        expected_odd = 11

        # Stock fixtures (described as even-length data).
        for fixture in [self.L, self.LF, self.A, self.AF]:
            self.EQ(stats.medianscore(fixture), expected_even)

        # Odd-length data: the base list plus one extra value, as a list
        # and as the num_array() conversion of that list.
        longer_list = self.L + [20]
        longer_array = num_array(longer_list)
        for fixture in [longer_list, longer_array]:
            self.EQ(stats.medianscore(fixture), expected_odd)
Beispiel #4
0
 def _format_ratings(self, output):
     ratings = []
     for result in self.data:
         try:
             ratings.append( float(result[0].toPython()) )
         except ValueError:
             pass
     output['results'] = {}
     if ratings:
         output['results']['median'] = stats.medianscore(ratings)
         output['results']['mode'] = stats.mode(ratings)
         output['results']['mean'] = stats.mean(ratings)
         output['results']['histogram'] = stats.histogram(ratings,6)
         output['results']['cumfreq'] = stats.cumfreq(ratings,6)
     output['results']['count'] = len(ratings)
     return output
Beispiel #5
0
# Sample inputs.  `l` is defined before this excerpt.  `lf` is 1..20 with the
# third entry stored as a float, `a`/`af` are the array forms of `l`/`lf`,
# and `ll`/`aa` hold five references to `l` as a list and as an array.
lf = [1, 2, 3.0] + list(range(4, 21))
a, af = N.array(l), N.array(lf)
ll = [l] * 5
aa = N.array(ll)

print('\nCENTRAL TENDENCY')
# Each of these functions is shown on the list, float-list, array and
# float-array inputs, in that order.
for _name in ('geometricmean', 'harmonicmean', 'mean', 'median',
              'medianscore'):
    _fn = getattr(stats, _name)
    print(_name + ':', _fn(l), _fn(lf), _fn(a), _fn(af))
print('mode:', stats.mode(l), stats.mode(a))

print('\nMOMENTS')
_usual_order = (l, lf, a, af)
# 'variation' is printed with the two integer inputs first.
for _name, _samples in (('moment', _usual_order),
                        ('variation', (l, a, lf, af)),
                        ('skew', _usual_order),
                        ('kurtosis', _usual_order)):
    _fn = getattr(stats, _name)
    print(_name + ':', *[_fn(_sample) for _sample in _samples])

# Trimmed statistics are shown for the array inputs only, limits (5, 17).
_limits = (5, 17)
for _name in ('tmean', 'tvar', 'tstdev', 'tsem'):
    _fn = getattr(stats, _name)
    print(_name + ':', _fn(a, _limits), _fn(af, _limits))
N=numpy

l = range(1,21)
lf = range(1,21)
lf[2] = 3.0
a = N.array(l)
af = N.array(lf)
ll = [l]*5
aa = N.array(ll)

print '\nCENTRAL TENDENCY'
print 'geometricmean:',stats.geometricmean(l), stats.geometricmean(lf), stats.geometricmean(a), stats.geometricmean(af)
print 'harmonicmean:',stats.harmonicmean(l), stats.harmonicmean(lf), stats.harmonicmean(a), stats.harmonicmean(af)
print 'mean:',stats.mean(l), stats.mean(lf), stats.mean(a), stats.mean(af)
print 'median:',stats.median(l),stats.median(lf),stats.median(a),stats.median(af)
print 'medianscore:',stats.medianscore(l),stats.medianscore(lf),stats.medianscore(a),stats.medianscore(af)
print 'mode:',stats.mode(l),stats.mode(a)

print '\nMOMENTS'
print 'moment:',stats.moment(l),stats.moment(lf),stats.moment(a),stats.moment(af)
print 'variation:',stats.variation(l),stats.variation(a),stats.variation(lf),stats.variation(af)
print 'skew:',stats.skew(l),stats.skew(lf),stats.skew(a),stats.skew(af)
print 'kurtosis:',stats.kurtosis(l),stats.kurtosis(lf),stats.kurtosis(a),stats.kurtosis(af)
print 'tmean:',stats.tmean(a,(5,17)),stats.tmean(af,(5,17))
print 'tvar:',stats.tvar(a,(5,17)),stats.tvar(af,(5,17))
print 'tstdev:',stats.tstdev(a,(5,17)),stats.tstdev(af,(5,17))
print 'tsem:',stats.tsem(a,(5,17)),stats.tsem(af,(5,17))
print 'describe:'
print stats.describe(l)
print stats.describe(lf)
print stats.describe(a)
def main(args):
    """Prune outlying points from every cluster and recompute its centroid.

    ``args`` is indexed as ``args[centroid]`` while iterating over it, so it
    is presumably a dict mapping an ``"x,y"`` centroid string to an iterable
    of ``"x,y"`` point strings with integer coordinates -- verify against
    the caller.

    For each centroid the Euclidean distance of every point is computed,
    then the median distance and the median absolute deviation from it.
    A point is set aside only when its deviation is at least twice the
    median deviation AND it lies more than 100 units from the centroid.
    The remaining points are stored under a new ``"x,y"`` key built from
    the floored integer mean of their coordinates, and the resulting
    mapping is passed to ``clustering.main``.  Nothing is returned.
    """
    point2centroid = args
    point2centroidfinal = defaultdict(list)
    # Collected for inspection only; nothing in this function reads it back.
    point2centroidreject = defaultdict(list)

    for centroid in point2centroid:
        centroidX, centroidY = centroid.split(',')
        centroidX, centroidY = int(centroidX), int(centroidY)

        # Parse each point once and keep it next to its coordinates.
        parsed = []
        for point in point2centroid[centroid]:
            pointX, pointY = point.split(',')
            parsed.append((point, int(pointX), int(pointY)))
        if not parsed:
            # No points means no median and no mean to compute.
            continue

        # Each list below is its own object.  A chained ``a = b = []`` would
        # bind a single shared list, and appending deviations to it while
        # iterating the distances would never terminate.
        distlist = [((centroidX - x) ** 2 + (centroidY - y) ** 2) ** .5
                    for _, x, y in parsed]
        pmed = stats.medianscore(distlist)
        med_pdev = stats.medianscore([abs(d - pmed) for d in distlist])

        kept = []
        sumX = sumY = 0
        for (point, x, y), dist in zip(parsed, distlist):
            # With a zero median deviation no point is treated as an outlier.
            is_outlier = med_pdev > 0 and abs(dist - pmed) / med_pdev >= 2
            if is_outlier and dist > 100:
                point2centroidreject[centroid].append(point)
            else:
                kept.append(point)
                sumX += x
                sumY += y

        # At least half of the deviations are <= med_pdev, so ``kept`` is
        # never empty here.  ``//`` matches Python 2 integer division and
        # keeps the key an integer pair; ``%d`` replaces the Python-2-only
        # backtick repr.
        newCentroid = '%d,%d' % (sumX // len(kept), sumY // len(kept))
        # Storing directly under the new key avoids the re-key-then-delete
        # step, which dropped the whole cluster whenever the centroid did
        # not move.  Clusters that land on the same centroid are merged.
        point2centroidfinal[newCentroid].extend(kept)

    clustering.main(point2centroidfinal)
Beispiel #8
0
lf = range(1, 21)
lf[2] = 3.0
a = N.array(l)
af = N.array(lf)
ll = [l] * 5
aa = N.array(ll)

print '\nCENTRAL TENDENCY'
print 'geometricmean:', stats.geometricmean(l), stats.geometricmean(
    lf), stats.geometricmean(a), stats.geometricmean(af)
print 'harmonicmean:', stats.harmonicmean(l), stats.harmonicmean(
    lf), stats.harmonicmean(a), stats.harmonicmean(af)
print 'mean:', stats.mean(l), stats.mean(lf), stats.mean(a), stats.mean(af)
print 'median:', stats.median(l), stats.median(lf), stats.median(
    a), stats.median(af)
print 'medianscore:', stats.medianscore(l), stats.medianscore(
    lf), stats.medianscore(a), stats.medianscore(af)
print 'mode:', stats.mode(l), stats.mode(a)

print '\nMOMENTS'
print 'moment:', stats.moment(l), stats.moment(lf), stats.moment(
    a), stats.moment(af)
print 'variation:', stats.variation(l), stats.variation(a), stats.variation(
    lf), stats.variation(af)
print 'skew:', stats.skew(l), stats.skew(lf), stats.skew(a), stats.skew(af)
print 'kurtosis:', stats.kurtosis(l), stats.kurtosis(lf), stats.kurtosis(
    a), stats.kurtosis(af)
print 'tmean:', stats.tmean(a, (5, 17)), stats.tmean(af, (5, 17))
print 'tvar:', stats.tvar(a, (5, 17)), stats.tvar(af, (5, 17))
print 'tstdev:', stats.tstdev(a, (5, 17)), stats.tstdev(af, (5, 17))
print 'tsem:', stats.tsem(a, (5, 17)), stats.tsem(af, (5, 17))
 def evaluate(self, *args, **params):
     """Forward all positional and keyword arguments to ``_stats.medianscore``.

     Returns whatever ``_stats.medianscore`` returns; ``self`` is not used.
     """
     outcome = _stats.medianscore(*args, **params)
     return outcome