Esempio n. 1
0
    def adjustsims(self, myBless, meanpoly, sdpoly):
        """Rescore every neighbour in ``self.tuplelist`` and rebuild the top-k list.

        For each ``(similarity, neighbour)`` pair the widths of this word and
        of the neighbour are looked up in ``myBless.countdict`` (second element
        of the stored pair, ``0`` when the key is absent) and combined with
        ``widthfunction``.  ``meanpoly`` and ``sdpoly`` are called with that
        combined width, and the value stored in ``self.allsims[neighbour]`` is
        ``normal(mean, sd).cdf(similarity)``.

        Afterwards ``self.tuplelist`` is emptied and ``self.topk(self.getk())``
        is called.

        :param myBless: object exposing a ``countdict`` mapping whose values
            are 2-element pairs
        :param meanpoly: callable taking the joint width
        :param sdpoly: callable taking the joint width
        """
        absent = (0, 0)

        _, own_width = myBless.countdict.get(self.word, absent)
        for score, neighbour in self.tuplelist:
            # Neighbour names are passed through untag(..., '/') before lookup.
            _, other_width = myBless.countdict.get(untag(neighbour, '/'), absent)
            combined = widthfunction(own_width, other_width)
            centre = meanpoly(combined)
            spread = sdpoly(combined)
            self.allsims[neighbour] = normal(centre, spread).cdf(score)

        # Drop the old ranking before asking topk to rebuild it.
        self.tuplelist = []
        self.topk(self.getk())
Esempio n. 2
0
 def get_topsim(self,concept,wordlist):
     #for given concept find closest neighbour in wordlist and return rank and sim
     rank=1
     topsim=0
     maxrank=1000
     toprank=maxrank
     vector=self.vectordict.get(concept,None)
     if vector!=None:
         for (sim, word) in vector.tuplelist: #sorted list of concepts neighbours
             if untag(word,'/') in wordlist: #hey presto found the nearest one
                 topsim=sim
                 toprank=rank
                 break
             else:
                 rank+=1
     else:
         print "Warning: No vector for: ",concept
     #convertrank=float(maxrank-toprank)/float(maxrank)
     return (toprank,topsim)
Esempio n. 3
0
    def readsomesims(self,entrylist):

        print"Reading sim file "+self.simcachefile
        linesread=0
        instream=open(self.simcachefile,'r')
        for line in instream:
            if untag(line.split('\t')[0],'/') in entrylist:
                self.processsimline(line.rstrip())
            linesread+=1
            if self.updated==len(entrylist):
                #all found
                break
            if (linesread%1000 == 0):
                #print "Read "+str(linesread)+" lines and updated "+str(self.updated)+" similarity vectors"
                sys.stdout.flush()
                #return
        self.topk(self.k)
        print "Read "+str(linesread)+" lines and updated "+str(self.updated)+" vectors"

        instream.close()
Esempio n. 4
0
 def correlate(self,myBless,displaylist=[0,2,3]):
     labels=['Log Width','Log Frequency','Average Similarity','Sd similarity']
     mymatrix=[[],[],[],[]]
     polys=[]
     for concept in myBless.entrydict.keys():
         concept2=(concept,'N')
         self.vectordict[concept2].analyse()
         (freq,width)=myBless.countdict.get(concept,(0,0))
         freq=math.log(float(freq))
   #      width=math.log(float(width))
         if self._do_top:
             sim=self.vectordict[concept2].topsim
             nn = untag(self.vectordict[concept2].nearestneighbour,'/')
             (f2,w2)=myBless.countdict.get(nn,(0,0))
             f2=math.log(float(f2))
   #          w2=math.log(float(w2))
  #           freq=math.pow(float(f2)*float(freq),0.5)
            # width = math.pow(float(w2)*float(width),0.5)
             width = widthfunction(width,w2)
        #     print nn, sim,f2,w2, width
             labels[2]='Top Similarity'
         else:
             sim=float(self.vectordict[concept2].avgsim)
             width=widthfunction(width,width)
         sd=float(self.vectordict[concept2].sd)
         #print concept, width, freq, sim,sd
         mymatrix[1].append(freq)
         mymatrix[2].append(sim)
         mymatrix[3].append(sd)
         mymatrix[0].append(width)
     for i in range(len(displaylist)-1):
         for j in range(i+1,len(displaylist)):
             print labels[displaylist[i]],labels[displaylist[j]]
             xs=np.array(mymatrix[displaylist[i]])
             ys=np.array(mymatrix[displaylist[j]])
             whichpoly=(j+1)%2
             whichpoly=0
             #print whichpoly
             polys.append(showpoly(xs,ys,labels[displaylist[i]],labels[displaylist[j]])[whichpoly])
     return polys
Esempio n. 5
0
 def allsims(self,entrylist):
     if self.blesscache:
         self.simcachefile=self.simcachefile+".blesscache"
     print"Reading sim file "+self.simcachefile
     linesread=0
     instream=open(self.simcachefile,'r')
     for line in instream:
         word=line.split('\t')[0]
         if self.poscheck(word,'/') and untag(word,'/') in entrylist:#check this is a word in blessDB with correct pos
             self.processsimline(line.rstrip())
         linesread+=1
         if (linesread%1000 == 0):
             print "Read "+str(linesread)+" lines and updated "+str(self.updated)+" similarity vectors"
             sys.stdout.flush()
             #return
     self.topk(self.k)
     print "Read "+str(linesread)+" lines and updated "+str(self.updated)+" vectors"
     print self.vectordict.keys()
     instream.close()
     if not self.blesscache:
         #write cache
         self.writecache()