Esempio n. 1
0
 def includepath(self,feat):
     """Decide whether feature `feat` passes the engine-wide path-type filters.

     The path type is whatever getpathtype(feat) returns.  A non-empty
     SimEngine.paths_to_include takes precedence and acts as a whitelist;
     otherwise a non-empty SimEngine.blacklist rejects the types it lists;
     when both are empty every feature is accepted.
     """
     whitelist=SimEngine.paths_to_include
     if len(whitelist)>0:
         return getpathtype(feat) in whitelist

     rejected=SimEngine.blacklist
     if len(rejected)>0:
         return getpathtype(feat) not in rejected

     # No filter configured: keep everything.
     return True
Esempio n. 2
0
    def computepathtotals(self):
        """Rebuild self.pathtotals: the summed feature values per path type.

        Every value in self.features is converted with float() and added to
        the running total stored under getpathtype(feature).  Any previous
        self.pathtotals is discarded.
        """
        totals={}
        self.pathtotals=totals
        for feature,value in self.features.items():
            pathtype=getpathtype(feature)
            totals[pathtype]=totals.get(pathtype,0.0)+float(value)
Esempio n. 3
0
    def showsuffix(self, path, minorder=1, maxorder=1):
        """Print the features whose path type ends with `path`, largest value first.

        path     -- suffix matched against getpathtype(feature)
        minorder -- lowest getorder(feature) value to include (inclusive)
        maxorder -- highest getorder(feature) value to include (inclusive)

        The output is a list of (feature, value) pairs sorted by value in
        descending order.  Nothing is returned.
        """
        feats = {}
        for feat, value in self.features.items():
            thispath = getpathtype(feat)
            order = getorder(feat)
            if thispath.endswith(path) and minorder <= order <= maxorder:
                feats[feat] = value

        values = sorted(feats.items(), key=itemgetter(1), reverse=True)
        # Parenthesised single-argument form: same output under Python 2,
        # and valid syntax under Python 3.
        print(values)
Esempio n. 4
0
    def compute_typetotals(self,type,cds=False):
        """Compute totals for the different paths over all entries.

        The totals are built from the column totals held in self.feattots
        and stored in self.typetots, keyed by getpathtype(feature).

        type -- key into self.coltots_loaded; when that entry is falsy,
                self.load_coltotals(type, cds) is called first
        cds  -- passed through unchanged to self.load_coltotals
        """
        if not self.coltots_loaded[type]:
            self.load_coltotals(type,cds)
        # Parenthesised single-string form: same output under Python 2,
        # and valid syntax under Python 3.
        print("Computing path totals C<*,t,*>")
        totals={}
        self.typetots=totals
        for feature,count in self.feattots.items():
            pathtype=getpathtype(feature)
            totals[pathtype]=totals.get(pathtype,0.0)+float(count)
Esempio n. 5
0
    def reweight(self,weighting,feattots,typetots,grandtot=0,ppmithreshold=0,saliency=0):
        """Recompute self.featureweights from the raw counts in self.features.

        weighting     -- container (or string) of scheme names; the names
                         tested here are "ttest", "gof_ppmi", "pnppmi" and "plmi"
        feattots      -- per-feature totals, C<*,p,w2>
        typetots      -- per-path-type totals, C<*,p,*>
        grandtot      -- grand total, only used by the "gof_ppmi" variant
        ppmithreshold -- minimum t-test score, or the shift subtracted from the PMI
        saliency      -- forwarded to self.reducesaliency() after weighting

        Only features scoring strictly above the threshold are stored.
        self.lgth and self.wdth are reset to -1.
        Raises KeyError when a feature or its path type is missing from
        feattots / typetots.
        """
        self.featureweights={}
        self.lgth=-1
        self.wdth=-1
        for feature,rawfreq in self.features.items():
            freq=float(rawfreq)  # C<w1,p,w2>
            pathtype=getpathtype(feature)  # looked up once, reused below
            try:
                total=float(self.pathtotals[pathtype]) # C<w1,p,*>
            except (AttributeError,KeyError,TypeError,ValueError):
                # Path totals missing or unusable for this feature: fall back
                # to a tiny positive total instead of aborting the reweighting.
                total=0.0001
                print("Warning: no path total for %s: %s"%(feature,pathtype))
            feattot=float(feattots[feature]) #C<*,p,w2>
            typetot=float(typetots[pathtype]) #C<*,p,*>
            entrytotal=float(self.total) # C<w1,*,*>

            if "ttest" in weighting:
                # NOTE(review): flagged by the original author as incorrect --
                # "this should be the type total for the entry not the total".
                # The formula is left unchanged here.
                expected=(total*feattot)/(typetot*typetot)
                obs=freq/typetot
                score=(obs-expected)/math.pow(expected,0.5)
                if score>ppmithreshold:
                    self.featureweights[feature]=score
                continue

            try:
                if "gof_ppmi" in weighting:
                    pmi=math.log10((freq*grandtot)/(feattot*entrytotal))
                else:
                    pmi=math.log10((freq*typetot)/(feattot*total))
            except (ArithmeticError,ValueError):
                # Zero denominator or non-positive log argument: treat the
                # association as absent.
                pmi=0
            shifted_pmi=pmi-ppmithreshold
            if shifted_pmi>0:
                if "pnppmi" in weighting:
                    shifted_pmi=shifted_pmi * total/entrytotal
                if "plmi" in weighting:
                    shifted_pmi=shifted_pmi * freq/typetot
                self.featureweights[feature]=shifted_pmi
        self.reducesaliency(saliency)
Esempio n. 6
0
 def reducesaliency(self,saliency,saliencyperpath=False):
     """Trim self.featureweights to its highest-weighted entries.

     saliency        -- number of features to keep; 0 leaves the weights
                        untouched
     saliencyperpath -- when true the limit applies separately to each
                        path type (as given by getpathtype); otherwise it
                        applies to the feature set as a whole

     Features are considered in descending weight order.
     """
     if saliency==0:
         return

     ranked=sorted(self.featureweights.items(),key=itemgetter(1),reverse=True)
     kept={}
     self.featureweights=kept
     keptperpath={}
     kepttotal=0
     for feature,weight in ranked:
         pathtype=getpathtype(feature)
         pathcount=keptperpath.get(pathtype,0)
         if saliencyperpath:
             room=pathcount<saliency
         else:
             room=kepttotal<saliency
         if room:
             kept[feature]=weight
             keptperpath[pathtype]=pathcount+1
             kepttotal+=1
Esempio n. 7
0
    def profile(self, minorder=0, maxorder=10):
        """Print a per-path-type weight profile of self.features.

        minorder -- lowest getorder(feature) value to include (inclusive)
        maxorder -- highest getorder(feature) value to include (inclusive)

        Prints the total weight of all features, the total weight of the
        features within the requested order range, and a list of
        (path type, summed weight) pairs for that range sorted by weight in
        descending order.  Nothing is returned.
        """
        paths = {}
        totalweight = 0
        thisorderweight = 0
        for feat, weight in self.features.items():
            path = getpathtype(feat)
            order = getorder(feat)
            if minorder <= order <= maxorder:
                paths[path] = paths.get(path, 0) + weight
                thisorderweight += weight
            totalweight += weight

        # Single pre-formatted argument: same output as the Python 2 print
        # statement, and valid syntax under Python 3.
        print("total weight of features %s" % (totalweight,))
        print("total weight of required order features %s" % (thisorderweight,))
        ranked = sorted(paths.items(), key=itemgetter(1), reverse=True)

        print(ranked)