Beispiel #1
0
 def getBigramFrequency(self, test_method, force_update = False):
     '''
     Scores the bigrams found in self.words with test_method and prints
     every scored entry whose two words are both function words, followed
     by a separator line.

     test_method  -- scoring function handed to finder.score_ngrams()
     force_update -- accepted but not read by this method (nothing is
                     cached or loaded here)

     Returns None; the results are only printed.
     '''
     # math.log() raises ValueError for token_count - 1 <= 0; such a corpus
     # is treated as having no bigrams to report.
     if self.token_count > 1:
         finder = BigramCollocationFinder.from_words(self.words)
         # Minimum bigram frequency grows with the log of the token count.
         min_freq = math.ceil(math.log(self.token_count - 1) / 3) - 1 #@UndefinedVariable
         finder.apply_freq_filter(min_freq)
         cfword = fwords.fwords()
         for entry in finder.score_ngrams(test_method):
             # entry[0] holds the two words of the bigram.
             first, second = entry[0]
             if cfword.isFunctionWord(first) and cfword.isFunctionWord(second):
                 # Single-argument print(...) behaves the same on Python 2 and 3.
                 print(entry)
     print("---------------------------------------")
Beispiel #2
0
 def getRelativeFunctionWordFrequency(self, force_update = False):
     '''
     uses the word of fwords and returns the relative frequency of their appearance to
     the number of tokens in the brown corpus, excluding numbers and punctuation (= 1016752)
     '''
     name = self.getCorpusName()
     force_update = True if name == "temp" else force_update;
     #generating and storing new data or using cached values    
     if force_update or not self.isCached(name):
         cfword = fwords.fwords()
         cfword.processWordArray(self.words)
         rel_frequencies = cfword.relativeFrequency(self.token_count) 
         abs_frequencies = cfword.getCount()
         self.store(abs_frequencies, rel_frequencies, name)
         return rel_frequencies
     else:
         rel_frequencies = self.loadRelativeFrequencies(name)
         abs_frequencies = self.loadAbsoluteFrequencies(name)