def getBigramFrequency(self, test_method, force_update = False):
    '''
    Print every scored bigram of self.words whose two words are both
    function words, followed by a separator line.

    Bigrams are gathered with BigramCollocationFinder, a minimum-frequency
    filter derived from self.token_count is applied, and the remaining
    bigrams are scored with test_method. Nothing is returned; all output
    goes to stdout.

    test_method  -- scoring function handed to finder.score_ngrams
    force_update -- accepted but not used by this method
    '''
    # Called exactly as before; the returned name is not used here.
    self.getCorpusName()

    bigram_finder = BigramCollocationFinder.from_words(self.words)
    # Frequency threshold: ceil(ln(token_count - 1) / 3) - 1.
    threshold = math.ceil(math.log(self.token_count - 1) / 3) - 1  #@UndefinedVariable
    bigram_finder.apply_freq_filter(threshold)

    function_words = fwords.fwords()
    for entry in bigram_finder.score_ngrams(test_method):
        # entry[0] is indexed as the word pair; the whole entry is printed.
        if not function_words.isFunctionWord(entry[0][0]):
            continue
        if function_words.isFunctionWord(entry[0][1]):
            print(entry)
    # NOTE(review): the source was whitespace-collapsed; the separator is
    # assumed to be printed once after the loop -- confirm against history.
    print("---------------------------------------")
def getRelativeFunctionWordFrequency(self, force_update = False):
    '''
    Return the relative frequencies of the fwords function words for this
    corpus.

    Frequencies are computed relative to self.token_count. (The original
    note on this method described that as the number of tokens in the
    brown corpus, excluding numbers and punctuation (= 1016752).)

    The values are recomputed and written with self.store when
    force_update is true, when the corpus name is "temp", or when
    self.isCached(name) is false. Otherwise they are read back with
    self.loadRelativeFrequencies.

    force_update -- recompute even when cached data exists

    Returns the relative frequencies on both the recompute and the cached
    path.
    '''
    name = self.getCorpusName()
    # The "temp" corpus is always recomputed, regardless of the argument.
    if name == "temp":
        force_update = True

    # Generate and store new data ...
    if force_update or not self.isCached(name):
        cfword = fwords.fwords()
        cfword.processWordArray(self.words)
        rel_frequencies = cfword.relativeFrequency(self.token_count)
        abs_frequencies = cfword.getCount()
        self.store(abs_frequencies, rel_frequencies, name)
        return rel_frequencies

    # ... or use the cached values.
    rel_frequencies = self.loadRelativeFrequencies(name)
    # The absolute frequencies are not needed for the result; the load is
    # kept because its effects are not visible from this method.
    self.loadAbsoluteFrequencies(name)
    # Previously this path fell off the end and returned None.
    return rel_frequencies