Example #1
0
    def most_frequent(self, filtered_tokens, processed_tokens, header=True):
        """Build word-frequency tables for the filtered and processed tokens.

        Both token arguments are sequences of per-sentence token sequences.
        Every sentence is concatenated into one flat document, and the
        document is counted with ``FreqDist``.

        Args:
            filtered_tokens: Per-sentence token sequences after filtering.
            processed_tokens: Per-sentence token sequences after processing.
            header: When true, the first entry of each argument is dropped
                before counting (it is treated as a header row).

        Returns:
            A ``(filtered_most_common, processed_most_common)`` tuple. Each
            element is a list whose first item is ``['word', 'frequency']``
            followed by the ``(word, count)`` pairs returned by
            ``FreqDist(...).most_common()``.
        """
        if header:
            filtered_tokens = filtered_tokens[1:]
            processed_tokens = processed_tokens[1:]

        def flatten(sent_tokens):
            # Merge the per-sentence token sequences into one flat document.
            document = []
            for sent_token in sent_tokens:
                # don't add id to the total document: when self.id is
                # non-empty, the first item of each row is skipped.
                if len(self.id) > 0:
                    document.extend(sent_token[1:])
                else:
                    document.extend(sent_token)
            return document

        def frequency_table(document):
            # Count the tokens and put a header row in front of the counts.
            table = FreqDist(document).most_common()
            table.insert(0, ['word', 'frequency'])
            return table

        # Both documents go through the same helper. The original duplicated
        # the loop and, in the no-id branch, appended processed tokens to the
        # filtered document by mistake.
        filtered_most_common = frequency_table(flatten(filtered_tokens))
        processed_most_common = frequency_table(flatten(processed_tokens))

        return filtered_most_common, processed_most_common