def text_features(self, comment):
    """Build the list of text-derived features for one comment.

    Args:
        comment: Object exposing ``get("body")``; the body must be a
            string, since it is measured with ``len`` and split on spaces.

    Returns:
        A list ``[num_chars, num_links, num_words, num_words,
        avg_word_length, sentiment]`` where ``num_links`` is whatever
        ``count_links`` returns for the body and ``sentiment`` is whatever
        ``self.sentiment_analyzer.analyze`` returns for the output of
        ``self.bow_analyzer``.
    """
    # Fetch the body once instead of on every use.
    body = comment.get("body")

    num_chars = len(body)
    num_links = count_links(body)

    # Splitting on a single space keeps empty tokens produced by repeated
    # spaces; they count as words of length 0.
    tokens = body.split(' ')
    num_words = len(tokens)
    total_token_chars = sum(len(token) for token in tokens)
    # str.split(' ') always yields at least one token, so num_words >= 1.
    # The float() conversion keeps true division on every Python version.
    avg_word_length = float(total_token_chars) / float(num_words)

    sentiment = self.sentiment_analyzer.analyze(self.bow_analyzer(body))

    # NOTE(review): num_words occupies two slots of the vector. It is kept
    # as-is so the vector length stays stable for consumers -- confirm
    # whether the second slot was meant to hold a different feature.
    return [
        num_chars,
        num_links,
        num_words,
        num_words,
        avg_word_length,
        sentiment,
    ]
def text_features(self, comment):
    """Build the list of text-derived features for one comment.

    Args:
        comment: Object exposing ``get("body")``; the body must be a
            string, since it is measured with ``len`` and split on spaces.

    Returns:
        A list ``[num_chars, num_links, num_words, num_words,
        avg_word_length, sentiment]`` where ``num_links`` is whatever
        ``count_links`` returns for the body and ``sentiment`` is whatever
        ``self.sentiment_analyzer.analyze`` returns for the output of
        ``self.bow_analyzer``.
    """
    # Fetch the body once instead of on every use.
    body = comment.get("body")

    num_chars = len(body)
    num_links = count_links(body)

    # Splitting on a single space keeps empty tokens produced by repeated
    # spaces; they count as words of length 0.
    tokens = body.split(' ')
    num_words = len(tokens)
    total_token_chars = sum(len(token) for token in tokens)
    # str.split(' ') always yields at least one token, so num_words >= 1.
    # The float() conversion keeps true division on every Python version.
    avg_word_length = float(total_token_chars) / float(num_words)

    sentiment = self.sentiment_analyzer.analyze(self.bow_analyzer(body))

    # NOTE(review): num_words occupies two slots of the vector. It is kept
    # as-is so the vector length stays stable for consumers -- confirm
    # whether the second slot was meant to hold a different feature.
    return [
        num_chars,
        num_links,
        num_words,
        num_words,
        avg_word_length,
        sentiment,
    ]
def test_count_links():
    """Check count_links against the BODY1 and BODY2 fixtures.

    BODY1 is expected to yield 0 and BODY2 is expected to yield 2.
    """
    # The fixtures are evaluated one at a time, in order, so a failure on
    # BODY1 is reported before BODY2 is touched.
    links_in_first_body = count_links(BODY1)
    assert links_in_first_body == 0

    links_in_second_body = count_links(BODY2)
    assert links_in_second_body == 2