def text_features(self, comment):
        """Build the text-derived feature list for a single comment.

        Args:
            comment: Object exposing ``get("body")``; the body is expected
                to be a string, since it is measured with ``len`` and split
                with ``str.split``.

        Returns:
            A six-element list laid out as ``[num_chars, num_links,
            num_words, num_words, avg_word_length, sentiment]``, where
            ``sentiment`` is whatever ``self.sentiment_analyzer.analyze``
            returns for ``self.bow_analyzer(body)``.
        """
        # Look the body up once instead of on every use.
        body = comment.get("body")

        num_chars = len(body)
        num_links = count_links(body)

        # Splitting on a single space keeps empty tokens (consecutive spaces
        # each add a "word"), and an empty body still yields one empty
        # token, so num_words is never zero and the division below is safe.
        tokens = body.split(' ')
        num_words = len(tokens)
        total_token_length = sum(len(token) for token in tokens)
        avg_word_length = float(total_token_length) / float(num_words)

        sentiment = self.sentiment_analyzer.analyze(self.bow_analyzer(body))

        # NOTE(review): num_words occupies two slots of the result. The
        # layout is preserved so positions do not shift for consumers;
        # confirm whether the second slot was meant to hold another feature.
        return [num_chars, num_links, num_words, num_words,
                avg_word_length, sentiment]
    def text_features(self, comment):
        """Build the text-derived feature list for a single comment.

        Args:
            comment: Object exposing ``get("body")``; the body is expected
                to be a string, since it is measured with ``len`` and split
                with ``str.split``.

        Returns:
            A six-element list laid out as ``[num_chars, num_links,
            num_words, num_words, avg_word_length, sentiment]``, where
            ``sentiment`` is whatever ``self.sentiment_analyzer.analyze``
            returns for ``self.bow_analyzer(body)``.
        """
        # Look the body up once instead of on every use.
        body = comment.get("body")

        num_chars = len(body)
        num_links = count_links(body)

        # Splitting on a single space keeps empty tokens (consecutive spaces
        # each add a "word"), and an empty body still yields one empty
        # token, so num_words is never zero and the division below is safe.
        tokens = body.split(' ')
        num_words = len(tokens)
        total_token_length = sum(len(token) for token in tokens)
        avg_word_length = float(total_token_length) / float(num_words)

        sentiment = self.sentiment_analyzer.analyze(self.bow_analyzer(body))

        # NOTE(review): num_words occupies two slots of the result. The
        # layout is preserved so positions do not shift for consumers;
        # confirm whether the second slot was meant to hold another feature.
        return [
            num_chars, num_links, num_words, num_words, avg_word_length,
            sentiment
        ]
def test_count_links():
    """Check count_links against the expected counts for the sample bodies."""
    expectations = ((BODY1, 0), (BODY2, 2))
    for body, expected in expectations:
        assert count_links(body) == expected
Example #4
0
def test_count_links():
    """Check count_links against the expected counts for the sample bodies."""
    expectations = ((BODY1, 0), (BODY2, 2))
    for body, expected in expectations:
        assert count_links(body) == expected