Code Example #1
def pos_feature(features, sentence):

    sentence_pos = exp_replace.replace_emo(sentence)
    tokens = nltk.word_tokenize(sentence_pos)
    tokens = [(t.lower()) for t in tokens]
    pos_vector = sentiments.posvector(tokens)
    for j in range(len(pos_vector)):
        features["POS" + str(j + 1)] = pos_vector[j]
Code Example #2
def pos_feature(features, sentence):

    sentence_pos = exp_replace.replace_emo(sentence)
    tokens = nltk.word_tokenize(sentence_pos)
    tokens = [(t.lower()) for t in tokens]
    pos_vector = sentiments.posvector(tokens)
    for j in range(len(pos_vector)):
        features['POS' + str(j + 1)] = pos_vector[j]
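Code Examples #1 and #2 are identical apart from quote style, and both depend on project-specific helpers (exp_replace.replace_emo and sentiments.posvector) that are not part of this listing. A minimal, hypothetical usage sketch, assuming those modules and nltk are importable and using a made-up sentence:

# Hypothetical usage sketch, not part of the original listing: the features
# dict is filled in place, with one "POS<i>" entry per element of the POS vector.
features = {}
pos_feature(features, "I just love waiting in line for hours :)")
print(list(features))   # ['POS1', 'POS2', ..., 'POS<n>'] in insertion order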
Code Example #3
def sent_feature(features, sentence):
    sentence_sentiment = exp_replace.replace_emo(sentence)  # ":)" is replaced by "good" and ":(" by "sad"
    tokens = nltk.word_tokenize(sentence_sentiment)
    tokens = [(t.lower()) for t in tokens]

    mean_sentiment = sentiments.score_sentence(tokens)
    features['Positive sentiment'] = mean_sentiment[0]
    features['Negative sentiment'] = mean_sentiment[1]
    features['Sentiment'] = mean_sentiment[0] - mean_sentiment[1]

    # TextBlob sentiment analysis for full sentence
    try:
        blob = TextBlob(
            "".join([" " + i if not i.startswith("'") and i not in string.punctuation else i for i in tokens]).strip())
        features['BlobSentiment'] = blob.sentiment.polarity
    except:
        features['BlobSentiment'] = 0.0
  
    # Splitting the sentence into 2 parts and then calculating sentiment analysis on the sentence
    if len(tokens) == 1:
        tokens += ['.']
    f_half = tokens[0:len(tokens) // 2]
    s_half = tokens[len(tokens) // 2:]

    mean_sentiment_f = sentiments.score_sentence(f_half)
    features['PosSentiment1/2'] = mean_sentiment_f[0]
    features['NegSentiment1/2'] = mean_sentiment_f[1]
    features['Sentiment1/2'] = mean_sentiment_f[0] - mean_sentiment_f[1]

    mean_sentiment_s = sentiments.score_sentence(s_half)
    features['PosSentiment2/2'] = mean_sentiment_s[0]
    features['NegSentiment2/2'] = mean_sentiment_s[1]
    features['Sentiment2/2'] = mean_sentiment_s[0] - mean_sentiment_s[1]

    features['SentimentContrast'] = np.abs(features['Sentiment1/2'] - features['Sentiment2/2'])

    # TextBlob sentiment analysis for both halves
    try:
        blob = TextBlob(
            "".join([" " + i if not i.startswith("'") and i not in string.punctuation else i for i in f_half]).strip())
        features['BlobSentiment1/2'] = blob.sentiment.polarity
    except:
        features['BlobSentiment1/2'] = 0.0
    try:
        blob = TextBlob(
            "".join([" " + i if not i.startswith("'") and i not in string.punctuation else i for i in s_half]).strip())
        features['BlobSentiment2/2'] = blob.sentiment.polarity
    except:
        features['BlobSentiment2/2'] = 0.0

    features['BlobSentimentContrast'] = np.abs(features['BlobSentiment1/2'] - features['BlobSentiment2/2'])
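The half-split in this and the following examples relies on floor division (// in Python 3; under Python 2, / on two integers behaved the same way). A tiny worked example of the split, as a minimal sketch:

tokens = ["not", "bad", "for", "a", "monday", "."]
mid = len(tokens) // 2                       # 3
f_half, s_half = tokens[:mid], tokens[mid:]
# f_half == ["not", "bad", "for"], s_half == ["a", "monday", "."]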
Code Example #4
def sent_feature(features, sentence):

    sentence_sentiment = exp_replace.replace_emo(sentence)
    tokens = nltk.word_tokenize(sentence_sentiment)
    tokens = [(t.lower()) for t in tokens]

    mean_sentiment = sentiments.score_sentence(tokens)
    features['Positive sentiment'] = mean_sentiment[0]
    features['Negative sentiment'] = mean_sentiment[1]
    features['Sentiment'] = mean_sentiment[0] - mean_sentiment[1]

    #TextBlob sentiment analysis
    try:
        blob = TextBlob("".join([
            " " +
            i if not i.startswith("'") and i not in string.punctuation else i
            for i in tokens
        ]).strip())
        features['Blob sentiment'] = blob.sentiment.polarity
        features['Blob subjectivity'] = blob.sentiment.subjectivity
    except:
        features['Blob sentiment'] = 0.0
        features['Blob subjectivity'] = 0.0

    #Split in 2
    if len(tokens) == 1:
        tokens += ['.']
    f_half = tokens[0:len(tokens) // 2]
    s_half = tokens[len(tokens) // 2:]

    mean_sentiment_f = sentiments.score_sentence(f_half)
    features['Positive sentiment 1/2'] = mean_sentiment_f[0]
    features['Negative sentiment 1/2'] = mean_sentiment_f[1]
    features['Sentiment 1/2'] = mean_sentiment_f[0] - mean_sentiment_f[1]

    mean_sentiment_s = sentiments.score_sentence(s_half)
    features['Positive sentiment 2/2'] = mean_sentiment_s[0]
    features['Negative sentiment 2/2'] = mean_sentiment_s[1]
    features['Sentiment 2/2'] = mean_sentiment_s[0] - mean_sentiment_s[1]

    features['Sentiment contrast 2'] = np.abs(features['Sentiment 1/2'] -
                                              features['Sentiment 2/2'])

    #TextBlob sentiment analysis
    try:
        blob = TextBlob("".join([
            " " +
            i if not i.startswith("'") and i not in string.punctuation else i
            for i in f_half
        ]).strip())
        features['Blob sentiment 1/2'] = blob.sentiment.polarity
        features['Blob subjectivity 1/2'] = blob.sentiment.subjectivity
    except:
        features['Blob sentiment 1/2'] = 0.0
        features['Blob subjectivity 1/2'] = 0.0
    try:
        blob = TextBlob("".join([
            " " +
            i if not i.startswith("'") and i not in string.punctuation else i
            for i in s_half
        ]).strip())
        features['Blob sentiment 2/2'] = blob.sentiment.polarity
        features['Blob subjectivity 2/2'] = blob.sentiment.subjectivity
    except:
        features['Blob sentiment 2/2'] = 0.0
        features['Blob subjectivity 2/2'] = 0.0

    features['Blob Sentiment contrast 2'] = np.abs(
        features['Blob sentiment 1/2'] - features['Blob sentiment 2/2'])

    #Split in 3
    if len(tokens) == 2:
        tokens += ['.']
    f_half = tokens[0:len(tokens) // 3]
    s_half = tokens[len(tokens) // 3:2 * len(tokens) // 3]
    t_half = tokens[2 * len(tokens) // 3:]

    mean_sentiment_f = sentiments.score_sentence(f_half)
    features['Positive sentiment 1/3'] = mean_sentiment_f[0]
    features['Negative sentiment 1/3'] = mean_sentiment_f[1]
    features['Sentiment 1/3'] = mean_sentiment_f[0] - mean_sentiment_f[1]

    mean_sentiment_s = sentiments.score_sentence(s_half)
    features['Positive sentiment 2/3'] = mean_sentiment_s[0]
    features['Negative sentiment 2/3'] = mean_sentiment_s[1]
    features['Sentiment 2/3'] = mean_sentiment_s[0] - mean_sentiment_s[1]

    mean_sentiment_t = sentiments.score_sentence(t_half)
    features['Positive sentiment 3/3'] = mean_sentiment_t[0]
    features['Negative sentiment 3/3'] = mean_sentiment_t[1]
    features['Sentiment 3/3'] = mean_sentiment_t[0] - mean_sentiment_t[1]

    features['Sentiment contrast 3'] = np.abs(features['Sentiment 1/3'] -
                                              features['Sentiment 3/3'])

    #TextBlob sentiment analysis
    try:
        blob = TextBlob("".join([
            " " +
            i if not i.startswith("'") and i not in string.punctuation else i
            for i in f_half
        ]).strip())
        features['Blob sentiment 1/3'] = blob.sentiment.polarity
        features['Blob subjectivity 1/3'] = blob.sentiment.subjectivity
    except:
        features['Blob sentiment 1/3'] = 0.0
        features['Blob subjectivity 1/3'] = 0.0
    try:
        blob = TextBlob("".join([
            " " +
            i if not i.startswith("'") and i not in string.punctuation else i
            for i in s_half
        ]).strip())
        features['Blob sentiment 2/3'] = blob.sentiment.polarity
        features['Blob subjectivity 2/3'] = blob.sentiment.subjectivity
    except:
        features['Blob sentiment 2/3'] = 0.0
        features['Blob subjectivity 2/3'] = 0.0
    try:
        blob = TextBlob("".join([
            " " +
            i if not i.startswith("'") and i not in string.punctuation else i
            for i in t_half
        ]).strip())
        features['Blob sentiment 3/3'] = blob.sentiment.polarity
        features['Blob subjectivity 3/3'] = blob.sentiment.subjectivity
    except:
        features['Blob sentiment 3/3'] = 0.0
        features['Blob subjectivity 3/3'] = 0.0

    features['Blob Sentiment contrast 3'] = np.abs(
        features['Blob sentiment 1/3'] - features['Blob sentiment 3/3'])
Code Example #5
def sent_feature(features, sentence):

    sentence_sentiment = exp_replace.replace_emo(sentence)
    tokens = nltk.word_tokenize(sentence_sentiment)
    tokens = [(t.lower()) for t in tokens]

    mean_sentiment = sentiments.score_sentence(tokens)
    features["Positive sentiment"] = mean_sentiment[0]
    features["Negative sentiment"] = mean_sentiment[1]
    features["Sentiment"] = mean_sentiment[0] - mean_sentiment[1]

    # TextBlob sentiment analysis
    try:
        blob = TextBlob(
            "".join([" " + i if not i.startswith("'") and i not in string.punctuation else i for i in tokens]).strip()
        )
        features["Blob sentiment"] = blob.sentiment.polarity
        features["Blob subjectivity"] = blob.sentiment.subjectivity
    except:
        features["Blob sentiment"] = 0.0
        features["Blob subjectivity"] = 0.0

    # Split in 2
    if len(tokens) == 1:
        tokens += ["."]
    f_half = tokens[0 : len(tokens) // 2]
    s_half = tokens[len(tokens) // 2 :]

    mean_sentiment_f = sentiments.score_sentence(f_half)
    features["Positive sentiment 1/2"] = mean_sentiment_f[0]
    features["Negative sentiment 1/2"] = mean_sentiment_f[1]
    features["Sentiment 1/2"] = mean_sentiment_f[0] - mean_sentiment_f[1]

    mean_sentiment_s = sentiments.score_sentence(s_half)
    features["Positive sentiment 2/2"] = mean_sentiment_s[0]
    features["Negative sentiment 2/2"] = mean_sentiment_s[1]
    features["Sentiment 2/2"] = mean_sentiment_s[0] - mean_sentiment_s[1]

    features["Sentiment contrast 2"] = np.abs(features["Sentiment 1/2"] - features["Sentiment 2/2"])

    # TextBlob sentiment analysis
    try:
        blob = TextBlob(
            "".join([" " + i if not i.startswith("'") and i not in string.punctuation else i for i in f_half]).strip()
        )
        features["Blob sentiment 1/2"] = blob.sentiment.polarity
        features["Blob subjectivity 1/2"] = blob.sentiment.subjectivity
    except:
        features["Blob sentiment 1/2"] = 0.0
        features["Blob subjectivity 1/2"] = 0.0
    try:
        blob = TextBlob(
            "".join([" " + i if not i.startswith("'") and i not in string.punctuation else i for i in s_half]).strip()
        )
        features["Blob sentiment 2/2"] = blob.sentiment.polarity
        features["Blob subjectivity 2/2"] = blob.sentiment.subjectivity
    except:
        features["Blob sentiment 2/2"] = 0.0
        features["Blob subjectivity 2/2"] = 0.0

    features["Blob Sentiment contrast 2"] = np.abs(features["Blob sentiment 1/2"] - features["Blob sentiment 2/2"])

    # Split in 3
    if len(tokens) == 2:
        tokens += ["."]
    f_half = tokens[0 : len(tokens) // 3]
    s_half = tokens[len(tokens) // 3 : 2 * len(tokens) // 3]
    t_half = tokens[2 * len(tokens) // 3 :]

    mean_sentiment_f = sentiments.score_sentence(f_half)
    features["Positive sentiment 1/3"] = mean_sentiment_f[0]
    features["Negative sentiment 1/3"] = mean_sentiment_f[1]
    features["Sentiment 1/3"] = mean_sentiment_f[0] - mean_sentiment_f[1]

    mean_sentiment_s = sentiments.score_sentence(s_half)
    features["Positive sentiment 2/3"] = mean_sentiment_s[0]
    features["Negative sentiment 2/3"] = mean_sentiment_s[1]
    features["Sentiment 2/3"] = mean_sentiment_s[0] - mean_sentiment_s[1]

    mean_sentiment_t = sentiments.score_sentence(t_half)
    features["Positive sentiment 3/3"] = mean_sentiment_t[0]
    features["Negative sentiment 3/3"] = mean_sentiment_t[1]
    features["Sentiment 3/3"] = mean_sentiment_t[0] - mean_sentiment_t[1]

    features["Sentiment contrast 3"] = np.abs(features["Sentiment 1/3"] - features["Sentiment 3/3"])

    # TextBlob sentiment analysis
    try:
        blob = TextBlob(
            "".join([" " + i if not i.startswith("'") and i not in string.punctuation else i for i in f_half]).strip()
        )
        features["Blob sentiment 1/3"] = blob.sentiment.polarity
        features["Blob subjectivity 1/3"] = blob.sentiment.subjectivity
    except:
        features["Blob sentiment 1/3"] = 0.0
        features["Blob subjectivity 1/3"] = 0.0
    try:
        blob = TextBlob(
            "".join([" " + i if not i.startswith("'") and i not in string.punctuation else i for i in s_half]).strip()
        )
        features["Blob sentiment 2/3"] = blob.sentiment.polarity
        features["Blob subjectivity 2/3"] = blob.sentiment.subjectivity
    except:
        features["Blob sentiment 2/3"] = 0.0
        features["Blob subjectivity 2/3"] = 0.0
    try:
        blob = TextBlob(
            "".join([" " + i if not i.startswith("'") and i not in string.punctuation else i for i in t_half]).strip()
        )
        features["Blob sentiment 3/3"] = blob.sentiment.polarity
        features["Blob subjectivity 3/3"] = blob.sentiment.subjectivity
    except:
        features["Blob sentiment 3/3"] = 0.0
        features["Blob subjectivity 3/3"] = 0.0

    features["Blob Sentiment contrast 3"] = np.abs(features["Blob sentiment 1/3"] - features["Blob sentiment 3/3"])
Code Example #6
def extractFeatureOfASentence(sen):
    """
    This method extracts features of a single sentence.
    The following features are extracted:
    1. Full sentence Polarity
    2. Full sentence Subjectivity
    3. Half sentence Polarity (1/2 and 2/2)
    4. Half sentence Subjectivity (1/2 and 2/2)
    5. Difference between polarities of two halves
    6. Third sentence Polarity (1/3, 2/3 and 3/3)
    7. Third sentence Subjectivity (1/3, 2/3 and 3/3)
    8. Difference between max and min polarity of the thirds.
    9. Fourth sentence Polarity (1/4, 2/4, 3/4 and 4/4)
    10. Fourth sentence Subjectivity (1/4, 2/4, 3/4 and 4/4)
    11. Difference between max and min polarities of the fourths.

    In addition to the 23 sentiment features listed above, this variant prepends a capitalization count and a POS-tag vector to the returned list.
    :param sen:
    :return:
    """
    features = []
    # adding capitalization feature
    counter = 0
    threshold = 4
    sentence_plain = sen.decode('UTF-8') if isinstance(sen, bytes) else sen
    for j in range(len(sentence_plain)):
        counter += int(sentence_plain[j].isupper())
    features.append(counter)
    # end of adding capitalization  feature
    # Tokenize the sentence and then convert everything to lower case.
    tokens = nltk.word_tokenize(exp_replace.replace_emo(str(sen)))
    tokens = [(t.lower()) for t in tokens]
    # Adding pos_feature
    pos_vector = posvector(tokens)
    for j in range(len(pos_vector)):
        features.append(pos_vector[j])
    # End of adding pos_feature

    # Extract features of full sentence.
    fullBlob = TextBlob(joinTokens(tokens))
    features.append(fullBlob.sentiment.polarity)
    features.append(fullBlob.sentiment.subjectivity)
    # Extract features of halves.
    size = len(tokens) // 2
    parts = []
    i = 0
    while i <= len(tokens):
        if i == size:
            parts.append(tokens[i:])
            break
        else:
            parts.append(tokens[i:i + size])
            i += size
    for x in range(0, len(parts)):
        part = parts[x]
        halfBlob = TextBlob(joinTokens(part))
        features.append(halfBlob.sentiment.polarity)
        features.append(halfBlob.sentiment.subjectivity)
    features.append(np.abs(features[-2] - features[-4]))

    # Extract features of thirds.
    size = len(tokens) // 3
    parts = []
    i = 0
    while i <= len(tokens):
        if i == 2 * size:
            parts.append(tokens[i:])
            break
        else:
            parts.append(tokens[i:i + size])
            i += size

    ma = -2
    mi = 2
    for x in range(0, len(parts)):
        part = parts[x]
        thirdsBlob = TextBlob(joinTokens(part))
        pol = thirdsBlob.sentiment.polarity
        sub = thirdsBlob.sentiment.subjectivity
        if pol > ma:
            ma = pol
        if pol < mi:
            mi = pol
        features.append(pol)
        features.append(sub)
    features.append(np.abs(ma - mi))

    # Extract features of fourths.
    size = len(tokens) // 4
    parts = []
    i = 0
    while i <= len(tokens):
        if i == 3 * size:
            parts.append(tokens[i:])
            break
        else:
            parts.append(tokens[i:i + size])
            i += size
    ma = -2
    mi = 2
    for x in range(0, len(parts)):
        part = parts[x]
        fourthsBlob = TextBlob(joinTokens(part))
        pol = fourthsBlob.sentiment.polarity
        sub = fourthsBlob.sentiment.subjectivity
        if pol > ma:
            ma = pol
        if pol < mi:
            mi = pol
        features.append(pol)
        features.append(sub)
    features.append(np.abs(ma - mi))

    return features
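Code Examples #6 and #7 call a joinTokens helper that is not included in this listing (Code Example #6 also calls posvector directly, presumably the same sentiments.posvector used in Code Examples #1 and #2). Based on the inline join expression used in Code Examples #3 through #5, a plausible reconstruction of joinTokens is sketched below; this is an assumption, not the original implementation:

import string

def joinTokens(tokens):
    # Assumed reconstruction of the missing helper: re-join NLTK-style tokens into
    # a sentence, keeping contractions ("'s", "'re") and punctuation attached to
    # the preceding token instead of inserting a space before them.
    return "".join(
        " " + t if not t.startswith("'") and t not in string.punctuation else t
        for t in tokens
    ).strip()

# joinTokens(["it", "'s", "great", "!"]) -> "it's great!"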
Code Example #7
def extractFeatureOfASentence(sen):
    features = []

    # Tokenize the sentence and then convert everything to lower case.
    tokens = nltk.word_tokenize(exp_replace.replace_emo(str(sen)))
    tokens = [(t.lower()) for t in tokens]

    # Extract features of full sentence.
    fullBlob = TextBlob(joinTokens(tokens))
    features.append(fullBlob.sentiment.polarity)
    features.append(fullBlob.sentiment.subjectivity)

    # Extract features of halves.
    size = len(tokens) // 2
    parts = []
    i = 0
    while i <= len(tokens):
        if i == size:
            parts.append(tokens[i:])
            break
        else:
            parts.append(tokens[i:i + size])
            i += size
    for x in range(0, len(parts)):
        part = parts[x]
        halfBlob = TextBlob(joinTokens(part))
        features.append(halfBlob.sentiment.polarity)
        features.append(halfBlob.sentiment.subjectivity)
    features.append(np.abs(features[-2] - features[-4]))

    # Extract features of thirds.
    size = len(tokens) // 3
    parts = []
    i = 0
    while i <= len(tokens):
        if i == 2 * size:
            parts.append(tokens[i:])
            break
        else:
            parts.append(tokens[i:i + size])
            i += size

    ma = -2
    mi = 2
    for x in range(0, len(parts)):
        part = parts[x]
        thirdsBlob = TextBlob(joinTokens(part))
        pol = thirdsBlob.sentiment.polarity
        sub = thirdsBlob.sentiment.subjectivity
        if pol > ma:
            ma = pol
        if pol < mi:
            mi = pol
        features.append(pol)
        features.append(sub)
    features.append(np.abs(ma - mi))

    # Extract features of fourths.
    size = len(tokens) // 4
    parts = []
    i = 0
    while i <= len(tokens):
        if i == 3 * size:
            parts.append(tokens[i:])
            break
        else:
            parts.append(tokens[i:i + size])
            i += size
    ma = -2
    mi = 2
    for x in range(0, len(parts)):
        part = parts[x]
        fourthsBlob = TextBlob(joinTokens(part))
        pol = fourthsBlob.sentiment.polarity
        sub = fourthsBlob.sentiment.subjectivity
        if pol > ma:
            ma = pol
        if pol < mi:
            mi = pol
        features.append(pol)
        features.append(sub)
    features.append(np.abs(ma - mi))

    return features
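Whichever variant is used, the per-sentence feature lists are typically stacked into a matrix before being fed to a classifier. A minimal sketch, assuming extractFeatureOfASentence from Code Example #7 and its project dependencies are in scope; the sentences are made up:

import numpy as np

sentences = [
    "I love being ignored all day :)",
    "What a genuinely nice surprise!",
]
# Each row holds one sentence's feature vector.
X = np.array([extractFeatureOfASentence(s) for s in sentences])
print(X.shape)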
Code Example #8
    def extract_feature_of_sentence(self, sen):
        # type: (object) -> object
        """
        This method extracts features of a single sentence.
        The following features are extracted:
        1. Full sentence Polarity
        2. Full sentence Subjectivity
        3. Half sentence Polarity (1/2 and 2/2)
        4. Half sentence Subjectivity (1/2 and 2/2)
        5. Difference between polarities of two halves
        6. Third sentence Polarity (1/3, 2/3 and 3/3)
        7. Third sentence Subjectivity (1/3, 2/3 and 3/3)
        8. Difference between max and min polarity of the thirds.
        9. Fourth sentence Polarity (1/4, 2/4, 3/4 and 4/4)
        10. Fourth sentence Subjectivity (1/4, 2/4, 3/4 and 4/4)
        11. Difference between max and min polarities of the fourths.

        In total, 23 features are extracted for each sentence.
        :param sen:
        :return:
        """
        features = []

        # Tokenize the sentence and then convert everything to lower case.
        tokens = nltk.word_tokenize(exp_replace.replace_emo(str(sen)))
        tokens = [(t.lower()) for t in tokens]

        # Extract features of full sentence.
        fullBlob = TextBlob(self.join_tokens(tokens))
        features.append(fullBlob.sentiment.polarity)
        features.append(fullBlob.sentiment.subjectivity)

        # Extract features of halves.
        size = len(tokens) // 2
        parts = []
        i = 0
        while i <= len(tokens):
            if i == size:
                parts.append(tokens[i:])
                break
            else:
                parts.append(tokens[i:i + size])
                i += size
        for x in range(0, len(parts)):
            part = parts[x]
            halfBlob = TextBlob(self.join_tokens(part))
            features.append(halfBlob.sentiment.polarity)
            features.append(halfBlob.sentiment.subjectivity)
        features.append(np.abs(features[-2] - features[-4]))

        # Extract features of thirds.
        size = len(tokens) // 3
        parts = []
        i = 0
        while i <= len(tokens):
            if i == 2 * size:
                parts.append(tokens[i:])
                break
            else:
                parts.append(tokens[i:i + size])
                i += size

        ma = -2
        mi = 2
        for x in range(0, len(parts)):
            part = parts[x]
            thirdsBlob = TextBlob(self.join_tokens(part))
            pol = thirdsBlob.sentiment.polarity
            sub = thirdsBlob.sentiment.subjectivity
            if pol > ma:
                ma = pol
            if pol < mi:
                mi = pol
            features.append(pol)
            features.append(sub)
        features.append(np.abs(ma - mi))

        # Extract features of fourths.
        size = len(tokens) // 4
        parts = []
        i = 0
        while i <= len(tokens):
            if i == 3 * size:
                parts.append(tokens[i:])
                break
            else:
                parts.append(tokens[i:i + size])
                i += size
        ma = -2
        mi = 2
        for x in range(0, len(parts)):
            part = parts[x]
            fourths_blob = TextBlob(self.join_tokens(part))
            pol = fourths_blob.sentiment.polarity
            sub = fourths_blob.sentiment.subjectivity
            if pol > ma:
                ma = pol
            if pol < mi:
                mi = pol
            features.append(pol)
            features.append(sub)
        features.append(np.abs(ma - mi))

        return features