コード例 #1
 def __init__(self):
     """Load every word list this object works with.

     Each attribute listed below is set to whatever ``tp.get_txt_data``
     returns for the matching file under ``../data/review/`` when called
     in ``'lines'`` mode. The paths are relative, so they resolve against
     the current working directory.
     """
     # (attribute name, source file) pairs, kept in the original load order.
     resources = (
         ("posdict", "../data/review/posdict.txt"),
         ("negdict", "../data/review/negdict.txt"),
         ("mostdict", '../data/review/most.txt'),
         ("verydict", '../data/review/very.txt'),
         ("moredict", '../data/review/more.txt'),
         ("ishdict", '../data/review/ish.txt'),
         ("insufficientdict", '../data/review/insufficiently.txt'),
         ("inversedict", '../data/review/inverse.txt'),
         ("stopwords", '../data/review/sentiment_stopword.txt'),
     )
     for attr_name, source_path in resources:
         setattr(self, attr_name, tp.get_txt_data(source_path, "lines"))
コード例 #2
def word_by_word_review(filepath, sheetnum, colnum):
    # Read product review data from excel file and segment every review
    review_data = []
    for cell in tp.get_excel_data(
            filepath, sheetnum, colnum,
            'data')[0:get_excel_data(filepath, sheetnum, colnum, 'rownum')]:
        review_data.append(tp.segmentation(cell, 'list'))  # Seg every reivew

    # Read txt file contain stopwords
    stopwords = tp.get_txt_data(

    # Filter stopwords from reviews
    seg_fil_result = []
    for review in review_data:
        fil = [
            word for word in review if word not in stopwords and word != ' '
        fil = []

    # Return review set as onedimentional list
    review = list(itertools.chain(*seg_fil_result))
    return review
コード例 #3
def word_by_word_review(filepath, sheetnum, colnum):
    # Read product review data from excel file and segment every review
    review_data = []
    for cell in tp.get_excel_data(
            filepath, sheetnum, colnum,
            'data')[0:tp.get_excel_data(filepath, sheetnum, colnum, 'rownum')]:
        review_data.append(tp.segmentation(cell, 'list'))  # Seg every reivew

    # Read txt file contain stopwords
    stopwords = tp.get_txt_data('D:/code/stopword.txt', 'lines')
    stopwords = tp.get_txt_data(

    # Filter stopwords from reviews
    seg_fil_result = []
    for review in review_data:
        fil = [
            word for word in review if word not in stopwords and word != ' '
        fil = []

    # Return review set as onedimentional list
    review = list(itertools.chain(*seg_fil_result))
    return review
コード例 #4
def result():
    path = "I:\\Code\\Python\\Emotion\\1020_loupan_comment"
    filelist = os.listdir(path)
    file_name = []
    for files in filelist:
        Olddir = os.path.join(path, files)
        if (os.path.isdir(Olddir)):
        filename = os.path.splitext(files)[0]
        readStr = 'I:\\Code\\Python\\Emotion\\1020_loupan_comment\\' + filename + '.txt'
        writeStr = 'I:\\Code\\Python\\Emotion\\1020_loupan_result\\' + filename + '.txt'
        review = tp.get_txt_data(readStr, 'lines')
        store_sentiment_dictionary_score(review, writeStr)
    resultFile = open("I:\\Code\\Python\\Emotion\\1020_loupan_resultValue.txt",
    for i in range(len(resultValue)):
        resultFile.write(str(file_name[i]) + ',' + str(resultValue[i]) + '\n')
def word_by_word_review(filepath, sheetnum, colnum,
                        stopword_path='/home/sooda/nlp/Review-Helpfulness-Prediction/data/stopword.txt'):
    """Segment every review of an Excel column and return the kept words.

    Args:
        filepath: Workbook location, forwarded to ``tp.get_excel_data``.
        sheetnum: Sheet selector, forwarded to ``tp.get_excel_data``.
        colnum: Column selector, forwarded to ``tp.get_excel_data``.
        stopword_path: Text file read with ``tp.get_txt_data`` in
            ``'lines'`` mode. Defaults to the path that was previously
            hard-coded, so existing callers are unaffected.

    Returns:
        One flat list containing, review after review, every segment that
        is neither in the stopword data nor a single space.
    """
    # Read the review cells and keep only the first ``rownum`` of them.
    # Both lookups go through ``tp``; the bare ``get_excel_data`` name is
    # not defined in this module.
    cells = tp.get_excel_data(filepath, sheetnum, colnum, 'data')
    row_count = tp.get_excel_data(filepath, sheetnum, colnum, 'rownum')
    # presumably tp.segmentation(..., 'list') yields a list of words — confirm
    review_data = [tp.segmentation(cell, 'list') for cell in cells[0:row_count]]

    # Read the txt file that contains the stopwords.
    stopwords = tp.get_txt_data(stopword_path, 'lines')

    # Filter stopwords out of every review. The filtered review must be
    # collected, otherwise the flattened result below is always empty.
    seg_fil_result = [
        [word for word in review if word not in stopwords and word != ' ']
        for review in review_data
    ]

    # Return the review set as a one-dimensional list.
    return list(itertools.chain.from_iterable(seg_fil_result))
コード例 #6
def result():
    path = "I:\\Code\\Python\\Emotion\\0330_comment"
    filelist = os.listdir(path)
    file_name = []
    for files in filelist:  # all files
        Olddir = os.path.join(path,files)
        filename = os.path.splitext(files)[0]
        readStr = 'I:\\Code\\Python\\Emotion\\0330_comment\\'+ filename +'.txt'
        writeStr = 'I:\\Code\\Python\\Emotion\\0330_result_each\\'+ filename +'.txt'
        review = tp.get_txt_data(readStr, 'lines')
        store_sentiment_dictionary_score(review,writeStr)      # analysis each goods
    resultFile = open("I:\\Code\\Python\\Emotion\\0330_total_emotion.txt", 'w')
    for i in range(len(resultValue)):
        for j in xrange(len(resultValue[i])):
コード例 #7
"""
Compute a review's positive and negative scores, together with their averages and standard deviations.
This module aims to extract a review's positive/negative score, average score and standard deviation features (6 features in total).
The sentiment analysis is based on a sentiment dictionary.
"""


from __future__ import division
import numpy as np
import textprocessing as tp

# 1. Load dictionary and dataset
# Load sentiment dictionary
posdict = tp.get_txt_data(
    "/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/positive and negative dictionary/posdict.txt",
negdict = tp.get_txt_data(
    "/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/positive and negative dictionary/negdict.txt",

# Load adverbs of degree dictionary
mostdict = tp.get_txt_data(
    "/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/adverbs of degree dictionary/most.txt",
verydict = tp.get_txt_data(
    "/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/adverbs of degree dictionary/very.txt",
import numpy as np
import os
import textprocessing as tp

# 1. Load dictionary and dataset
# Load SentimentDictionary

# Load dataset
review = tp.get_excel_data("D:/code/review_set.xlxs", "1", "1", "data")


posdict = tp.get_txt_data("I:\\Code\\Python\\Emotion\\posdict.txt", "lines")
negdict = tp.get_txt_data("I:\\Code\\Python\\Emotion\\negdict.txt", "lines")

# Load AdverbsOfDegreeDictionary
mostdict = tp.get_txt_data('I:\\Code\\Python\\Emotion\\most.txt', 'lines')
verydict = tp.get_txt_data('I:\\Code\\Python\\Emotion\\very.txt', 'lines')
moredict = tp.get_txt_data('I:\\Code\\Python\\Emotion\\more.txt', 'lines')
ishdict = tp.get_txt_data('I:\\Code\\Python\\Emotion\\ish.txt', 'lines')
insufficientdict = tp.get_txt_data(
    'I:\\Code\\Python\\Emotion\\insufficiently.txt', 'lines')
inversedict = tp.get_txt_data('I:\\Code\\Python\\Emotion\\inverse.txt',

# Load dataset
#review = tp.get_excel_data("E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/HTC.xlsx", 1, 12, "data")
#review = tp.get_excel_data("E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/OPPO.xlsx", 1, 12, "data")
#! /usr/bin/env python2.7

"""
Count how many times product names, product brands and product attributes appear in each review.
This module aims to extract product name, brand and attribute features.
"""


import textprocessing as tp

# Load the product name, brand and attribute word lists from text files
# (each file is read with tp.get_txt_data in "lines" mode).
name = tp.get_txt_data("D:/code/product_name.txt", "lines")
brand = tp.get_txt_data("D:/code/product_brand.txt", "lines")
# NOTE(review): unlike the two paths above, this one has no ".txt"
# extension — confirm the file name on disk.
attribute = tp.get_txt_data("D:/code/product_attribute", "lines")

# Function counting feature appearing times
def name_brand_attribute(dataset):
    """Count name, brand and attribute hits for every review.

    ``dataset`` is walked review by review and each review word by word.
    A word is tested against the module-level ``name``, ``brand`` and
    ``attribute`` collections in that order and is credited only to the
    first one that contains it.

    Returns a list with one ``(name_hits, brand_hits, attribute_hits)``
    tuple per review, in input order.
    """
    per_review_counts = []
    for tokens in dataset:
        # Slots: 0 = name hits, 1 = brand hits, 2 = attribute hits.
        hits = [0, 0, 0]
        for token in tokens:
            if token in name:
                hits[0] += 1
            elif token in brand:
                hits[1] += 1
            elif token in attribute:
                hits[2] += 1
        per_review_counts.append(tuple(hits))
    return per_review_counts
コード例 #10
"""
Compute a review's positive and negative scores, together with their averages and standard deviations.
This module aims to extract a review's positive/negative score, average score and standard deviation features (6 features in total).
The sentiment analysis is based on a sentiment dictionary.
"""


import sys
sys.path.append("../../../Preprocessing module")
import numpy as np
import textprocessing as tp

# 1. Load dictionary and dataset
# Load sentiment dictionary
posdict = tp.get_txt_data(
    "./sentiment dictionary/positive and negative dictionary/posdict.txt",
negdict = tp.get_txt_data(
    "./sentiment dictionary/positive and negative dictionary//negdict.txt",

# Load adverbs of degree dictionary
mostdict = tp.get_txt_data(
    './sentiment dictionary/adverbs of degree dictionary/most.txt', 'lines')
verydict = tp.get_txt_data(
    './sentiment dictionary/adverbs of degree dictionary/very.txt', 'lines')
moredict = tp.get_txt_data(
    './sentiment dictionary/adverbs of degree dictionary/more.txt', 'lines')
ishdict = tp.get_txt_data(
    './sentiment dictionary/adverbs of degree dictionary/ish.txt', 'lines')
insufficientdict = tp.get_txt_data(
コード例 #11
# #       tname=name
# print tname,max

# sig_re='太逗了 无语 帅帅的 我靠,人死了还有呼吸'
# seg_list = tp.segmentation(sig_re, 'list')

# 1. Load positive and negative review data
# pos_review = tp.seg_fil_txt("/home/hadoop/goodnew.txt")
# neg_review = tp.seg_fil_txt("/home/hadoop/badnew.txt")

#sentiment_review = tp.seg_fil_txt("/home/hadoop/iyTop10/万物生长")

sentiment_review = tp.get_txt_data("/home/hadoop/成人记2电影版客观性.txt",'lines')

posdict = tp.get_txt_data(
    "/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/positive and negative dictionary/posdict.txt",
negdict = tp.get_txt_data(
    "/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/positive and negative dictionary/negdict.txt",
stopwords = tp.get_txt_data('/home/hadoop/coding/stopword.txt', 'lines')

p_center = open("/home/hadoop/建国大业客观性.txt",'w+')
for sig_re in sentiment_review:
コード例 #12
#! /usr/bin/env python2.7
"""
Compute a review's positive and negative scores, together with their averages and standard deviations.
This module aims to extract a review's positive/negative score, average score and standard deviation features (6 features in total).
The sentiment analysis is based on a sentiment dictionary.
"""


import numpy as np
import textprocessing as tp

# 1. Load dictionary and dataset
# Load sentiment dictionary
posdict = tp.get_txt_data(
negdict = tp.get_txt_data(

# Load adverbs of degree dictionary
mostdict = tp.get_txt_data(
verydict = tp.get_txt_data(
moredict = tp.get_txt_data(
# Load AdverbsOfDegreeDictionary
mostdict = tp.get_txt_data('D:/code/sentiment_dictionary/most.txt', 'lines')
verydict = tp.get_txt_data('D:/code/sentiment_dictionary/very.txt', 'lines')
moredict = tp.get_txt_data('D:/code/sentiment_dictionary/more.txt', 'lines')
ishdict = tp.get_txt_data('D:/code/sentiment_dictionary/ish.txt', 'lines')
insufficientdict = tp.get_txt_data('D:/code/sentiment_dictionary/insufficiently.txt', 'lines')
inversedict = tp.get_txt_data('D:/code/sentiment_dictionary/inverse.txt', 'lines')

# Load dataset
review = tp.get_excel_data("D:/code/review_set.xlxs", "1", "1", "data")


posdict = tp.get_txt_data(
negdict = tp.get_txt_data(

# Load AdverbsOfDegreeDictionary
mostdict = tp.get_txt_data(
verydict = tp.get_txt_data(
moredict = tp.get_txt_data(
コード例 #14
#! /usr/bin/env python2.7

"""
Count how many times product names, product brands and product attributes appear in each review.
This module aims to extract product name, brand and attribute features.
"""


import textprocessing as tp

# Load the product name, brand and attribute word lists from text files
# (each file is read with tp.get_txt_data in "lines" mode).
name = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/name.txt", "lines")
brand = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/brand.txt", "lines")
attribute = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/attribute.txt", "lines")

# Function counting feature appearing times
def name_brand_attribute(dataset):
    """Count name, brand and attribute hits for every review.

    ``dataset`` is walked review by review and each review word by word.
    A word is tested against the module-level ``name``, ``brand`` and
    ``attribute`` collections in that order and is credited only to the
    first one that contains it.

    Returns a list with one ``(name_hits, brand_hits, attribute_hits)``
    tuple per review, in input order.
    """
    per_review_counts = []
    for tokens in dataset:
        # Slots: 0 = name hits, 1 = brand hits, 2 = attribute hits.
        hits = [0, 0, 0]
        for token in tokens:
            if token in name:
                hits[0] += 1
            elif token in brand:
                hits[1] += 1
            elif token in attribute:
                hits[2] += 1
        per_review_counts.append(tuple(hits))
    return per_review_counts
"""
Compute a review's positive and negative scores, together with their averages and standard deviations.
This module aims to extract a review's positive/negative score, average score and standard deviation features (6 features in total).
The sentiment analysis is based on a sentiment dictionary.
"""


import numpy as np
import textprocessing as tp

# 1. Load dictionary and dataset
# Load sentiment dictionary
posdict = tp.get_txt_data(
    "/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/posdict.txt", "lines"
negdict = tp.get_txt_data(
    "/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/negdict.txt", "lines"

# Load adverbs of degree dictionary
mostdict = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/most.txt", "lines")
verydict = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/very.txt", "lines")
moredict = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/more.txt", "lines")
ishdict = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/ish.txt", "lines")
insufficientdict = tp.get_txt_data(
    "/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/insufficiently.txt", "lines"
inversedict = tp.get_txt_data(
    "/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/inverse.txt", "lines"
コード例 #16
ファイル: sentiment.py プロジェクト: xzm2004260/AByteOfNLP

# 4. Store sentiment dictionary features
def store_sentiment_dictionary_score(review_set, storepath):
    sentiment_score = all_review_sentiment_score(
    f = open(storepath, 'w')
    for i in sentiment_score:
            str(i[0]) + '\t' + str(i[1]) + '\t' + str(i[2]) + '\t' +
            str(i[3]) + '\t' + str(i[4]) + '\t' + str(i[5]) + '\n')

if __name__ == '__main__':
    posdict = tp.get_txt_data("../data/review/posdict.txt", "lines")
    negdict = tp.get_txt_data("../data/review/negdict.txt", "lines")
    mostdict = tp.get_txt_data('../data/review/most.txt', 'lines')
    verydict = tp.get_txt_data('../data/review/very.txt', 'lines')
    moredict = tp.get_txt_data('../data/review/more.txt', 'lines')
    ishdict = tp.get_txt_data('../data/review/ish.txt', 'lines')
    insufficientdict = tp.get_txt_data('../data/review/insufficiently.txt',
    inversedict = tp.get_txt_data('../data/review/inverse.txt', 'lines')
    review = tp.get_txt_data('reivew.txt', 'lines')

    print len(review)
    print single_review_sentiment_score(review[0])
    review_score = all_review_sentiment_score(sentence_sentiment_score(review))
    for index, score in enumerate(review_score):
        #print review[index], score
コード例 #17
"""
Compute a review's positive and negative scores, together with their averages and standard deviations.
This module aims to extract a review's positive/negative score, average score and standard deviation features (6 features in total).
The sentiment analysis is based on a sentiment dictionary.
"""


import sys
sys.path.append("../../../Preprocessing module")
import numpy as np
import textprocessing as tp

# 1. Load the dictionaries
# Positive and negative sentiment word lists. All paths below start with
# "./", so they resolve against the current working directory.
posdict = tp.get_txt_data("./Chinese-Sentiment-master/sentiment dictionary/positive and negative dictionary/posdict.txt","lines")
negdict = tp.get_txt_data("./Chinese-Sentiment-master/sentiment dictionary/positive and negative dictionary/negdict.txt","lines")

# Adverbs-of-degree word lists, one file per degree level.
# NOTE(review): inverse.txt presumably holds negation words — confirm.
mostdict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/most.txt', 'lines')
verydict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/very.txt', 'lines')
moredict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/more.txt', 'lines')
ishdict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/ish.txt', 'lines')
insufficientdict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/insufficiently.txt', 'lines')
inversedict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/inverse.txt', 'lines')

# Load dataset
# review = tp.get_excel_data("../Machine learning features/seniment review set/pos_review.xlsx", 1, 1, "data")

# 2. Sentiment dictionary analysis basic function
#! /usr/bin/env python2.7
"""
Compute a review's positive and negative scores, together with their averages and standard deviations.
This module aims to extract a review's positive/negative score, average score and standard deviation features (6 features in total).
The sentiment analysis is based on a sentiment dictionary.
"""


import numpy as np
import textprocessing as tp

# 1. Load dictionary and dataset
# Load sentiment dictionary
posdict = tp.get_txt_data("D:/code/sentiment_dictionary/posdict.txt", "lines")
negdict = tp.get_txt_data("D:/code/sentiment_dictionary/negdict.txt", "lines")

# Load adverbs of degree dictionary
mostdict = tp.get_txt_data('D:/code/sentiment_dictionary/most.txt', 'lines')
verydict = tp.get_txt_data('D:/code/sentiment_dictionary/very.txt', 'lines')
moredict = tp.get_txt_data('D:/code/sentiment_dictionary/more.txt', 'lines')
ishdict = tp.get_txt_data('D:/code/sentiment_dictionary/ish.txt', 'lines')
insufficientdict = tp.get_txt_data(
    'D:/code/sentiment_dictionary/insufficiently.txt', 'lines')
inversedict = tp.get_txt_data('D:/code/sentiment_dictionary/inverse.txt',

# Load dataset
review = tp.get_excel_data("D:/code/review_set.xlxs", "1", "1", "data")

"""
Compute a review's positive and negative scores, together with their averages and standard deviations.
This module aims to extract a review's positive/negative score, average score and standard deviation features (6 features in total).
The sentiment analysis is based on a sentiment dictionary.
"""


import numpy as np
import textprocessing as tp

# 1. Load the dictionaries and the dataset
# Positive and negative sentiment word lists (absolute Windows paths).
posdict = tp.get_txt_data("D:/code/sentiment_dictionary/posdict.txt","lines")
negdict = tp.get_txt_data("D:/code/sentiment_dictionary/negdict.txt","lines")

# Adverbs-of-degree word lists, one file per degree level.
# NOTE(review): inverse.txt presumably holds negation words — confirm.
mostdict = tp.get_txt_data('D:/code/sentiment_dictionary/most.txt', 'lines')
verydict = tp.get_txt_data('D:/code/sentiment_dictionary/very.txt', 'lines')
moredict = tp.get_txt_data('D:/code/sentiment_dictionary/more.txt', 'lines')
ishdict = tp.get_txt_data('D:/code/sentiment_dictionary/ish.txt', 'lines')
insufficientdict = tp.get_txt_data('D:/code/sentiment_dictionary/insufficiently.txt', 'lines')
inversedict = tp.get_txt_data('D:/code/sentiment_dictionary/inverse.txt', 'lines')

# Load the review dataset from an Excel workbook.
# NOTE(review): the extension is spelled "xlxs" and the sheet/column
# selectors are passed as strings — confirm both against the real file
# and against what tp.get_excel_data expects.
review = tp.get_excel_data("D:/code/review_set.xlxs", "1", "1", "data")

# 2. Sentiment dictionary analysis basic function
コード例 #20
                i += 1
            single_review_senti_score.append(self.transform_to_positive_num(poscount, negcount))
        score_array = np.array(single_review_senti_score)
        pos_score = np.sum(score_array[:, 0])
        neg_score = np.sum(score_array[:, 1])
        return [pos_score, neg_score]

    def single_sentiment_score(self, review):
        """Score a single review.

        The review is split with ``self.cut_sentences_words``; the result
        is stored on ``self.sentences_words`` and then handed to
        ``self.get_single_sent_count``.

        Returns:
            A 2-tuple holding the first two items of what
            ``get_single_sent_count`` returned (presumably the positive
            and the negative score — confirm against that method).
        """
        sentences_words = self.cut_sentences_words(review)
        # Keep the segmented review on the instance for later inspection.
        self.sentences_words = sentences_words
        scores = self.get_single_sent_count(sentences_words)
        return scores[0], scores[1]

    def sentence_sentiment_score(self, dataset):
        """Print the score pair of a fixed sample of reviews.

        Only ``dataset[1:10]`` is processed: the first element and
        everything from index 10 onwards are ignored. Nothing is returned;
        one line is printed per processed review.
        """
        # NOTE(review): hard-coded slice — looks like a debugging limit;
        # confirm that skipping index 0 is intended.
        dataset = dataset[1:10]
        for review in dataset:
            scores = self.single_sentiment_score(review)
            # Python 2 print statement: the two scores separated by a space.
            print scores[0], scores[1]

if __name__ == '__main__':
    # Manual smoke test: load the review data and score its first entry.
    # NOTE(review): the file name is spelled 'reivew.txt' — confirm it
    # matches the file on disk before "fixing" the spelling.
    review = tp.get_txt_data('reivew.txt', 'lines')
    dict_sentiment = DictSentiment()

    # Python 2 print statements: the length of the loaded data, then the
    # score pair of the first review.
    print len(review)
    print dict_sentiment.single_sentiment_score(review[0])

コード例 #21
"""
Count how many times product names, product brands and product attributes appear in each review.
This module aims to extract product_name_brand_attribute features.
"""


import textprocessing as tp

# Read txt files include product name, product brand and product attributes
name = tp.get_txt_data("D:/code/product_name.txt", "lines")
brand = tp.get_txt_data("D:/code/product_brand.txt", "lines")
attribute = tp.get_txt_data("D:/code/product_attribute", "lines")
name = tp.get_txt_data(
brand = tp.get_txt_data(
attribute = tp.get_txt_data(

# Function counting feature appearing times
def name_brand_attribute(dataset):
    num = []
    n, b, a = 0, 0, 0
    for review in dataset:
        for word in review:
コード例 #22
#! /usr/bin/env python2.7
"""
Count how many times product names, product brands and product attributes appear in each review.
This module aims to extract ProductNameBrandAndAttribute features.
"""


import textprocessing as tp

# Load the product name, brand and attribute word lists from text files
# (each file is read with tp.get_txt_data in "lines" mode).
name = tp.get_txt_data("D:/code/product_name.txt", "lines")
brand = tp.get_txt_data("D:/code/product_brand.txt", "lines")
# NOTE(review): unlike the two paths above, this one has no ".txt"
# extension — confirm the file name on disk.
attribute = tp.get_txt_data("D:/code/product_attribute", "lines")

# Function counting feature appearing times
def name_brand_attribute(dataset):
    """Count name, brand and attribute hits for every review.

    ``dataset`` is walked review by review and each review word by word.
    A word is tested against the module-level ``name``, ``brand`` and
    ``attribute`` collections in that order and is credited only to the
    first one that contains it.

    Returns a list with one ``(name_hits, brand_hits, attribute_hits)``
    tuple per review, in input order.
    """
    per_review_counts = []
    for tokens in dataset:
        # Slots: 0 = name hits, 1 = brand hits, 2 = attribute hits.
        hits = [0, 0, 0]
        for token in tokens:
            if token in name:
                hits[0] += 1
            elif token in brand:
                hits[1] += 1
            elif token in attribute:
                hits[2] += 1
        per_review_counts.append(tuple(hits))
    return per_review_counts