def __init__(self):
    self.posdict = tp.get_txt_data("../data/review/posdict.txt", "lines")
    self.negdict = tp.get_txt_data("../data/review/negdict.txt", "lines")
    self.mostdict = tp.get_txt_data('../data/review/most.txt', 'lines')
    self.verydict = tp.get_txt_data('../data/review/very.txt', 'lines')
    self.moredict = tp.get_txt_data('../data/review/more.txt', 'lines')
    self.ishdict = tp.get_txt_data('../data/review/ish.txt', 'lines')
    self.insufficientdict = tp.get_txt_data('../data/review/insufficiently.txt', 'lines')
    self.inversedict = tp.get_txt_data('../data/review/inverse.txt', 'lines')
    self.stopwords = tp.get_txt_data('../data/review/sentiment_stopword.txt', 'lines')
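# All of the snippets in this collection rely on textprocessing.get_txt_data to
# read a word list from disk. That helper is not included here; the following is
# a minimal sketch of what such a loader could look like, assuming each
# dictionary file holds one entry per line in UTF-8. The name and signature are
# taken from the calls above; the body is an illustrative assumption, not the
# project's own implementation.
import codecs

def get_txt_data(filepath, mode):
    """Read a text file; with mode='lines' return a list of stripped lines."""
    with codecs.open(filepath, 'r', 'utf-8') as f:
        if mode == 'lines':
            return [line.strip() for line in f if line.strip()]
        return f.read()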
def word_by_word_review(filepath, sheetnum, colnum):
    # Read product review data from an excel file and segment every review
    review_data = []
    for cell in tp.get_excel_data(filepath, sheetnum, colnum, 'data')[0:tp.get_excel_data(filepath, sheetnum, colnum, 'rownum')]:
        review_data.append(tp.segmentation(cell, 'list'))  # Segment every review

    # Read the txt file containing stopwords
    stopwords = tp.get_txt_data('/home/sooda/nlp/Review-Helpfulness-Prediction/data/stopword.txt', 'lines')

    # Filter stopwords from reviews
    seg_fil_result = []
    for review in review_data:
        fil = [word for word in review if word not in stopwords and word != ' ']
        seg_fil_result.append(fil)
        fil = []

    # Return the review set as a one-dimensional list
    review = list(itertools.chain(*seg_fil_result))
    return review
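# A possible call site for word_by_word_review. The workbook name and the
# sheet/column indices below are hypothetical, and itertools plus
# textprocessing (as tp) must already be imported at the top of the module for
# the function above to run.
if __name__ == '__main__':
    words = word_by_word_review('reviews.xlsx', 1, 1)
    print len(words)     # total number of tokens left after stopword filtering
    print words[:20]     # a quick look at the first segmented words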
def word_by_word_review(filepath, sheetnum, colnum):
    # Read product review data from an excel file and segment every review
    review_data = []
    for cell in tp.get_excel_data(filepath, sheetnum, colnum, 'data')[0:tp.get_excel_data(filepath, sheetnum, colnum, 'rownum')]:
        review_data.append(tp.segmentation(cell, 'list'))  # Segment every review

    # Read the txt file containing stopwords
    """
    stopwords = tp.get_txt_data('D:/code/stopword.txt', 'lines')
    """
    stopwords = tp.get_txt_data('E:/GraduationProject/pythoncode/project/Prediction/main/PreprocessingModule/stopword.txt', 'lines')

    # Filter stopwords from reviews
    seg_fil_result = []
    for review in review_data:
        fil = [word for word in review if word not in stopwords and word != ' ']
        seg_fil_result.append(fil)
        fil = []

    # Return the review set as a one-dimensional list
    review = list(itertools.chain(*seg_fil_result))
    return review
def result():
    path = "I:\\Code\\Python\\Emotion\\1020_loupan_comment"
    filelist = os.listdir(path)
    file_name = []
    for files in filelist:
        Olddir = os.path.join(path, files)
        if os.path.isdir(Olddir):
            continue
        filename = os.path.splitext(files)[0]
        readStr = 'I:\\Code\\Python\\Emotion\\1020_loupan_comment\\' + filename + '.txt'
        writeStr = 'I:\\Code\\Python\\Emotion\\1020_loupan_result\\' + filename + '.txt'
        file_name.append(filename)
        review = tp.get_txt_data(readStr, 'lines')
        store_sentiment_dictionary_score(review, writeStr)

    resultFile = open("I:\\Code\\Python\\Emotion\\1020_loupan_resultValue.txt", 'w')
    for i in range(len(resultValue)):
        resultFile.write(str(file_name[i]) + ',' + str(resultValue[i]) + '\n')
    resultFile.close()
def result():
    path = "I:\\Code\\Python\\Emotion\\0330_comment"
    filelist = os.listdir(path)
    file_name = []
    for files in filelist:  # all files
        Olddir = os.path.join(path, files)
        if os.path.isdir(Olddir):
            continue
        filename = os.path.splitext(files)[0]
        readStr = 'I:\\Code\\Python\\Emotion\\0330_comment\\' + filename + '.txt'
        writeStr = 'I:\\Code\\Python\\Emotion\\0330_result_each\\' + filename + '.txt'
        file_name.append(filename)
        review = tp.get_txt_data(readStr, 'lines')
        store_sentiment_dictionary_score(review, writeStr)  # analyse each product's reviews separately

    resultFile = open("I:\\Code\\Python\\Emotion\\0330_total_emotion.txt", 'w')
    for i in range(len(resultValue)):
        resultFile.write(str(file_name[i]))
        for j in xrange(len(resultValue[i])):
            resultFile.write(',' + str(resultValue[i][j]))
        resultFile.write('\n')
    resultFile.close()
""" Compute a review's positive and negative score, their average score and standard deviation. This module aim to extract review positive/negative score, average score and standard deviation features (all 6 features). Sentiment analysis based on sentiment dictionary. """ from __future__ import division import numpy as np import textprocessing as tp # 1. Load dictionary and dataset # Load sentiment dictionary posdict = tp.get_txt_data( "/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/positive and negative dictionary/posdict.txt", "lines", ) negdict = tp.get_txt_data( "/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/positive and negative dictionary/negdict.txt", "lines", ) # Load adverbs of degree dictionary mostdict = tp.get_txt_data( "/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/adverbs of degree dictionary/most.txt", "lines", ) verydict = tp.get_txt_data( "/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/adverbs of degree dictionary/very.txt", "lines", )
import numpy as np
import os
import textprocessing as tp

# 1. Load dictionary and dataset
# Load SentimentDictionary
"""
# Load dataset
review = tp.get_excel_data("D:/code/review_set.xlxs", "1", "1", "data")
"""
posdict = tp.get_txt_data("I:\\Code\\Python\\Emotion\\posdict.txt", "lines")
negdict = tp.get_txt_data("I:\\Code\\Python\\Emotion\\negdict.txt", "lines")

# Load AdverbsOfDegreeDictionary
mostdict = tp.get_txt_data('I:\\Code\\Python\\Emotion\\most.txt', 'lines')
verydict = tp.get_txt_data('I:\\Code\\Python\\Emotion\\very.txt', 'lines')
moredict = tp.get_txt_data('I:\\Code\\Python\\Emotion\\more.txt', 'lines')
ishdict = tp.get_txt_data('I:\\Code\\Python\\Emotion\\ish.txt', 'lines')
insufficientdict = tp.get_txt_data('I:\\Code\\Python\\Emotion\\insufficiently.txt', 'lines')
inversedict = tp.get_txt_data('I:\\Code\\Python\\Emotion\\inverse.txt', 'lines')

# Load dataset
#review = tp.get_excel_data("E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/HTC.xlsx", 1, 12, "data")
#review = tp.get_excel_data("E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/OPPO.xlsx", 1, 12, "data")
#! /usr/bin/env python2.7
#coding=utf-8
"""
Count how many times the product name, product brand and product attributes appear in a review.
This module aims to extract the product name, brand and attribute features.
"""

import textprocessing as tp

# Read txt files containing the product names, product brands and product attributes
name = tp.get_txt_data("D:/code/product_name.txt", "lines")
brand = tp.get_txt_data("D:/code/product_brand.txt", "lines")
attribute = tp.get_txt_data("D:/code/product_attribute", "lines")

# Count how many times each feature type appears in every review
def name_brand_attribute(dataset):
    num = []
    n, b, a = 0, 0, 0
    for review in dataset:
        for word in review:
            if word in name:
                n += 1
            elif word in brand:
                b += 1
            elif word in attribute:
                a += 1
        num.append((n, b, a))
        n, b, a = 0, 0, 0
    return num
""" Compute a review's positive and negative score, their average score and standard deviation. This module aim to extract review positive/negative score, average score and standard deviation features (all 6 features). Sentiment analysis based on sentiment dictionary. """ import sys sys.path.append("../../../Preprocessing module") import numpy as np import textprocessing as tp # 1. Load dictionary and dataset # Load sentiment dictionary posdict = tp.get_txt_data( "./sentiment dictionary/positive and negative dictionary/posdict.txt", "lines") negdict = tp.get_txt_data( "./sentiment dictionary/positive and negative dictionary//negdict.txt", "lines") # Load adverbs of degree dictionary mostdict = tp.get_txt_data( './sentiment dictionary/adverbs of degree dictionary/most.txt', 'lines') verydict = tp.get_txt_data( './sentiment dictionary/adverbs of degree dictionary/very.txt', 'lines') moredict = tp.get_txt_data( './sentiment dictionary/adverbs of degree dictionary/more.txt', 'lines') ishdict = tp.get_txt_data( './sentiment dictionary/adverbs of degree dictionary/ish.txt', 'lines') insufficientdict = tp.get_txt_data(
# # tname=name
# # print tname,max
# sig_re='太逗了 无语 帅帅的 我靠,人死了还有呼吸'
# # seg_list = tp.segmentation(sig_re, 'list')

# 1. Load positive and negative review data
# pos_review = tp.seg_fil_txt("/home/hadoop/goodnew.txt")
# neg_review = tp.seg_fil_txt("/home/hadoop/badnew.txt")
#sentiment_review = tp.seg_fil_txt("/home/hadoop/iyTop10/万物生长")
sentiment_review = tp.get_txt_data("/home/hadoop/成人记2电影版客观性.txt", 'lines')

posdict = tp.get_txt_data("/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/positive and negative dictionary/posdict.txt", "lines")
negdict = tp.get_txt_data("/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/positive and negative dictionary/negdict.txt", "lines")
stopwords = tp.get_txt_data('/home/hadoop/coding/stopword.txt', 'lines')

posdict.extend(negdict)
i = 0
sen_cur = []
p_center = open("/home/hadoop/建国大业客观性.txt", 'w+')
for sig_re in sentiment_review:
    #sig_re='挺棒'
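# The body of the loop above is not part of this excerpt. The surrounding code
# (the merged positive+negative word list and the output file) suggests a
# subjectivity filter. A minimal, assumed sketch of such a filter follows; the
# helper name filter_subjective and its exact logic are illustrative guesses,
# not the original implementation.
def filter_subjective(lines, sentiment_words, stop_words, out_path):
    out = open(out_path, 'w+')
    kept = 0
    for line in lines:
        seg_list = tp.segmentation(line, 'list')            # word segmentation
        hits = [w for w in seg_list
                if w in sentiment_words and w not in stop_words]
        if hits:                                             # at least one sentiment word
            out.write(line + '\n')                           # treat the line as subjective
            kept += 1
    out.close()
    return kept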
#! /usr/bin/env python2.7
#coding=utf-8
"""
Compute a review's positive and negative score, their average score and standard deviation.
This module aims to extract the review's positive/negative scores, their averages and standard deviations (6 features in total).
Sentiment analysis is based on a sentiment dictionary.
"""

import numpy as np
import textprocessing as tp

# 1. Load dictionary and dataset
# Load sentiment dictionary
posdict = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/posdict.txt", "lines")
negdict = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/negdict.txt", "lines")

# Load adverbs of degree dictionary
mostdict = tp.get_txt_data('/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/most.txt', 'lines')
verydict = tp.get_txt_data('/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/very.txt', 'lines')
moredict = tp.get_txt_data('/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/more.txt', 'lines')
# Load AdverbsOfDegreeDictionary
mostdict = tp.get_txt_data('D:/code/sentiment_dictionary/most.txt', 'lines')
verydict = tp.get_txt_data('D:/code/sentiment_dictionary/very.txt', 'lines')
moredict = tp.get_txt_data('D:/code/sentiment_dictionary/more.txt', 'lines')
ishdict = tp.get_txt_data('D:/code/sentiment_dictionary/ish.txt', 'lines')
insufficientdict = tp.get_txt_data('D:/code/sentiment_dictionary/insufficiently.txt', 'lines')
inversedict = tp.get_txt_data('D:/code/sentiment_dictionary/inverse.txt', 'lines')

# Load dataset
review = tp.get_excel_data("D:/code/review_set.xlxs", "1", "1", "data")
"""
posdict = tp.get_txt_data("E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/SentimentDictionaryFeatures/SentimentDictionary/PositiveAndNegativeDictionary/posdict.txt", "lines")
negdict = tp.get_txt_data("E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/SentimentDictionaryFeatures/SentimentDictionary/PositiveAndNegativeDictionary/negdict.txt", "lines")

# Load AdverbsOfDegreeDictionary
mostdict = tp.get_txt_data('E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/SentimentDictionaryFeatures/SentimentDictionary/AdverbsOfDegreeDictionary/most.txt', 'lines')
verydict = tp.get_txt_data('E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/SentimentDictionaryFeatures/SentimentDictionary/AdverbsOfDegreeDictionary/very.txt', 'lines')
moredict = tp.get_txt_data('E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/SentimentDictionaryFeatures/SentimentDictionary/AdverbsOfDegreeDictionary/more.txt', 'lines')
#! /usr/bin/env python2.7
#coding=utf-8
"""
Count how many times the product name, product brand and product attributes appear in a review.
This module aims to extract the product name, brand and attribute features.
"""

import textprocessing as tp

# Read txt files containing the product names, product brands and product attributes
name = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/name.txt", "lines")
brand = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/brand.txt", "lines")
attribute = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/attribute.txt", "lines")

# Count how many times each feature type appears in every review
def name_brand_attribute(dataset):
    num = []
    n, b, a = 0, 0, 0
    for review in dataset:
        for word in review:
            if word in name:
                n += 1
            elif word in brand:
                b += 1
            elif word in attribute:
                a += 1
        num.append((n, b, a))
        n, b, a = 0, 0, 0
    return num
""" Compute a review's positive and negative score, their average score and standard deviation. This module aim to extract review positive/negative score, average score and standard deviation features (all 6 features). Sentiment analysis based on sentiment dictionary. """ import numpy as np import textprocessing as tp # 1. Load dictionary and dataset # Load sentiment dictionary posdict = tp.get_txt_data( "/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/posdict.txt", "lines" ) negdict = tp.get_txt_data( "/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/negdict.txt", "lines" ) # Load adverbs of degree dictionary mostdict = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/most.txt", "lines") verydict = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/very.txt", "lines") moredict = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/more.txt", "lines") ishdict = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/ish.txt", "lines") insufficientdict = tp.get_txt_data( "/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/insufficiently.txt", "lines" ) inversedict = tp.get_txt_data( "/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/inverse.txt", "lines"
# 4. Store sentiment dictionary features
def store_sentiment_dictionary_score(review_set, storepath):
    sentiment_score = all_review_sentiment_score(sentence_sentiment_score(review_set))
    f = open(storepath, 'w')
    for i in sentiment_score:
        f.write(str(i[0]) + '\t' + str(i[1]) + '\t' + str(i[2]) + '\t' + str(i[3]) + '\t' + str(i[4]) + '\t' + str(i[5]) + '\n')
    f.close()


if __name__ == '__main__':
    posdict = tp.get_txt_data("../data/review/posdict.txt", "lines")
    negdict = tp.get_txt_data("../data/review/negdict.txt", "lines")
    mostdict = tp.get_txt_data('../data/review/most.txt', 'lines')
    verydict = tp.get_txt_data('../data/review/very.txt', 'lines')
    moredict = tp.get_txt_data('../data/review/more.txt', 'lines')
    ishdict = tp.get_txt_data('../data/review/ish.txt', 'lines')
    insufficientdict = tp.get_txt_data('../data/review/insufficiently.txt', 'lines')
    inversedict = tp.get_txt_data('../data/review/inverse.txt', 'lines')

    review = tp.get_txt_data('reivew.txt', 'lines')
    print len(review)
    print single_review_sentiment_score(review[0])
    review_score = all_review_sentiment_score(sentence_sentiment_score(review))
    for index, score in enumerate(review_score):
        #print review[index], score
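# all_review_sentiment_score and sentence_sentiment_score are defined elsewhere
# in this module and are not part of this excerpt. Based on the module's
# docstring (six features: positive/negative score, their averages and standard
# deviations), a minimal numpy sketch of the aggregation step might look like
# the following; the exact feature order in the original function is an
# assumption.
def all_review_sentiment_score_sketch(scores_per_review):
    # scores_per_review: one list of (pos, neg) sentence scores per review
    features = []
    for sentence_scores in scores_per_review:
        arr = np.array(sentence_scores)
        pos, neg = arr[:, 0], arr[:, 1]
        features.append([np.sum(pos), np.sum(neg),
                         np.mean(pos), np.mean(neg),
                         np.std(pos), np.std(neg)])
    return features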
"""
Compute a review's positive and negative score, their average score and standard deviation.
This module aims to extract the review's positive/negative scores, their averages and standard deviations (6 features in total).
Sentiment analysis is based on a sentiment dictionary.
"""

import sys
sys.path.append("../../../Preprocessing module")
sys.path.append("./Chinese-Sentiment-master")

import numpy as np
import textprocessing as tp

# 1. Load dictionary and dataset
# Load sentiment dictionary
posdict = tp.get_txt_data("./Chinese-Sentiment-master/sentiment dictionary/positive and negative dictionary/posdict.txt", "lines")
negdict = tp.get_txt_data("./Chinese-Sentiment-master/sentiment dictionary/positive and negative dictionary/negdict.txt", "lines")

# Load adverbs of degree dictionary
mostdict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/most.txt', 'lines')
verydict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/very.txt', 'lines')
moredict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/more.txt', 'lines')
ishdict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/ish.txt', 'lines')
insufficientdict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/insufficiently.txt', 'lines')
inversedict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/inverse.txt', 'lines')

# Load dataset
# review = tp.get_excel_data("../Machine learning features/seniment review set/pos_review.xlsx", 1, 1, "data")

# 2. Sentiment dictionary analysis basic function
#! /usr/bin/env python2.7
#coding=utf-8
"""
Compute a review's positive and negative score, their average score and standard deviation.
This module aims to extract the review's positive/negative scores, their averages and standard deviations (6 features in total).
Sentiment analysis is based on a sentiment dictionary.
"""

import numpy as np
import textprocessing as tp

# 1. Load dictionary and dataset
# Load sentiment dictionary
posdict = tp.get_txt_data("D:/code/sentiment_dictionary/posdict.txt", "lines")
negdict = tp.get_txt_data("D:/code/sentiment_dictionary/negdict.txt", "lines")

# Load adverbs of degree dictionary
mostdict = tp.get_txt_data('D:/code/sentiment_dictionary/most.txt', 'lines')
verydict = tp.get_txt_data('D:/code/sentiment_dictionary/very.txt', 'lines')
moredict = tp.get_txt_data('D:/code/sentiment_dictionary/more.txt', 'lines')
ishdict = tp.get_txt_data('D:/code/sentiment_dictionary/ish.txt', 'lines')
insufficientdict = tp.get_txt_data('D:/code/sentiment_dictionary/insufficiently.txt', 'lines')
inversedict = tp.get_txt_data('D:/code/sentiment_dictionary/inverse.txt', 'lines')

# Load dataset
review = tp.get_excel_data("D:/code/review_set.xlxs", "1", "1", "data")
""" Compute a review's positive and negative score, their average score and standard deviation. This module aim to extract review positive/negative score, average score and standard deviation features (all 6 features). Sentiment analysis based on sentiment dictionary. """ import numpy as np import textprocessing as tp # 1. Load dictionary and dataset # Load sentiment dictionary posdict = tp.get_txt_data("D:/code/sentiment_dictionary/posdict.txt","lines") negdict = tp.get_txt_data("D:/code/sentiment_dictionary/negdict.txt","lines") # Load adverbs of degree dictionary mostdict = tp.get_txt_data('D:/code/sentiment_dictionary/most.txt', 'lines') verydict = tp.get_txt_data('D:/code/sentiment_dictionary/very.txt', 'lines') moredict = tp.get_txt_data('D:/code/sentiment_dictionary/more.txt', 'lines') ishdict = tp.get_txt_data('D:/code/sentiment_dictionary/ish.txt', 'lines') insufficientdict = tp.get_txt_data('D:/code/sentiment_dictionary/insufficiently.txt', 'lines') inversedict = tp.get_txt_data('D:/code/sentiment_dictionary/inverse.txt', 'lines') # Load dataset review = tp.get_excel_data("D:/code/review_set.xlxs", "1", "1", "data") # 2. Sentiment dictionary analysis basic function
                break
            i += 1
            single_review_senti_score.append(self.transform_to_positive_num(poscount, negcount))
        score_array = np.array(single_review_senti_score)
        pos_score = np.sum(score_array[:, 0])
        neg_score = np.sum(score_array[:, 1])
        return [pos_score, neg_score]

    def single_sentiment_score(self, review):
        sentences_words = self.cut_sentences_words(review)
        self.sentences_words = []
        self.sentences_words = sentences_words
        scores = self.get_single_sent_count(sentences_words)
        return scores[0], scores[1]

    def sentence_sentiment_score(self, dataset):
        dataset = dataset[1:10]
        for review in dataset:
            scores = self.single_sentiment_score(review)
            print scores[0], scores[1]


if __name__ == '__main__':
    review = tp.get_txt_data('reivew.txt', 'lines')
    dict_sentiment = DictSentiment()
    print len(review)
    print dict_sentiment.single_sentiment_score(review[0])
    dict_sentiment.sentence_sentiment_score(review)
""" Counting the product name, product brand and product attribute appear times in the review. This module aim to extract product_name_brand_attribute features. """ import textprocessing as tp # Read txt files include product name, product brand and product attributes """ name = tp.get_txt_data("D:/code/product_name.txt", "lines") brand = tp.get_txt_data("D:/code/product_brand.txt", "lines") attribute = tp.get_txt_data("D:/code/product_attribute", "lines") """ name = tp.get_txt_data( 'E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/InformativeFeatures/product_name_brand_attribute/name.txt', 'lines') brand = tp.get_txt_data( 'E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/InformativeFeatures/product_name_brand_attribute/brand.txt', 'lines') attribute = tp.get_txt_data( 'E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/InformativeFeatures/product_name_brand_attribute/attibute.txt', 'lines') # Function counting feature appearing times def name_brand_attribute(dataset): num = [] n, b, a = 0, 0, 0 for review in dataset: for word in review:
#! /usr/bin/env python2.7
#coding=utf-8
"""
Count how many times the product name, product brand and product attributes appear in the review.
This module aims to extract the ProductNameBrandAndAttribute features.
"""

import textprocessing as tp

# Read txt files containing the product names, product brands and product attributes
name = tp.get_txt_data("D:/code/product_name.txt", "lines")
brand = tp.get_txt_data("D:/code/product_brand.txt", "lines")
attribute = tp.get_txt_data("D:/code/product_attribute", "lines")

# Count how many times each feature type appears in every review
def name_brand_attribute(dataset):
    num = []
    n, b, a = 0, 0, 0
    for review in dataset:
        for word in review:
            if word in name:
                n += 1
            elif word in brand:
                b += 1
            elif word in attribute:
                a += 1
        num.append((n, b, a))
        n, b, a = 0, 0, 0
    return num
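# A possible way to drive name_brand_attribute: it expects a dataset of
# already-segmented reviews (a list of word lists), for example the output of
# tp.segmentation applied to each line of a review file. The file name
# 'sample_reviews.txt' below is hypothetical.
if __name__ == '__main__':
    sample_dataset = [tp.segmentation(review, 'list')
                      for review in tp.get_txt_data('sample_reviews.txt', 'lines')]
    counts = name_brand_attribute(sample_dataset)
    for (n, b, a) in counts:
        print n, b, a   # name / brand / attribute mention counts per review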