Example #1
    def __init__(self):
        self.posdict = tp.get_txt_data("../data/review/posdict.txt", "lines")
        self.negdict = tp.get_txt_data("../data/review/negdict.txt", "lines")
        self.mostdict = tp.get_txt_data('../data/review/most.txt', 'lines')
        self.verydict = tp.get_txt_data('../data/review/very.txt', 'lines')
        self.moredict = tp.get_txt_data('../data/review/more.txt', 'lines')
        self.ishdict = tp.get_txt_data('../data/review/ish.txt', 'lines')
        self.insufficientdict = tp.get_txt_data('../data/review/insufficiently.txt', 'lines')
        self.inversedict = tp.get_txt_data('../data/review/inverse.txt', 'lines')
        self.stopwords = tp.get_txt_data('../data/review/sentiment_stopword.txt', 'lines')
Example #2
def word_by_word_review(filepath, sheetnum, colnum):
    # Read product review data from excel file and segment every review
    review_data = []
    for cell in tp.get_excel_data(
            filepath, sheetnum, colnum,
            'data')[0:tp.get_excel_data(filepath, sheetnum, colnum, 'rownum')]:
        review_data.append(tp.segmentation(cell, 'list'))  # Segment every review

    # Read txt file containing stopwords
    stopwords = tp.get_txt_data(
        '/home/sooda/nlp/Review-Helpfulness-Prediction/data/stopword.txt',
        'lines')

    # Filter stopwords from reviews
    seg_fil_result = []
    for review in review_data:
        fil = [
            word for word in review if word not in stopwords and word != ' '
        ]
        seg_fil_result.append(fil)
        fil = []

    # Return review set as a one-dimensional list
    review = list(itertools.chain(*seg_fil_result))
    return review
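
A minimal usage sketch for the function above; the workbook path and the sheet/column indices are hypothetical, and it assumes the `textprocessing` module (imported as `tp`) and `itertools` are available as in the example:

import itertools
import textprocessing as tp

# Hypothetical call: read column 1 of sheet 1 from an illustrative workbook,
# segment every review, drop stopwords, then inspect the flattened word list.
words = word_by_word_review('review_set.xlsx', 1, 1)
print len(words)    # total number of kept words across all reviews
print words[:20]    # first few segmented, stopword-filtered words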
Example #3
def word_by_word_review(filepath, sheetnum, colnum):
    # Read product review data from excel file and segment every review
    review_data = []
    for cell in tp.get_excel_data(
            filepath, sheetnum, colnum,
            'data')[0:tp.get_excel_data(filepath, sheetnum, colnum, 'rownum')]:
        review_data.append(tp.segmentation(cell, 'list'))  # Segment every review

    # Read txt file containing stopwords
    """
    stopwords = tp.get_txt_data('D:/code/stopword.txt', 'lines')
    """
    stopwords = tp.get_txt_data(
        'E:/GraduationProject/pythoncode/project/Prediction/main/PreprocessingModule/stopword.txt',
        'lines')

    # Filter stopwords from reviews
    seg_fil_result = []
    for review in review_data:
        fil = [
            word for word in review if word not in stopwords and word != ' '
        ]
        seg_fil_result.append(fil)
        fil = []

    # Return review set as a one-dimensional list
    review = list(itertools.chain(*seg_fil_result))
    return review
Example #4
def result():
    path = "I:\\Code\\Python\\Emotion\\1020_loupan_comment"
    filelist = os.listdir(path)
    file_name = []
    for files in filelist:
        Olddir = os.path.join(path, files)
        if (os.path.isdir(Olddir)):
            continue
        filename = os.path.splitext(files)[0]
        readStr = 'I:\\Code\\Python\\Emotion\\1020_loupan_comment\\' + filename + '.txt'
        writeStr = 'I:\\Code\\Python\\Emotion\\1020_loupan_result\\' + filename + '.txt'
        file_name.append(filename)
        review = tp.get_txt_data(readStr, 'lines')
        store_sentiment_dictionary_score(review, writeStr)
    # resultValue is assumed to be a module-level list populated by
    # store_sentiment_dictionary_score for each processed file
    resultFile = open("I:\\Code\\Python\\Emotion\\1020_loupan_resultValue.txt",
                      'w')
    for i in range(len(resultValue)):
        resultFile.write(str(file_name[i]) + ',' + str(resultValue[i]) + '\n')
    resultFile.close()
def word_by_word_review(filepath, sheetnum, colnum):
    # Read product review data from excel file and segment every review
    review_data = []
    for cell in tp.get_excel_data(filepath, sheetnum, colnum, 'data')[0:tp.get_excel_data(filepath, sheetnum, colnum, 'rownum')]:
        review_data.append(tp.segmentation(cell, 'list'))  # Segment every review

    # Read txt file containing stopwords
    stopwords = tp.get_txt_data('/home/sooda/nlp/Review-Helpfulness-Prediction/data/stopword.txt', 'lines')

    # Filter stopwords from reviews
    seg_fil_result = []
    for review in review_data:
        fil = [word for word in review if word not in stopwords and word != ' ']
        seg_fil_result.append(fil)
        fil = []

    # Return review set as a one-dimensional list
    review = list(itertools.chain(*seg_fil_result))
    return review
Example #6
def result():
    path = "I:\\Code\\Python\\Emotion\\0330_comment"
    filelist = os.listdir(path)
    file_name = []
    for files in filelist:  # all files
        Olddir = os.path.join(path, files)
        if os.path.isdir(Olddir):
            continue
        filename = os.path.splitext(files)[0]
        readStr = 'I:\\Code\\Python\\Emotion\\0330_comment\\' + filename + '.txt'
        writeStr = 'I:\\Code\\Python\\Emotion\\0330_result_each\\' + filename + '.txt'
        file_name.append(filename)
        review = tp.get_txt_data(readStr, 'lines')
        store_sentiment_dictionary_score(review, writeStr)  # analyze each product
    resultFile = open("I:\\Code\\Python\\Emotion\\0330_total_emotion.txt", 'w')
    for i in range(len(resultValue)):
        resultFile.write(str(file_name[i]))
        for j in xrange(len(resultValue[i])):
            resultFile.write(',' + str(resultValue[i][j]))
        resultFile.write('\n')
    resultFile.close()
"""
Compute a review's positive and negative score, their average score and standard deviation.
This module aims to extract review positive/negative score, average score and standard deviation features (6 features in all).
Sentiment analysis is based on a sentiment dictionary.

"""

from __future__ import division
import numpy as np
import textprocessing as tp


# 1. Load dictionary and dataset
# Load sentiment dictionary
posdict = tp.get_txt_data(
    "/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/positive and negative dictionary/posdict.txt",
    "lines",
)
negdict = tp.get_txt_data(
    "/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/positive and negative dictionary/negdict.txt",
    "lines",
)

# Load adverbs of degree dictionary
mostdict = tp.get_txt_data(
    "/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/adverbs of degree dictionary/most.txt",
    "lines",
)
verydict = tp.get_txt_data(
    "/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/adverbs of degree dictionary/very.txt",
    "lines",
)
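
The docstring above describes six dictionary-based features per review: positive and negative scores plus their averages and standard deviations. As a rough illustration only (the helper name and the per-sentence [pos, neg] layout are assumptions, not the original code), the six features could be assembled from per-sentence scores like this:

import numpy as np

def review_feature_sketch(sentence_scores):
    # sentence_scores: assumed list of [pos, neg] pairs, one per sentence
    score_array = np.array(sentence_scores)
    pos, neg = score_array[:, 0], score_array[:, 1]
    # six features: totals, means and standard deviations of both polarities
    return [np.sum(pos), np.sum(neg),
            np.mean(pos), np.mean(neg),
            np.std(pos), np.std(neg)]

# e.g. review_feature_sketch([[2, 0], [1, 1], [0, 3]])
# -> [3, 4, 1.0, 1.33, 0.82, 1.25] (approximately)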
import numpy as np
import os
import textprocessing as tp

# 1. Load dictionary and dataset
# Load SentimentDictionary
"""


# Load dataset
review = tp.get_excel_data("D:/code/review_set.xlxs", "1", "1", "data")

"""

posdict = tp.get_txt_data("I:\\Code\\Python\\Emotion\\posdict.txt", "lines")
negdict = tp.get_txt_data("I:\\Code\\Python\\Emotion\\negdict.txt", "lines")

# Load AdverbsOfDegreeDictionary
mostdict = tp.get_txt_data('I:\\Code\\Python\\Emotion\\most.txt', 'lines')
verydict = tp.get_txt_data('I:\\Code\\Python\\Emotion\\very.txt', 'lines')
moredict = tp.get_txt_data('I:\\Code\\Python\\Emotion\\more.txt', 'lines')
ishdict = tp.get_txt_data('I:\\Code\\Python\\Emotion\\ish.txt', 'lines')
insufficientdict = tp.get_txt_data(
    'I:\\Code\\Python\\Emotion\\insufficiently.txt', 'lines')
inversedict = tp.get_txt_data('I:\\Code\\Python\\Emotion\\inverse.txt',
                              'lines')

# Load dataset
#review = tp.get_excel_data("E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/HTC.xlsx", 1, 12, "data")
#review = tp.get_excel_data("E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/OPPO.xlsx", 1, 12, "data")
#! /usr/bin/env python2.7
#coding=utf-8

"""
Count how many times the product name, product brand and product attributes appear in the review.
This module aims to extract product name, brand and attribute features.

"""

import textprocessing as tp

# Read txt files containing product name, product brand and product attributes
name = tp.get_txt_data("D:/code/product_name.txt", "lines")
brand = tp.get_txt_data("D:/code/product_brand.txt", "lines")
attribute = tp.get_txt_data("D:/code/product_attribute", "lines")

# Function counting feature appearing times
def name_brand_attribute(dataset):
    num = []
    n, b, a = 0, 0, 0
    for review in dataset:
        for word in review:
            if word in name:
                n += 1
            elif word in brand:
                b += 1
            elif word in attribute:
                a += 1
        num.append((n, b, a))
        n, b, a = 0, 0, 0
    return num
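
A short, hypothetical usage of name_brand_attribute; the sample reviews and the resulting tuples are illustrative only and depend entirely on what the name/brand/attribute dictionaries contain:

# Hypothetical usage: each review is a list of already segmented words; every
# tuple in the result is (name_hits, brand_hits, attribute_hits) for one review.
sample_reviews = [[u'外观', u'不错'], [u'屏幕', u'很', u'清晰']]
print name_brand_attribute(sample_reviews)  # e.g. [(0, 0, 1), (0, 0, 1)] if '外观' and '屏幕' are listed as attributes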
Example #10
"""
Compute a review's positive and negative score, their average score and standard deviation.
This module aims to extract review positive/negative score, average score and standard deviation features (6 features in all).
Sentiment analysis is based on a sentiment dictionary.

"""

import sys
sys.path.append("../../../Preprocessing module")
import numpy as np
import textprocessing as tp

# 1. Load dictionary and dataset
# Load sentiment dictionary
posdict = tp.get_txt_data(
    "./sentiment dictionary/positive and negative dictionary/posdict.txt",
    "lines")
negdict = tp.get_txt_data(
    "./sentiment dictionary/positive and negative dictionary//negdict.txt",
    "lines")

# Load adverbs of degree dictionary
mostdict = tp.get_txt_data(
    './sentiment dictionary/adverbs of degree dictionary/most.txt', 'lines')
verydict = tp.get_txt_data(
    './sentiment dictionary/adverbs of degree dictionary/very.txt', 'lines')
moredict = tp.get_txt_data(
    './sentiment dictionary/adverbs of degree dictionary/more.txt', 'lines')
ishdict = tp.get_txt_data(
    './sentiment dictionary/adverbs of degree dictionary/ish.txt', 'lines')
insufficientdict = tp.get_txt_data(
# #       tname=name
#
# print tname,max

# sig_re='太逗了 无语 帅帅的 我靠,人死了还有呼吸'
#
# seg_list = tp.segmentation(sig_re, 'list')


# 1. Load positive and negative review data
# pos_review = tp.seg_fil_txt("/home/hadoop/goodnew.txt")
# neg_review = tp.seg_fil_txt("/home/hadoop/badnew.txt")

#sentiment_review = tp.seg_fil_txt("/home/hadoop/iyTop10/万物生长")

sentiment_review = tp.get_txt_data("/home/hadoop/成人记2电影版客观性.txt",'lines')

posdict = tp.get_txt_data(
    "/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/positive and negative dictionary/posdict.txt",
    "lines")
negdict = tp.get_txt_data(
    "/home/hadoop/coding/Sentiment features/Sentiment dictionary features/sentiment dictionary/positive and negative dictionary/negdict.txt",
    "lines")
stopwords = tp.get_txt_data('/home/hadoop/coding/stopword.txt', 'lines')
posdict.extend(negdict)  # merge positive and negative words into one sentiment-word list

i = 0
sen_cur = []
p_center = open("/home/hadoop/建国大业客观性.txt", 'w+')
for sig_re in sentiment_review:
    #sig_re='挺棒'
Example #12
#! /usr/bin/env python2.7
#coding=utf-8
"""
Compute a review's positive and negative score, their average score and standard deviation.
This module aims to extract review positive/negative score, average score and standard deviation features (6 features in all).
Sentiment analysis is based on a sentiment dictionary.

"""

import numpy as np
import textprocessing as tp

# 1. Load dictionary and dataset
# Load sentiment dictionary
posdict = tp.get_txt_data(
    "/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/posdict.txt",
    "lines")
negdict = tp.get_txt_data(
    "/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/negdict.txt",
    "lines")

# Load adverbs of degree dictionary
mostdict = tp.get_txt_data(
    '/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/most.txt',
    'lines')
verydict = tp.get_txt_data(
    '/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/very.txt',
    'lines')
moredict = tp.get_txt_data(
    '/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/more.txt',
    'lines')
# Load AdverbsOfDegreeDictionary
mostdict = tp.get_txt_data('D:/code/sentiment_dictionary/most.txt', 'lines')
verydict = tp.get_txt_data('D:/code/sentiment_dictionary/very.txt', 'lines')
moredict = tp.get_txt_data('D:/code/sentiment_dictionary/more.txt', 'lines')
ishdict = tp.get_txt_data('D:/code/sentiment_dictionary/ish.txt', 'lines')
insufficientdict = tp.get_txt_data('D:/code/sentiment_dictionary/insufficiently.txt', 'lines')
inversedict = tp.get_txt_data('D:/code/sentiment_dictionary/inverse.txt', 'lines')

# Load dataset
review = tp.get_excel_data("D:/code/review_set.xlxs", "1", "1", "data")

"""

posdict = tp.get_txt_data(
    "E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/SentimentDictionaryFeatures/SentimentDictionary/PositiveAndNegativeDictionary/posdict.txt",
    "lines")
negdict = tp.get_txt_data(
    "E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/SentimentDictionaryFeatures/SentimentDictionary/PositiveAndNegativeDictionary/negdict.txt",
    "lines")

# Load AdverbsOfDegreeDictionary
mostdict = tp.get_txt_data(
    'E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/SentimentDictionaryFeatures/SentimentDictionary/AdverbsOfDegreeDictionary/most.txt',
    'lines')
verydict = tp.get_txt_data(
    'E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/SentimentDictionaryFeatures/SentimentDictionary/AdverbsOfDegreeDictionary/very.txt',
    'lines')
moredict = tp.get_txt_data(
    'E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/SentimentDictionaryFeatures/SentimentDictionary/AdverbsOfDegreeDictionary/more.txt',
    'lines')
Example #14
#! /usr/bin/env python2.7
#coding=utf-8

"""
Count how many times the product name, product brand and product attributes appear in the review.
This module aims to extract product name, brand and attribute features.

"""

import textprocessing as tp

# Read txt files include product name, product brand and product attributes
name = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/name.txt", "lines")
brand = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/brand.txt", "lines")
attribute = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/attribute.txt", "lines")

# Function counting feature appearing times
def name_brand_attribute(dataset):
    num = []
    n, b, a = 0, 0, 0
    for review in dataset:
        for word in review:
            if word in name:
                n += 1
            elif word in brand:
                b += 1
            elif word in attribute:
                a += 1
        num.append((n, b, a))
        n, b, a = 0, 0, 0
    return num
"""
Compute a review's positive and negative score, their average score and standard deviation.
This module aims to extract review positive/negative score, average score and standard deviation features (6 features in all).
Sentiment analysis is based on a sentiment dictionary.

"""


import numpy as np
import textprocessing as tp


# 1. Load dictionary and dataset
# Load sentiment dictionary
posdict = tp.get_txt_data(
    "/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/posdict.txt", "lines"
)
negdict = tp.get_txt_data(
    "/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/negdict.txt", "lines"
)

# Load adverbs of degree dictionary
mostdict = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/most.txt", "lines")
verydict = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/very.txt", "lines")
moredict = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/more.txt", "lines")
ishdict = tp.get_txt_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/ish.txt", "lines")
insufficientdict = tp.get_txt_data(
    "/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/insufficiently.txt", "lines"
)
inversedict = tp.get_txt_data(
    "/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/inverse.txt", "lines"
Example #16

# 4. Store sentiment dictionary features
def store_sentiment_dictionary_score(review_set, storepath):
    sentiment_score = all_review_sentiment_score(
        sentence_sentiment_score(review_set))
    f = open(storepath, 'w')
    for i in sentiment_score:
        f.write(
            str(i[0]) + '\t' + str(i[1]) + '\t' + str(i[2]) + '\t' +
            str(i[3]) + '\t' + str(i[4]) + '\t' + str(i[5]) + '\n')
    f.close()


if __name__ == '__main__':
    posdict = tp.get_txt_data("../data/review/posdict.txt", "lines")
    negdict = tp.get_txt_data("../data/review/negdict.txt", "lines")
    mostdict = tp.get_txt_data('../data/review/most.txt', 'lines')
    verydict = tp.get_txt_data('../data/review/very.txt', 'lines')
    moredict = tp.get_txt_data('../data/review/more.txt', 'lines')
    ishdict = tp.get_txt_data('../data/review/ish.txt', 'lines')
    insufficientdict = tp.get_txt_data('../data/review/insufficiently.txt',
                                       'lines')
    inversedict = tp.get_txt_data('../data/review/inverse.txt', 'lines')
    review = tp.get_txt_data('reivew.txt', 'lines')

    print len(review)
    print single_review_sentiment_score(review[0])
    review_score = all_review_sentiment_score(sentence_sentiment_score(review))
    for index, score in enumerate(review_score):
        #print review[index], score
"""
Compute a review's positive and negative score, their average score and standard deviation.
This module aims to extract review positive/negative score, average score and standard deviation features (6 features in all).
Sentiment analysis is based on a sentiment dictionary.

"""

import sys
sys.path.append("../../../Preprocessing module")
sys.path.append("./Chinese-Sentiment-master")
import numpy as np
import textprocessing as tp


# 1. Load dictionary and dataset
# Load sentiment dictionary
posdict = tp.get_txt_data("./Chinese-Sentiment-master/sentiment dictionary/positive and negative dictionary/posdict.txt","lines")
negdict = tp.get_txt_data("./Chinese-Sentiment-master/sentiment dictionary/positive and negative dictionary/negdict.txt","lines")

# Load adverbs of degree dictionary
mostdict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/most.txt', 'lines')
verydict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/very.txt', 'lines')
moredict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/more.txt', 'lines')
ishdict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/ish.txt', 'lines')
insufficientdict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/insufficiently.txt', 'lines')
inversedict = tp.get_txt_data('./Chinese-Sentiment-master/sentiment dictionary/adverbs of degree dictionary/inverse.txt', 'lines')

# Load dataset
# review = tp.get_excel_data("../Machine learning features/seniment review set/pos_review.xlsx", 1, 1, "data")


# 2. Sentiment dictionary analysis basic function
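
The basic analysis function referenced by the comment above is cut off in this example. As a rough sketch only (the look-back window and the weights are assumptions, not the original implementation), a dictionary scorer that applies the degree-adverb and negation lists loaded above typically looks something like this:

def sentence_score_sketch(words):
    # words: one segmented sentence (list of tokens)
    poscount, negcount = 0.0, 0.0
    for i, word in enumerate(words):
        if word in posdict or word in negdict:
            weight = 1.0
            # inspect the two tokens immediately before the sentiment word
            for prev in words[max(i - 2, 0):i]:
                if prev in mostdict:
                    weight *= 2.0        # illustrative weight
                elif prev in verydict:
                    weight *= 1.5        # illustrative weight
                elif prev in moredict:
                    weight *= 1.25       # illustrative weight
                elif prev in ishdict:
                    weight *= 0.5        # illustrative weight
                elif prev in insufficientdict:
                    weight *= -0.3       # illustrative weight
                elif prev in inversedict:
                    weight *= -1.0       # negation flips polarity
            if word in posdict:
                poscount += weight
            else:
                negcount += weight
    return poscount, negcount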
#! /usr/bin/env python2.7
#coding=utf-8
"""
Compute a review's positive and negative score, their average score and standard deviation.
This module aims to extract review positive/negative score, average score and standard deviation features (6 features in all).
Sentiment analysis is based on a sentiment dictionary.

"""

import numpy as np
import textprocessing as tp

# 1. Load dictionary and dataset
# Load sentiment dictionary
posdict = tp.get_txt_data("D:/code/sentiment_dictionary/posdict.txt", "lines")
negdict = tp.get_txt_data("D:/code/sentiment_dictionary/negdict.txt", "lines")

# Load adverbs of degree dictionary
mostdict = tp.get_txt_data('D:/code/sentiment_dictionary/most.txt', 'lines')
verydict = tp.get_txt_data('D:/code/sentiment_dictionary/very.txt', 'lines')
moredict = tp.get_txt_data('D:/code/sentiment_dictionary/more.txt', 'lines')
ishdict = tp.get_txt_data('D:/code/sentiment_dictionary/ish.txt', 'lines')
insufficientdict = tp.get_txt_data(
    'D:/code/sentiment_dictionary/insufficiently.txt', 'lines')
inversedict = tp.get_txt_data('D:/code/sentiment_dictionary/inverse.txt',
                              'lines')

# Load dataset
review = tp.get_excel_data("D:/code/review_set.xlxs", "1", "1", "data")

"""
Compute a review's positive and negative score, their average score and standard deviation.
This module aims to extract review positive/negative score, average score and standard deviation features (6 features in all).
Sentiment analysis is based on a sentiment dictionary.

"""


import numpy as np
import textprocessing as tp


# 1. Load dictionary and dataset
# Load sentiment dictionary
posdict = tp.get_txt_data("D:/code/sentiment_dictionary/posdict.txt","lines")
negdict = tp.get_txt_data("D:/code/sentiment_dictionary/negdict.txt","lines")

# Load adverbs of degree dictionary
mostdict = tp.get_txt_data('D:/code/sentiment_dictionary/most.txt', 'lines')
verydict = tp.get_txt_data('D:/code/sentiment_dictionary/very.txt', 'lines')
moredict = tp.get_txt_data('D:/code/sentiment_dictionary/more.txt', 'lines')
ishdict = tp.get_txt_data('D:/code/sentiment_dictionary/ish.txt', 'lines')
insufficientdict = tp.get_txt_data('D:/code/sentiment_dictionary/insufficiently.txt', 'lines')
inversedict = tp.get_txt_data('D:/code/sentiment_dictionary/inverse.txt', 'lines')

# Load dataset
review = tp.get_excel_data("D:/code/review_set.xlxs", "1", "1", "data")


# 2. Sentiment dictionary analysis basic function
Example #20
                           break
                i += 1
            single_review_senti_score.append(self.transform_to_positive_num(poscount, negcount))
        score_array = np.array(single_review_senti_score)
        pos_score = np.sum(score_array[:, 0])
        neg_score = np.sum(score_array[:, 1])
        return [pos_score, neg_score]

    def single_sentiment_score(self, review):
        sentences_words = self.cut_sentences_words(review)
        self.sentences_words = []
        self.sentences_words = sentences_words
        scores = self.get_single_sent_count(sentences_words)
        return scores[0], scores[1]

    def sentence_sentiment_score(self, dataset):
        dataset = dataset[1:10]
        for review in dataset:
            scores = self.single_sentiment_score(review)
            print scores[0], scores[1]

if __name__ == '__main__':
    review = tp.get_txt_data('reivew.txt', 'lines')
    dict_sentiment = DictSentiment()

    print len(review)
    print dict_sentiment.single_sentiment_score(review[0])
    dict_sentiment.sentence_sentiment_score(review)


Example #21
"""
Count how many times the product name, product brand and product attributes appear in the review.
This module aims to extract product_name_brand_attribute features.

"""

import textprocessing as tp

# Read txt files containing product name, product brand and product attributes
"""
name = tp.get_txt_data("D:/code/product_name.txt", "lines")
brand = tp.get_txt_data("D:/code/product_brand.txt", "lines")
attribute = tp.get_txt_data("D:/code/product_attribute", "lines")
"""
name = tp.get_txt_data(
    'E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/InformativeFeatures/product_name_brand_attribute/name.txt',
    'lines')
brand = tp.get_txt_data(
    'E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/InformativeFeatures/product_name_brand_attribute/brand.txt',
    'lines')
attribute = tp.get_txt_data(
    'E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/InformativeFeatures/product_name_brand_attribute/attibute.txt',
    'lines')


# Function counting feature appearing times
def name_brand_attribute(dataset):
    num = []
    n, b, a = 0, 0, 0
    for review in dataset:
        for word in review:
Example #22
#! /usr/bin/env python2.7
#coding=utf-8
"""
Count how many times the product name, product brand and product attributes appear in the review.
This module aims to extract ProductNameBrandAndAttribute features.

"""

import textprocessing as tp

# Read txt files containing product name, product brand and product attributes
name = tp.get_txt_data("D:/code/product_name.txt", "lines")
brand = tp.get_txt_data("D:/code/product_brand.txt", "lines")
attribute = tp.get_txt_data("D:/code/product_attribute", "lines")


# Function counting feature appearing times
def name_brand_attribute(dataset):
    num = []
    n, b, a = 0, 0, 0
    for review in dataset:
        for word in review:
            if word in name:
                n += 1
            elif word in brand:
                b += 1
            elif word in attribute:
                a += 1
        num.append((n, b, a))
        n, b, a = 0, 0, 0
    return num