Beispiel #1
0
def word_by_word_review(filepath, sheetnum, colnum):
    """Read reviews from an excel sheet, segment each into words, remove
    stopwords, and return every remaining word as one flat list.

    Args:
        filepath: path of the excel workbook.
        sheetnum: sheet index forwarded to tp.get_excel_data.
        colnum: column index forwarded to tp.get_excel_data.

    Returns:
        One-dimensional list of words from all reviews, with stopwords and
        bare-space tokens filtered out.
    """
    # Read product review data from the excel file and segment every review.
    cells = tp.get_excel_data(filepath, sheetnum, colnum, 'data')
    rownum = tp.get_excel_data(filepath, sheetnum, colnum, 'rownum')
    review_data = [tp.segmentation(cell, 'list') for cell in cells[0:rownum]]

    # Read txt file containing the stopword list.
    stopwords = tp.get_txt_data(
        'E:/GraduationProject/pythoncode/project/Prediction/main/PreprocessingModule/stopword.txt',
        'lines')

    # Filter stopwords (and bare-space tokens) from every review.
    seg_fil_result = [
        [word for word in review if word not in stopwords and word != ' ']
        for review in review_data
    ]

    # Flatten the per-review word lists into one list.
    return list(itertools.chain(*seg_fil_result))
Beispiel #2
0
def word_by_word_review(filepath, sheetnum, colnum):
    """Read reviews from an excel sheet, segment each into words, remove
    stopwords, and return every remaining word as one flat list.

    Args:
        filepath: path of the excel workbook.
        sheetnum: sheet index forwarded to tp.get_excel_data.
        colnum: column index forwarded to tp.get_excel_data.

    Returns:
        One-dimensional list of words from all reviews, with stopwords and
        bare-space tokens filtered out.
    """
    # Read product review data from excel file and segment every review.
    # BUG FIX: the row count was previously fetched through a bare
    # `get_excel_data(...)` call, which raises NameError because only the
    # module alias `tp` is imported; it must be `tp.get_excel_data`.
    review_data = []
    for cell in tp.get_excel_data(
            filepath, sheetnum, colnum,
            'data')[0:tp.get_excel_data(filepath, sheetnum, colnum, 'rownum')]:
        review_data.append(tp.segmentation(cell, 'list'))  # Segment every review

    # Read txt file containing the stopword list.
    stopwords = tp.get_txt_data(
        '/home/sooda/nlp/Review-Helpfulness-Prediction/data/stopword.txt',
        'lines')

    # Filter stopwords (and bare-space tokens) from every review.
    seg_fil_result = []
    for review in review_data:
        seg_fil_result.append(
            [word for word in review if word not in stopwords and word != ' '])

    # Return the review set as a one-dimensional list.
    return list(itertools.chain(*seg_fil_result))
def store_word_sent_num_features(filepath, sheetnum, colnum, data, storepath):
    data = tp.get_excel_data(filepath, sheetnum, colnum, 'data')

    word_sent_num = word_sent_count(data)  #需要初始化
    print word_sent_num
    f = open(storepath, 'w')
    for i in word_sent_num:
        f.write(str(i[0]) + ' ' + str(i[1]) + ' ' + str(i[2]) + '\n')
    f.close()
def store_adj_adv_v_num_feature(filepath, sheetnum, colnum, data, storepath):
    """Count adjective/adverb statistics for each review in an excel column
    and write them to `storepath`, one space-separated triple per line.

    Args:
        filepath: path of the excel workbook to read reviews from.
        sheetnum: sheet index forwarded to tp.get_excel_data.
        colnum: column index forwarded to tp.get_excel_data.
        data: accepted for interface compatibility but ignored — it is
            immediately overwritten by the excel data read below.
        storepath: output text file path.
    """
    data = tp.get_excel_data(filepath, sheetnum, colnum, 'data')

    adj_adv_num = count_adj_adv(data)

    # `with` guarantees the output file is closed even if a write fails.
    with open(storepath, 'w') as f:
        for i in adj_adv_num:
            f.write(str(i[0]) + ' ' + str(i[1]) + ' ' + str(i[2]) + '\n')
def word_by_word_review(filepath, sheetnum, colnum):
    """Read reviews from an excel sheet, segment each into words, remove
    stopwords, and return every remaining word as one flat list.

    Args:
        filepath: path of the excel workbook.
        sheetnum: sheet index forwarded to tp.get_excel_data.
        colnum: column index forwarded to tp.get_excel_data.

    Returns:
        One-dimensional list of words from all reviews, with stopwords and
        bare-space tokens filtered out.
    """
    # BUG FIX: the row count was previously fetched through a bare
    # `get_excel_data(...)` call, which raises NameError because only the
    # module alias `tp` is imported; it must be `tp.get_excel_data`.
    cells = tp.get_excel_data(filepath, sheetnum, colnum, 'data')
    rownum = tp.get_excel_data(filepath, sheetnum, colnum, 'rownum')
    review_data = [tp.segmentation(cell, 'list') for cell in cells[0:rownum]]

    # Read txt file containing the stopword list.
    stopwords = tp.get_txt_data('/home/sooda/nlp/Review-Helpfulness-Prediction/data/stopword.txt', 'lines')

    # Filter stopwords (and bare-space tokens) from every review.
    seg_fil_result = [
        [word for word in review if word not in stopwords and word != ' ']
        for review in review_data
    ]

    # Return the review set as a one-dimensional list.
    return list(itertools.chain(*seg_fil_result))
Beispiel #6
0
import sklearn
from nltk.classify.scikitlearn import SklearnClassifier

# Output paths: extracted n-gram feature report and the pickled classifier.
filefeature = 'E:/GraduationProject/pythoncode/project/Prediction/main/result/feature_word_ngram.txt'
filename = 'E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/MachineLearningFeatures/senti_class_word_ngram.pkl'

# 1. Load data
# (The triple-quoted block below is an older, disabled load path kept as a
# bare string literal; it has no runtime effect.)
"""
review = tp.get_excel_data("D:/code/sentiment_test/review_set.xlsx", "1", "1", "data")
sentiment_review = tp.seg_fil_senti_excel("D:/code/sentiment_test/review_set.xlsx", "1", "1")
"""

# Raw review texts from sheet 1, column 12 of the Samsung workbook, plus the
# same reviews preprocessed by tp.seg_fil_senti_excel (presumably segmented
# and stopword-filtered — confirm against the tp module).
review = tp.get_excel_data(
    "E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/Samsung.xlsx",
    1, 12, "data")

sentiment_review = tp.seg_fil_senti_excel(
    "E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/Samsung.xlsx",
    1, 12)


# 2. Feature extraction method
# Used for transform review to features, so it can calculate sentiment probability by classifier
def create_words_bigrams_scores():
    posdata = tp.seg_fil_senti_excel(
        "E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/MachineLearningFeatures/SenimentReviewSet/pos_review.xlsx",
        1, 1)
    negdata = tp.seg_fil_senti_excel(
        "E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/MachineLearningFeatures/SenimentReviewSet/neg_review.xlsx",
import textprocessing as tp
import pickle
import itertools
from random import shuffle

import nltk
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
from nltk.probability import FreqDist, ConditionalFreqDist

import sklearn


# 1. Load data
review = tp.get_excel_data("D:/code/sentiment_test/review_set.xlsx", "1", "1")
sentiment_review = tp.seg_fil_senti_excel("D:/code/sentiment_test/review_set.xlsx", "1", "1")


# 2. Feature extraction method
# Used for transform review to features, so it can calculate sentiment probability by classifier
def create_words_bigrams_scores():
    """Load the segmented positive/negative review sets and collect their
    words and top bigram collocations for feature scoring.

    NOTE(review): this definition is truncated in this excerpt.
    """
    posdata = tp.seg_fil_senti_excel("D:/code/sentiment_test/pos_review.xlsx", "1", "1")
    negdata = tp.seg_fil_senti_excel("D:/code/sentiment_test/neg_review.xlsx", "1", "1")
    
    # Flatten the per-review word lists into one word stream per polarity.
    posWords = list(itertools.chain(*posdata))
    negWords = list(itertools.chain(*negdata))

    bigram_finder = BigramCollocationFinder.from_words(posWords)
    # NOTE(review): the posWords finder above is immediately overwritten by
    # the negWords finder below, so `posBigrams` is actually computed from
    # negWords. Almost certainly a bug — two distinct finder variables needed.
    bigram_finder = BigramCollocationFinder.from_words(negWords)
    posBigrams = bigram_finder.nbest(BigramAssocMeasures.chi_sq, 5000)
sys.path.append("./Preprocessing module/")
import textprocessing as tp
import pickle
import itertools
from random import shuffle

import nltk
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
from nltk.probability import FreqDist, ConditionalFreqDist

import sklearn


# 1. Load data
review = tp.get_excel_data("../../../Review set/review_set.xlsx", 1, 7, "data")
sentiment_review = tp.seg_fil_senti_excel("../../../Review set/review_set.xlsx", 1, 7)


# 2. Feature extraction method
# Used for transform review to features, so it can calculate sentiment probability by classifier
def create_words_bigrams_scores():
    """Load the segmented positive/negative review sets and collect their
    words and top bigram collocations for feature scoring.

    NOTE(review): this definition is truncated in this excerpt.
    """
    posdata = tp.seg_fil_senti_excel("pos_review.xlsx", 1, 1)
    negdata = tp.seg_fil_senti_excel("neg_review.xlsx", 1, 1)
    
    # Flatten the per-review word lists into one word stream per polarity.
    posWords = list(itertools.chain(*posdata))
    negWords = list(itertools.chain(*negdata))

    bigram_finder = BigramCollocationFinder.from_words(posWords)
    # NOTE(review): the posWords finder above is immediately overwritten by
    # the negWords finder below, so `posBigrams` is actually computed from
    # negWords. Almost certainly a bug — two distinct finder variables needed.
    bigram_finder = BigramCollocationFinder.from_words(negWords)
    posBigrams = bigram_finder.nbest(BigramAssocMeasures.chi_sq, 5000)
Beispiel #9
0
__author__ = 'anchengwu'
#coding=utf-8

import sys
sys.path.append("../../../Preprocessing module")

import pos_neg_senti_dict_feature as pn
import textprocessing as tp

# Load dataset
review = tp.get_excel_data(
    "../Machine learning features/seniment review set/pos_review.xlsx", 1, 1,
    "data")

#test single dataset
print pn.single_review_sentiment_score(
    '买过散装的粽子才来买礼盒的,礼盒很大气,比超市买的100多的还要好,配置也不错,肉的素的都有,刚煮了个蛋黄粽子很不错,米好蛋黄也黄很香,老板态度很好,还想买一份~'
    .decode('utf8'))

#test all dataset
for i in pn.all_review_sentiment_score(pn.sentence_sentiment_score(review)):
    print i
Beispiel #10
0
#! /usr/bin/env python2.7
#coding=utf-8
import textprocessing as tp

review = tp.get_excel_data("../data/review_set.xlsx", 1, 1, "data")
review_txt = open('reivew.txt', 'wb+')
for r in review:
    print r
    review_txt.write(r)
    review_txt.write('\n')

review_txt.close()

sys.path.append("./Preprocessing module/")
import textprocessing as tp
import cPickle as pickle
import itertools
from random import shuffle

import nltk
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
from nltk.probability import FreqDist, ConditionalFreqDist

import sklearn


# 1. Load data
review = tp.get_excel_data("review_set.xlsx", 1, 7, "data")
sentiment_review = tp.seg_fil_senti_excel("review_set.xlsx", 1, 7)


# 2. Feature extraction method
# Used for transform review to features, so it can calculate sentiment probability by classifier
def create_words_bigrams_scores():
    """Load the segmented positive/negative review sets and collect their
    words and top bigram collocations for feature scoring.

    NOTE(review): this definition is truncated in this excerpt.
    """
    posdata = tp.seg_fil_senti_excel("pos_review.xlsx", 1, 1)
    negdata = tp.seg_fil_senti_excel("neg_review.xlsx", 1, 1)
    
    # Flatten the per-review word lists into one word stream per polarity.
    posWords = list(itertools.chain(*posdata))
    negWords = list(itertools.chain(*negdata))

    bigram_finder = BigramCollocationFinder.from_words(posWords)
    # NOTE(review): the posWords finder above is immediately overwritten by
    # the negWords finder below, so `posBigrams` is actually computed from
    # negWords. Almost certainly a bug — two distinct finder variables needed.
    bigram_finder = BigramCollocationFinder.from_words(negWords)
    posBigrams = bigram_finder.nbest(BigramAssocMeasures.chi_sq, 5000)
import textprocessing as tp
import pickle
import itertools
from random import shuffle

import nltk
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
from nltk.probability import FreqDist, ConditionalFreqDist

import sklearn


# 1. Load data
review = tp.get_excel_data("/home/hadoop/coding/Review set/HTC Z710t_review_2013.6.5.xlsx",1,12, "data")
sentiment_review = tp.seg_fil_senti_excel("/home/hadoop/coding/Review set/Meizu MX_review_2013.6.7.xlsx", 1, 12)



# 2. Feature extraction method
# Used for transform review to features, so it can calculate sentiment probability by classifier
def create_words_bigrams_scores():
    """Load the segmented positive/negative review sets and collect their
    words and bigram collocations for feature scoring.

    NOTE(review): this definition is truncated in this excerpt.
    """
    posdata = tp.seg_fil_senti_excel("/home/hadoop/coding/Sentiment features/Machine learning features/seniment review set/pos_review.xlsx",1,1)
    negdata = tp.seg_fil_senti_excel("/home/hadoop/coding/Sentiment features/Machine learning features/seniment review set/neg_review.xlsx", 1, 1)
    
    # Flatten the per-review word lists into one word stream per polarity.
    posWords = list(itertools.chain(*posdata))
    negWords = list(itertools.chain(*negdata))

    bigram_finder = BigramCollocationFinder.from_words(posWords)#turn the text into two-word (bigram) collocations
    # NOTE(review): the posWords finder above is immediately overwritten —
    # any nbest() call after this point uses only the negWords finder.
    # Almost certainly a bug: two distinct finder variables are needed.
    bigram_finder = BigramCollocationFinder.from_words(negWords)
Beispiel #13
0
"""

import textprocessing as tp
import pickle
import itertools
from random import shuffle

import nltk
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
from nltk.probability import FreqDist, ConditionalFreqDist

import sklearn

# 1. Load data
review = tp.get_excel_data("D:/code/sentiment_test/review_set.xlsx", "1", "1",
                           "data")
sentiment_review = tp.seg_fil_senti_excel(
    "D:/code/sentiment_test/review_set.xlsx", "1", "1")


# 2. Feature extraction method
# Used for transform review to features, so it can calculate sentiment probability by classifier
def create_words_bigrams_scores():
    posdata = tp.seg_fil_senti_excel("D:/code/sentiment_test/pos_review.xlsx",
                                     "1", "1")
    negdata = tp.seg_fil_senti_excel("D:/code/sentiment_test/neg_review.xlsx",
                                     "1", "1")

    posWords = list(itertools.chain(*posdata))
    negWords = list(itertools.chain(*negdata))
Beispiel #14
0
__author__ = 'anchengwu'
#coding=utf-8

import sys
sys.path.append("../../../Preprocessing module")

import pos_neg_senti_dict_feature as pn
import textprocessing as tp

# Load dataset
review = tp.get_excel_data("../Machine learning features/seniment review set/pos_review.xlsx", 1, 1, "data")

#test single dataset
print pn.single_review_sentiment_score('买过散装的粽子才来买礼盒的,礼盒很大气,比超市买的100多的还要好,配置也不错,肉的素的都有,刚煮了个蛋黄粽子很不错,米好蛋黄也黄很香,老板态度很好,还想买一份~'.decode('utf8'))

#test all dataset
for i in pn.all_review_sentiment_score(pn.sentence_sentiment_score(review)):
	print i

# Load sentiment dictionary (positive / negative word lists).
posdict = tp.get_txt_data("D:/code/sentiment_dictionary/posdict.txt", "lines")
negdict = tp.get_txt_data("D:/code/sentiment_dictionary/negdict.txt", "lines")

# Load adverbs-of-degree dictionaries used to weight sentiment words
# (see the `match` weights: most > very > more > ish > insufficient).
mostdict = tp.get_txt_data('D:/code/sentiment_dictionary/most.txt', 'lines')
verydict = tp.get_txt_data('D:/code/sentiment_dictionary/very.txt', 'lines')
moredict = tp.get_txt_data('D:/code/sentiment_dictionary/more.txt', 'lines')
ishdict = tp.get_txt_data('D:/code/sentiment_dictionary/ish.txt', 'lines')
insufficientdict = tp.get_txt_data(
    'D:/code/sentiment_dictionary/insufficiently.txt', 'lines')
inversedict = tp.get_txt_data('D:/code/sentiment_dictionary/inverse.txt',
                              'lines')

# Load dataset
# NOTE(review): the extension ".xlxs" looks like a typo for ".xlsx" — verify
# against the file actually on disk before changing it.
review = tp.get_excel_data("D:/code/review_set.xlxs", "1", "1", "data")


# 2. Sentiment dictionary analysis basic function
# Function of matching adverbs of degree and set weights
def match(word, sentiment_value):
    """Scale `sentiment_value` by the weight of the degree-adverb class that
    `word` belongs to (most 2.0 > very 1.5 > more 1.25 > ish 0.5 >
    insufficient 0.25).

    NOTE(review): this definition is truncated in this excerpt — it continues
    beyond the last branch shown here.
    """
    if word in mostdict:
        sentiment_value *= 2.0
    elif word in verydict:
        sentiment_value *= 1.5
    elif word in moredict:
        sentiment_value *= 1.25
    elif word in ishdict:
        sentiment_value *= 0.5
    elif word in insufficientdict:
        sentiment_value *= 0.25
    'E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/SentimentDictionaryFeatures/SentimentDictionary/AdverbsOfDegreeDictionary/ish.txt',
    'lines')
insufficientdict = tp.get_txt_data(
    'E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/SentimentDictionaryFeatures/SentimentDictionary/AdverbsOfDegreeDictionary/insufficiently.txt',
    'lines')
inversedict = tp.get_txt_data(
    'E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/SentimentDictionaryFeatures/SentimentDictionary/AdverbsOfDegreeDictionary/inverse.txt',
    'lines')

# Load dataset — alternate review sets kept commented out for quick switching.
#review = tp.get_excel_data("E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/HTC.xlsx", 1, 12, "data")
#review = tp.get_excel_data("E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/OPPO.xlsx", 1, 12, "data")
#review = tp.get_excel_data("E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/MeiZuMX.xlsx", 1, 12, "data")
#review = tp.get_excel_data("E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/Samsung.xlsx", 1, 12, "data")
review = tp.get_excel_data(
    "E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/Motorala.xlsx",
    1, 11, "data")

# Get the values of the first column of the first sheet of the excel file.
#print review[1]


# 2. Sentiment dictionary analysis basic function
# Function of matching adverbs of degree and set weights
def match(word, sentiment_value):
    if word in mostdict:
        sentiment_value *= 2.0
    elif word in verydict:
        sentiment_value *= 1.5
    elif word in moredict:
        sentiment_value *= 1.25
# 1. Load dictionary and dataset
# Load sentiment dictionary (positive / negative word lists).
posdict = tp.get_txt_data("D:/code/sentiment_dictionary/posdict.txt","lines")
negdict = tp.get_txt_data("D:/code/sentiment_dictionary/negdict.txt","lines")

# Load adverbs-of-degree dictionaries used to weight sentiment words
# (see the `match` weights: most > very > more > ish > insufficient).
mostdict = tp.get_txt_data('D:/code/sentiment_dictionary/most.txt', 'lines')
verydict = tp.get_txt_data('D:/code/sentiment_dictionary/very.txt', 'lines')
moredict = tp.get_txt_data('D:/code/sentiment_dictionary/more.txt', 'lines')
ishdict = tp.get_txt_data('D:/code/sentiment_dictionary/ish.txt', 'lines')
insufficientdict = tp.get_txt_data('D:/code/sentiment_dictionary/insufficiently.txt', 'lines')
inversedict = tp.get_txt_data('D:/code/sentiment_dictionary/inverse.txt', 'lines')

# Load dataset
# NOTE(review): the extension ".xlxs" looks like a typo for ".xlsx" — verify
# against the file actually on disk before changing it.
review = tp.get_excel_data("D:/code/review_set.xlxs", "1", "1", "data")


# 2. Sentiment dictionary analysis basic function
# Function of matching adverbs of degree and set weights
def match(word, sentiment_value):
	if word in mostdict:
		sentiment_value *= 2.0
	elif word in verydict:
	    sentiment_value *= 1.5
	elif word in moredict:
	    sentiment_value *= 1.25
	elif word in ishdict:
	    sentiment_value *= 0.5
	elif word in insufficientdict:
	    sentiment_value *= 0.25
Beispiel #18
0
#! /usr/bin/env python2.7
# coding=utf-8
import textprocessing as tp

review = tp.get_excel_data("../data/review_set.xlsx", 1, 1, "data")
review_txt = open("reivew.txt", "wb+")
for r in review:
    print r
    review_txt.write(r)
    review_txt.write("\n")

review_txt.close()
Beispiel #19
0
    '/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/more.txt',
    'lines')
ishdict = tp.get_txt_data(
    '/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/ish.txt',
    'lines')
insufficientdict = tp.get_txt_data(
    '/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/insufficiently.txt',
    'lines')
inversedict = tp.get_txt_data(
    '/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/inverse.txt',
    'lines')

# Load dataset
#review = tp.get_excel_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/review_set.xlxs", "1", "1", "data")
review = tp.get_excel_data(
    "/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_test/review_set.xlsx",
    1, 1, "data")


# 2. Sentiment dictionary analysis basic function
# Function of matching adverbs of degree and set weights
def match(word, sentiment_value):
    if word in mostdict:
        sentiment_value *= 2.0
    elif word in verydict:
        sentiment_value *= 1.5
    elif word in moredict:
        sentiment_value *= 1.25
    elif word in ishdict:
        sentiment_value *= 0.5
    elif word in insufficientdict:
import textprocessing as tp
import pickle
import itertools
from random import shuffle

import nltk
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
from nltk.probability import FreqDist, ConditionalFreqDist

import sklearn


# 1. Load data
review = tp.get_excel_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_test/review_set.xlsx", 1, 1, "data")
sentiment_review = tp.seg_fil_senti_excel("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_test/review_set.xlsx", 1, 1)


# 2. Feature extraction method
# Used for transform review to features, so it can calculate sentiment probability by classifier
def create_words_bigrams_scores():
    """Load the segmented positive/negative review sets and collect their
    words and top bigram collocations for feature scoring.

    NOTE(review): this definition is truncated in this excerpt.
    """
    posdata = tp.seg_fil_senti_excel("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_test/pos_review.xlsx", 1, 1)
    negdata = tp.seg_fil_senti_excel("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_test/neg_review.xlsx", 1, 1)

    # Flatten the per-review word lists into one word stream per polarity.
    posWords = list(itertools.chain(*posdata))
    negWords = list(itertools.chain(*negdata))

    bigram_finder = BigramCollocationFinder.from_words(posWords)
    # NOTE(review): the posWords finder above is immediately overwritten by
    # the negWords finder below, so `posBigrams` is actually computed from
    # negWords. Almost certainly a bug — two distinct finder variables needed.
    bigram_finder = BigramCollocationFinder.from_words(negWords)
    posBigrams = bigram_finder.nbest(BigramAssocMeasures.chi_sq, 5000)