Example 1
import pandas as pd  # required by the snippet; Sentiment and Region come from the project's own modules


def test():
    s = Sentiment()
    r = Region(
        # machine-specific absolute path to the project's region dictionary
        "/Users/aaronhe/Documents/NutStore/Aaron He/FDU/Big-Data-Communication/Stereotype-Analysis-in-NetEase-News-Comments/Dict/region_dict/region.txt"
    )

    # Build the input data
    text = [
        ["潮汕人很帅,湖北人挺会做生意的!", "上海"],  # "Chaoshan people are handsome; Hubei people are good at business!" / Shanghai
        ["老铁牛逼!", "重庆"],  # "Buddy, that's awesome!" / Chongqing
        ["我觉得很好吃啊", "北京"],  # "I think it tastes great" / Beijing
    ]

    df = pd.DataFrame(text, columns=["text", "src"])
    print(df.head())

    # Batch-add the region fields to the DataFrame
    df = r.region_detect(df, on=["text"])

    print(
        s.sentiment_detect(df,
                           on=["text"],
                           srcs=["src"],
                           dists=["region_1", "region_2", "region_3"]))
    print(s.output_record(src="北京"))
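Here `region_detect` appears to add the `region_1`–`region_3` columns that `sentiment_detect` later consumes through its `dists` argument, while `srcs` names the column holding each commenter's own region; `output_record(src="北京")` then reports the records whose source is Beijing.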
Example 2
def main():
    # Data, Scraper, and Sentiment are the project's own classes
    myData = Data()
    evaluate = Sentiment()
    companies = myData.getCompanies()
    myScraper = Scraper(companies)
    url = input("Input a newspaper url: ")
    #url = "https://www.marketwatch.com/"
    #url = "https://www.cnbc.com/investing/"
    #url = "https://www.ccn.com/"
    myScraper.getArticleLinks(url)
    occurrences = myScraper.filterByCompany()
    occurrences = occurrences[:5]
    while True:
        for occurrence in occurrences:
            name = occurrence[0]
            num = occurrence[1]
            if num > 0:
                print(name + " has " + str(num) + " articles")
        company = input("Which company are you interested in: ")
        headlines = myScraper.findRelatedArticles(company)

        print("We found these articles from the past 7 days: ")
        for i, headline in enumerate(headlines, start=1):
            print(str(i) + " - " + headline)
        interest = int(input("Which article would you like to analyze? "))
        article = myScraper.relatedArticles[interest - 1]
        cleanArticle = myScraper.parseArticle(article.url)
        print("The stats of the article are: ")
        evaluate.rankSentenceScores(cleanArticle, myData.getBasicDictionary())
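Note that the `while True:` loop has no exit condition, so the prompt cycle repeats until the process is interrupted.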
Example 3
import os
import sys

import pandas as pd  # Sentiment comes from the project's own module


def main():
    # Load the data
    date = sys.argv[1]
    path_prefix = "./new_data"
    df = pd.read_pickle(
        os.path.join(path_prefix, "%s_select_comments.p" % date))

    # Load the model
    s = Sentiment()
    df = s.sentiment_detect(df,
                            on=["content"],
                            srcs=["province"],
                            dists=["region_1", "region_2", "region_3"])
    df_freq = s.table_record()

    # Save the results
    df.to_pickle(os.path.join(path_prefix, "%s_sentiment.p" % date))
    df_freq.to_pickle(os.path.join(path_prefix, "%s_senti_freq.p" % date))
    print(df)
    print(df_freq)
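Since the date comes from `sys.argv[1]`, the script takes the date as its only argument and would be invoked along the lines of `python sentiment_job.py 2020-01-01` (the script name and date format here are illustrative).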
Example 4
    def __init__(self, path, interval="month"):
        with open(path, 'r') as f:
            self.__soup = BeautifulSoup(f.read(), "html.parser")
            self.messages = []
            self.name = self.__soup.find("title").text.replace(
                "Conversation with ", "")
            message_headers = self.__soup.find_all("div",
                                                   class_="message_header")
            self.__span_meta = [
                m.find("span", class_="meta").text for m in message_headers
            ]
            self.__fbt = FbTime(self.__span_meta)

            for m in self.__soup.find_all("div", class_="message"):
                span = m.find("span", class_="meta")
                self.messages.append(
                    Message(
                        m.find("span", class_="user").text,
                        self.__fbt.span_meta_to_date(span.text, interval),
                        span.text, m.next_sibling.text))

            self.__sent = Sentiment(self.messages, self.__fbt)
            self.participants = self.__scrape_participants()
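This constructor parses a Facebook chat export with BeautifulSoup, converts each message header's `span.meta` text into a date via `FbTime`, and hands the collected `Message` objects to `Sentiment`.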
Example 5
    def getSentiments(self, path):
        dirs = os.listdir(path)

        for file in dirs:
            filename = path + file
            self.positive.clear()
            self.negative.clear()
            self.neutral.clear()
            self.compound.clear()
            self.type.clear()
            self.text.clear()
            self.score.clear()
            self.originalScore.clear()
            self.calculatedScore.clear()
            self.formatedText.clear()
            self.correctScore = 0

            print(filename)
            count = 0
            with open(filename) as csvfile:
                print("here")
                spamreader = csv.reader(csvfile, delimiter=',')
                for row in spamreader:
                    count = count + 1
                    if count > 1:
                        self.type.append(row[0])
                        self.text.append(row[1])
                        self.score.append(row[2])

                sentiment = Sentiment()  # build the analyzer once and reuse it for every row
                for i in range(len(self.text)):

                    findSentimentText = self.text[i]
                    #print(findSentimentText)
                    # smart_str comes from django.utils.encoding in the original project
                    findSentimentText = encoding.smart_str(findSentimentText,
                                                           encoding='ascii',
                                                           errors='ignore')

                    findSentimentText = findSentimentText.lower()
                    findSentimentText = re.sub(
                        r'((www\.[^\s]+)|(https?://[^\s]+))', 'URL',
                        findSentimentText)
                    findSentimentText = re.sub(r'@[^\s]+', 'AT_USER',
                                               findSentimentText)
                    findSentimentText = re.sub(r'[\s]+', ' ', findSentimentText)
                    findSentimentText = re.sub(r'#([^\s]+)', r'\1',
                                               findSentimentText)
                    findSentimentText = findSentimentText.strip('\'"')
                    findSentimentText = re.sub(r'\\[^\s]+', 'special_symbol',
                                               findSentimentText)

                    scoreCal = sentiment.getSentimentNLTK(findSentimentText)
                    self.positive.append(scoreCal[2])
                    self.negative.append(scoreCal[1])
                    self.neutral.append(scoreCal[0])
                    self.compound.append(scoreCal[3])

                    if scoreCal[3] > 0.5:
                        self.calculatedScore.append(1)
                    else:
                        self.calculatedScore.append(-1)

            self.calculateSentiment()
            total = len(self.score)
            print(total)
            print(self.correctScore)
            accuracy = (self.correctScore / float(total))
            print(accuracy)
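The cleanup chain above is a standard tweet-preprocessing pass (mask URLs and @-mentions, collapse whitespace, strip hashtag markers, mask backslash escapes). As a minimal sketch, it could be factored into a standalone helper; the function name is hypothetical:

import re

def clean_tweet(text):
    # Hypothetical refactor of the in-loop cleanup above.
    text = text.lower()
    text = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', 'URL', text)  # mask links
    text = re.sub(r'@[^\s]+', 'AT_USER', text)                        # mask mentions
    text = re.sub(r'[\s]+', ' ', text)                                # collapse whitespace
    text = re.sub(r'#([^\s]+)', r'\1', text)                          # keep hashtag words, drop '#'
    text = text.strip('\'"')
    text = re.sub(r'\\[^\s]+', 'special_symbol', text)                # mask backslash escapes
    return text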
Example 6
# Add spacy word analyzer
import spacy
from spacy.tokens import Token
nlp = spacy.load('en_core_web_sm')
from Sentiment import Sentiment,News
import glob, os, sys
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from ReadData import ALPACA_REST,runTicker,ConfigTable,ALPHA_TIMESERIES,GetTimeSlot,SQL_CURSOR
from alpaca_trade_api.rest import TimeFrame
import pmdarima as pmd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
s = Sentiment()
debug = False
# create sentiment analyzer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()
import statsmodels.api as sm1
from statsmodels.sandbox.regression.predstd import wls_prediction_std

# univariate stacked lstm example
from numpy import array
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.utils import plot_model

api = ALPACA_REST()
inputTxt = 'Honest Company reports Q1 EPS (13c) vs. 1c last year'
inputTxt = 'Lennar reports Q2 adjusted EPS $2.95, consensus $2.36'  # overwrites the line above; only this headline is kept
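The snippet stops after defining the sample headlines; a minimal continuation, using only the VADER analyzer `sid` created above, would score the current headline like this:

scores = sid.polarity_scores(inputTxt)  # standard NLTK VADER call
print(scores)  # dict with 'neg', 'neu', 'pos', and 'compound' fields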
Example 7
    def get_corpus_size(self):
        # note: 'senitiment_cities' is spelled this way in the source class
        total_number_of_docs = 0
        for city in self.senitiment_cities:
            city_sentiment = Sentiment(city, self.to_tokenize, self.document_type)
            total_number_of_docs += len(city_sentiment.filenames)
        print(total_number_of_docs)
Example 8
    def get_word_frequency(self):
        for city in self.senitiment_cities:
            city_sentiment = Sentiment(city, self.to_tokenize, self.document_type)
            city_sentiment.get_word_frequency('biomass')
Example 9
    def get_city_thresholds(self):
        for city in self.senitiment_cities:
            city_sentiment = Sentiment(city, self.to_tokenize, self.document_type)
            city_sentiment.run()
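Examples 7–9 each rebuild a per-city `Sentiment` inside the same loop; a hypothetical helper on the same class could centralize that construction:

    def _city_sentiments(self):
        # Hypothetical helper: yields one Sentiment per city so the
        # three methods above can share the construction loop.
        for city in self.senitiment_cities:
            yield Sentiment(city, self.to_tokenize, self.document_type)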
Example 10
from Sentiment import Sentiment

sentiment = Sentiment()

model = sentiment.train_model(load_data=True, nb_epoch=1, old_weight_path="output/weights.13-0.20.hdf5")


tests = ['发货太慢了,商家服务太差.',  # "Shipping was way too slow; the seller's service is terrible."
         '脑白金强势登陆央视']  # "Naobaijin makes a strong debut on CCTV" (an ad line)

labels = sentiment.predict_label(model,tests)

print(labels)
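`old_weight_path` points at an existing Keras checkpoint, so this call resumes from saved weights for one more epoch rather than training from scratch; by the usual `weights.{epoch}-{val}.hdf5` naming convention, the file comes from epoch 13 with a monitored value of 0.20.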