Example #1
def main():
    myData = Data()
    evaluate = Sentiment()
    companies = myData.getCompanies()
    myScraper = Scraper(companies)
    url = str(input("Input a newspaper url: "))
    #url = "https://www.marketwatch.com/"
    #url = "https://www.cnbc.com/investing/"
    #url = "https://www.ccn.com/"
    myScraper.getArticleLinks(url)
    occurrences = myScraper.filterByCompany()
    occurrences = occurrences[:5]
    while True:
        for occurrence in occurrences:
            name = occurrence[0]
            num = occurrence[1]
            if num > 0:
                print(name + " has " + str(num) + " articles")
        company = str(input("Which company are you interested in: "))
        headlines = myScraper.findRelatedArticles(company)

        print("We found these articles from the past 7 days: ")
        i = 1
        for headline in headlines:
            print(str(i) + " - " + headline)
            i += 1
        interest = int(input("Which article would you like to analyze? "))
        article = myScraper.relatedArticles[interest - 1]
        cleanArticle = myScraper.parseArticle(article.url)
        print("The stats of the article are: ")
        evaluate.rankSentenceScores(cleanArticle, myData.getBasicDictionary())
Example #2
def test():
    s = Sentiment()
    r = Region(
        "/Users/aaronhe/Documents/NutStore/Aaron He/FDU/Big-Data-Communication/Stereotype-Analysis-in-NetEase-News-Comments/Dict/region_dict/region.txt"
    )

    # Construct the input data
    text = [
        ["潮汕人很帅,湖北人挺会做生意的!", "上海"],  # "Chaoshan people are handsome, and Hubei people are good at business!" (source: Shanghai)
        ["老铁牛逼!", "重庆"],  # "Dude, awesome!" (source: Chongqing)
        ["我觉得很好吃啊", "北京"],  # "I think it tastes great" (source: Beijing)
    ]

    df = pd.DataFrame(text, columns=["text", "src"])
    print(df.head())

    df = r.region_detect(df, on=["text"])

    # Add the region fields to the DataFrame in batch
    print(
        s.sentiment_detect(df,
                           on=["text"],
                           srcs=["src"],
                           dists=["region_1", "region_2", "region_3"]))
    print(s.output_record(src="北京"))
Example #3
 def CompareAngels(angel1, angel2, sentences):
     if angel1 is None or angel2 is None:
         return
     label1 = Sentiment.GetSentimentClass(
         angel1.PredictReviewScore(sentences))
     label2 = Sentiment.GetSentimentClass(
         angel2.PredictReviewScore(sentences))
     if label1 != label2:
         angel1.DumpDetails(sentences)
         angel2.DumpDetails(sentences)
Example #4
class Conversation:
    def __init__(self, path, interval="month"):
        with open(path, 'r') as f:
            self.__soup = BeautifulSoup(f.read(), "html.parser")
            self.messages = []
            self.name = self.__soup.find("title").text.replace(
                "Conversation with ", "")
            message_headers = self.__soup.find_all("div",
                                                   class_="message_header")
            self.__span_meta = [
                m.find("span", class_="meta").text for m in message_headers
            ]
            self.__fbt = FbTime(self.__span_meta)

            for m in self.__soup.find_all("div", class_="message"):
                span = m.find("span", class_="meta")
                self.messages.append(
                    Message(
                        m.find("span", class_="user").text,
                        self.__fbt.span_meta_to_date(span.text, interval),
                        span.text, m.next_sibling.text))

            self.__sent = Sentiment(self.messages, self.__fbt)
            self.participants = self.__scrape_participants()

    def interaction_freq(self):
        times = self.__fbt.generate_time_dict()

        for date_str in self.__span_meta:
            time = date_str.split("at ")[1][:5]
            hour = time.split(":")[0]
            times[hour + ":00"] += 1
        return times

    def interaction_timeline(self, name):
        dates = self.__fbt.generate_date_dict()
        for message in self.messages:
            if message.name == name:
                dates[message.date] += 1
        return dates

    def sentiment_timeline(self, name):
        return self.__sent.sentiment_timeline(name)

    def avg_sentiment(self, name):
        return self.__sent.avg_sentiment(name)

    def __scrape_participants(self):
        users = []
        for user_span in self.__soup.find_all("span", "user"):
            user_name = user_span.text
            if user_name not in users:
                users.append(user_name)
        return users
Example #5
def text_api_doc(request):
    
    text = str(request.GET['text'])
    usecase = str(request.GET['usecase'])
    modelId = request.GET['modelId']
    modelId = eval(modelId)
    clientId = str(request.GET['clientId'])
    params = str(request.GET['params'])
    
    modelMap_df = pd.read_csv(projectDir+"/config/modelMapping.csv")
    
    if modelId is not None:
        usecaseMap = ''.join(modelMap_df.ix[modelMap_df['modelId'] == modelId ,'usecase'].tolist())
        modelName = modelMap_df.ix[modelMap_df['modelId'] == modelId ,'modelName'].tolist()
        
        if text == '':
            results = {'Predicted Category':None,'Prediction Score':None}
            return HttpResponse(json.dumps(results))
        if usecase.lower() != usecaseMap.lower():
            return HttpResponse("<h1 style='color: red'>Please provide the correct usecase for the modelId: %s</h1>" % modelId)
        if usecase.lower() not in ['sentiment','survey']:
            folderName = modelName
            folderName = baseDir+"/"+''.join(folderName)+"/out"
            try:
                skl = SklCat()    
                transformedXTest = skl.run_text_vectorizeTransform([text], folderName = folderName, vectorizerFile = 'FeatureTransformer.p', pickler = None)    
                y_pred,y_prob_max = skl.run_text_execute_api_doc(transformedXTest, folderName = folderName, modelFile = 'Classifier.p', pickler=None, modelType = 'classification')
                results = {'Predicted Category':list(y_pred),'Prediction Score':list(y_prob_max)}
            except Exception as e:
                return HttpResponse("<h1 style='color: red'>Error Occurred: %s</h1>" % e)
        elif usecase.lower() == 'sentiment':
            sentimentPattern = modelName
            sentimentPattern = ''.join(sentimentPattern)
            try:            
                emo = Sentiment()
                results = emo.run(text, modelType = sentimentPattern, args = [], kwargs = {})
            except Exception as e:
                return HttpResponse("<h1 style='color: red'>Error Occurred: %s</h1>" % e)
        elif usecase.lower() == 'survey':
            ruleBaseFile = projectDir+"/config/ruleBase_v3.csv"
            try:
                sa = SurveyAnalysis()
                results = sa.run(text,ruleBaseFile = ruleBaseFile)
            except Exception as e:
                return HttpResponse("<h1 style='color: red'>Error Occurred: %s</h1>" % e)
        else:
            return HttpResponse("<h1 style='color: red'>Please provide the proper usecase.</h1>")
        
        return HttpResponse(json.dumps(results))
    else:
        return HttpResponse("<h1>Please provide the modelId in URL</h1>")
Example #6
class Conversation:
    def __init__(self, path, interval="month"):
        with open(path, 'r') as f:
            self.__soup = BeautifulSoup(f.read(), "html.parser")
            self.messages = []
            self.name = self.__soup.find("title").text.replace(
                "Conversation with ", "")
            message_headers = self.__soup.find_all("div",
                                                   class_="message_header")
            self.__span_meta = [
                m.find("span", class_="meta").text for m in message_headers
            ]
            self.__fbt = FbTime(self.__span_meta)

            for m in self.__soup.find_all("div", class_="message"):
                span = m.find("span", class_="meta")
                self.messages.append(
                    Message(
                        m.find("span", class_="user").text,
                        self.__fbt.span_meta_to_date(span.text, interval),
                        span.text, m.next_sibling.text))

            self.__sent = Sentiment(self.messages, self.__fbt)
            self.participants = self.__scrape_participants()

    def interaction_freq(self):
        return self.__fbt.interaction_freq()

    def interaction_timeline(self, name):
        return self.__fbt.interaction_timeline(name, self.messages)

    def sentiment_timeline(self, name, interval):
        return self.__sent.sentiment_timeline(name, interval)

    def avg_sentiment(self, name):
        return self.__sent.avg_sentiment(name)

    def get24HourTime(self, elem):
        return self.__fbt.get24HourTime(elem)

    # Returns a list of participants in the conversation.
    def __scrape_participants(self):
        users = []
        for user_span in self.__soup.find_all("span", "user"):
            user_name = user_span.text
            if user_name not in users:
                users.append(user_name)
        return users
Example #7
def main():
    # Load the data
    date = sys.argv[1]
    path_prefix = "./new_data"
    df = pd.read_pickle(
        os.path.join(path_prefix, "%s_select_comments.p" % date))

    # Load the model
    s = Sentiment()
    df = s.sentiment_detect(df,
                            on=["content"],
                            srcs=["province"],
                            dists=["region_1", "region_2", "region_3"])
    df_freq = s.table_record()

    # Save the results
    df.to_pickle(os.path.join(path_prefix, "%s_sentiment.p" % date))
    df_freq.to_pickle(os.path.join(path_prefix, "%s_senti_freq.p" % date))
    print(df)
    print(df_freq)
Example #8
    def cut(self, infile, word_outfile=None, nominal_outfile=None):
        '''
        infile: input file
        word_outfile: output file for segmented words
        nominal_outfile: output file for part-of-speech results
        '''
        try:
            contents = self.cut_to_sentence(infile)
            writer_word = None
            writer_nominal = None
            if word_outfile:
                writer_word = open(word_outfile, 'w')
            if nominal_outfile:
                writer_nominal = open(nominal_outfile, 'w')
            sentiment = [0, 0, 0]
            if writer_word:
                for content in contents:
                    words = list(
                        set(jieba.cut(content, cut_all=True)) - self.stop_word)
                    words = filter(lambda x: len(x) > 0, words)
                    if len(words) > 0:
                        tmp = reduce(
                            lambda x, y:
                            (x[0] + y[0], x[1] + y[1], x[2] + y[2]),
                            map(lambda x: Sentiment.get_sentiment(x), words))
                        sentiment[0] += tmp[0]
                        sentiment[1] += tmp[1]
                        sentiment[2] += tmp[2]
                        writer_word.write(' '.join(
                            map(lambda w: w.encode('utf8', 'ignore'), words)) +
                                          '\n')
                writer_word.close()

            stop_flag = set(['p', 'x', 'm', 'd', 'c', 'a'])

            if writer_nominal:
                for content in contents:
                    words = pseg.cut(content)
                    for w in words:
                        if (w.word
                                not in self.stop_word) and (w.flag
                                                            not in stop_flag):
                            writer_nominal.write(
                                w.word.encode('utf8', 'ignore') + ' ' +
                                w.flag.encode('utf8', 'ignore') + ' ' +
                                unicode(hash(content)).encode(
                                    'utf8', 'ignore') + '\n')

                writer_nominal.close()
            return sentiment
        except Exception, e:
            traceback.print_exc()
            print 'cut error: %s !' % e
Example #9
    def cut(self, infile, word_outfile=None, nominal_outfile=None):
        """
        infile: input file
        word_outfile: output file for segmented words
        nominal_outfile: output file for part-of-speech results
        """
        try:
            contents = self.cut_to_sentence(infile)
            writer_word = None
            writer_nominal = None
            if word_outfile:
                writer_word = open(word_outfile, "w")
            if nominal_outfile:
                writer_nominal = open(nominal_outfile, "w")
            sentiment = [0, 0, 0]
            if writer_word:
                for content in contents:
                    words = list(set(jieba.cut(content, cut_all=True)) - self.stop_word)
                    words = filter(lambda x: len(x) > 0, words)
                    if len(words) > 0:
                        tmp = reduce(
                            lambda x, y: (x[0] + y[0], x[1] + y[1], x[2] + y[2]),
                            map(lambda x: Sentiment.get_sentiment(x), words),
                        )
                        sentiment[0] += tmp[0]
                        sentiment[1] += tmp[1]
                        sentiment[2] += tmp[2]
                        writer_word.write(" ".join(map(lambda w: w.encode("utf8", "ignore"), words)) + "\n")
                writer_word.close()

            stop_flag = set(["p", "x", "m", "d", "c", "a"])

            if writer_nominal:
                for content in contents:
                    words = pseg.cut(content)
                    for w in words:
                        if (w.word not in self.stop_word) and (w.flag not in stop_flag):
                            writer_nominal.write(
                                w.word.encode("utf8", "ignore")
                                + " "
                                + w.flag.encode("utf8", "ignore")
                                + " "
                                + unicode(hash(content)).encode("utf8", "ignore")
                                + "\n"
                            )

                writer_nominal.close()
            return sentiment
        except Exception, e:
            traceback.print_exc()
            print "cut error: %s !" % e
Example #10
    def __init__(self, path, interval="month"):
        with open(path, 'r') as f:
            self.__soup = BeautifulSoup(f.read(), "html.parser")
            self.messages = []
            self.name = self.__soup.find("title").text.replace(
                "Conversation with ", "")
            message_headers = self.__soup.find_all("div",
                                                   class_="message_header")
            self.__span_meta = [
                m.find("span", class_="meta").text for m in message_headers
            ]
            self.__fbt = FbTime(self.__span_meta)

            for m in self.__soup.find_all("div", class_="message"):
                span = m.find("span", class_="meta")
                self.messages.append(
                    Message(
                        m.find("span", class_="user").text,
                        self.__fbt.span_meta_to_date(span.text, interval),
                        span.text, m.next_sibling.text))

            self.__sent = Sentiment(self.messages, self.__fbt)
            self.participants = self.__scrape_participants()
Example #11
def main():
  ticker = raw_input("\n\n\n----------------------------------------------\nWelcome. Ready to trade? Pick a stock ticker: ")
  reuterObj = ReutersQuery()
  reuterVector = reuterObj.getQuery(ticker)

  sentimentObj = Sentiment()
  sentiments = sentimentObj.sentimentVectorize(reuterVector)

  yahooObj = YahooQuery()
  yahooVector = yahooObj.doYahooQuery(ticker, reuterVector)

  reuterDates = DateFormat()
  dates = reuterDates.fixDates(reuterVector)

  mergeObj = Merge()
  merged = mergeObj.mergeEverything(sentiments, yahooVector, dates)

  strategyObj = Strategy()
  metrics = strategyObj.runStrategy(ticker, merged)

  outputObj = Output()
  outputObj.putOutput(ticker, metrics, yahooVector, merged)
  print '\nThanks for trading with Vivek! Get money, get paid!'
Example #12
 def predict(self, filePath):
     #PREDICT
     lexicon = util.LoadLexiconFromCSV(
         "../files/lexicons/SentiWordNet_Lexicon_concise.csv")
     angel = Angel(lexicon, True)
     parsedReviewsPath = os.path.join(os.path.dirname(filePath),
                                      "YelpParsedReviews.json")
     with open(parsedReviewsPath, 'r') as file:
         TrainingFile = file.read()
     classificationData = json.loads(TrainingFile)
     for k in range(len(classificationData["ClassificationModel"])):
         current = classificationData["ClassificationModel"][str(k + 1)]
         notCount = current["NotCount"]
         if "Sentences" in current:
             if not isinstance(current["Sentences"], list):
                 current["Sentences"] = [current["Sentences"]]
             sentences = current["Sentences"]
         else:
             continue
         current["Label"] = Sentiment.GetSentimentClass(
             angel.PredictReviewScore(sentences, notCount), 1)
         angel.DumpDetails(sentences, current["Label"])
     return classificationData
Example #13
 def __init__(self):
     Sentiment.__init__(self, testMode=False)
Example #14
        infos = [
            "Alcoa upgraded to Buy from Hold at Deutsche Bank",
            'Peter Thiel reports 6.6% passive stake in Palantir',
            'Senvest Management reports 5.54% passive stake in GameStop',
            'Citron shorting Palantir, sees $20 stock by end of 2020',
            'Amazon.com assumed with an Outperform at Wolfe Research',
            'Alcoa initiated with a Sell at Goldman Sachs',
            'Palantir initiated with a Market Perform at William Blair',
            'Alcoa options imply 10.0% move in share price post-earnings',
            'GameStop believes it has sufficient liquidity to fund operations',
            'GameStop says \'Reboot\' is delivering lower costs, reduced debt',
            'Palantir, Rio Tinto sign multi-year enterprise partnership',
            'PG&E begins deployment of Palantir\'s Foundry Software',
            'Citi says sell Palantir on deceleration in growth, upcoming lockup expiry',
            'Pentagon cybersecurity project slowed by flaws, Bloomberg says',
            'Palantir awarded contract from Army worth up to $250M',
            'Palantir awarded $44.4M contract from FDA',
            'Fujitsu signs $8M contract as Palantir Foundry customer',
            'Army Vantage reaffirms Palantir partnership with $114M agreement',
            'Palantir provides update on partnership with Greece to support COVID-19 response',
            'Palantir receives Army prototype contract to support network modernization',
            'Soros takes stake in Palantir, exits TransDigm position',
            "AOC flags \'material risks\' to Palantir investors in SEC letter, TechCrunch says",
            "PetIQ \'a smart way\' to play pet care boom, Barron's says",
        ]
        for i in infos:
            print('')
            print(i)
            s = Sentiment()
            s.Parse(i, sid=sid, nlp=nlp)
            print(s)
Example #15
def text_api(request):
                
        text = str(request.GET['text'])
        usecase = str(request.GET['usecase'])
        modelId = request.GET['modelId']
        modelId = eval(modelId)
        clientId = str(request.GET['clientId'])
        params = str(request.GET['params'])
        
        modelMap_df = pd.read_csv(projectDir+"/config/modelMapping.csv")
        results = []
        records = eval(text)

        if modelId is not None:
            usecaseMap = ''.join(modelMap_df.ix[modelMap_df['modelId'] == modelId ,'usecase'].tolist())
            modelName = modelMap_df.ix[modelMap_df['modelId'] == modelId, 'modelName'].tolist()

            if text == '':
                results = {'Predicted Category':None,'Prediction Score':None}
                return HttpResponse(json.dumps(results))
            if usecase.lower() != usecaseMap.lower():
                return HttpResponse("<h1 style='color: red'>Please provide the correct usecase for the modelId: %s</h1>" % modelId)
            if usecase.lower() not in ['sentiment','survey','ltv']:
                df = pd.read_json(json.dumps(records))  # converting json string into dataframe        
                folderName = modelName
                folderName = baseDir+"/"+''.join(folderName)+"/out"

                try:
                        skl = SklCat()
                        #df = df.ix[df['lineBy']==2,:]   #filtering customer lines         
                        df['chat_intent'] = df['lineText'].apply(lambda x:skl.run_text_vectorizeTransform([str(x)], folderName = folderName, vectorizerFile = 'FeatureTransformer.p', pickler = None)).apply(lambda x: skl.run_text_execute_api_bulkDoc(x, folderName = folderName, modelFile = 'Classifier.p', pickler=None, modelType = 'classification'))
                        dfResults = df.ix[:,['sessionId','chat_intent']]
                        results = dfResults.to_json(orient='records')
                        '''
                        for doc in records:
                                
                                text = doc['chat_text']
                            
                                transformedXTest = skl.run_text_vectorizeTransform([text], folderName = folderName, vectorizerFile = 'FeatureTransformer.p', pickler = None)    
                                y_pred,y_prob_max = skl.run_text_execute(transformedXTest, folderName = folderName, modelFile = 'Classifier.p', pickler=None, modelType = 'classification')
                                output = {'session_id':doc['session_id'],'chat_text':text ,'Predicted Category':list(y_pred),'Prediction Score':list(y_prob_max)}
                                results.append(output)
                        '''
                        return HttpResponse(json.dumps(results))
                except Exception as e:
                        return HttpResponse("<h1 style='color: red'>Error Occurred: %s</h1>" % e)
            elif usecase.lower() == 'sentiment':
                sentimentPattern = modelName
                sentimentPattern = ''.join(sentimentPattern)
                records = eval(text)
                try:
                        emo = Sentiment()
                        
                        results = []
                        jsonResults = {}
                        for doc in records:
                                jsonTemp = {}
                                jsonTemp['sessionId'] = doc['sessionId']
                                df_chat_data = pd.read_json(json.dumps(doc['chat_data']))
                                
                                df_chat_data['sentiment'] = df_chat_data['lineText'].apply(lambda row: emo.run(row,modelType = sentimentPattern,args = [],kwargs={}))
                                df = df_chat_data.ix[:,['lineNum','sentiment']]
                                
                                jsonObj = df.to_json(orient ='records')
                                each_line = json.loads(json.dumps(jsonObj))
                                jsonTemp['each_line'] = each_line
                                chat_sentiment = {}
                                chat_sentiment['overall'] = overallSentiment(df_chat_data['sentiment'])
                                lastlines = 2
                                chat_sentiment['last_line_sentiment'] = df_chat_data['lineText'].tail(lastlines).apply(lambda row: emo.run(row,modelType = sentimentPattern,args = [],kwargs={})).to_json(orient='records')
                                chat_sentiment['switches_to_postive'] = getPositivieSwitches(df_chat_data['sentiment'].tolist())
                                jsonTemp['chat_sentiment'] = chat_sentiment
                                
                                results.append(jsonTemp)

                        '''
                        
                        for doc in records:
                            text  = doc['lineText']
                            output = emo.run(text, modelType = sentimentPattern, args = [], kwargs = {})
                            output['sessionId']= doc['sessionId']
                            output['lineText'] = text
                            results.append(output)
                        '''
                        return HttpResponse(json.dumps(results, indent=5))
                except Exception as e:
                    return HttpResponse("<h1 style='color: red'>Error Occurred: %s</h1>" % e)
            elif usecase.lower() == 'survey':
                ruleBaseFile = projectDir+"/config/ruleBase_v3.csv"
                try:
                        sa = SurveyAnalysis()
                        results = sa.run(text,ruleBaseFile = ruleBaseFile)
                except Exception as e:
                        return HttpResponse("<h1 style='color: red'>Error Occurred: %s</h1>" % e)
            elif usecase.lower() == 'ltv':
                folderName = modelName
                folderName = baseDir+"/"+''.join(folderName)+"/out"
                skl = SklCat()
                results = []
                jsonResults = {}
                try:
                        for doc in records:
                                
                                jsonTemp = {}
                                jsonTemp['sessionId'] = doc['sessionId']
                                df_chat_data = pd.read_json(json.dumps(doc['chat_data']))
                                if 'lineNum' not in df_chat_data.columns.tolist():
                                        df_chat_data['lineNum'] = pd.Series(range(1, df_chat_data.shape[0]+1))
                                df_chat_data = df_chat_data.ix[df_chat_data['lineBy'] == 1,:]           #filtering only agent lines
                                #ltv = df_chat_data['lineText'].apply(lambda x:skl.run_text_vectorizeTransform([str(x)], folderName = folderName, vectorizerFile = 'FeatureTransformer.p', pickler = None)).apply(lambda x: skl.run_text_execute_api_bulkDoc(x, folderName = folderName, modelFile = 'Classifier.p', pickler=None, modelType = 'classification',usecase='ltv'))
                                transformedXTest = skl.run_text_vectorizeTransform(df_chat_data['lineText'].tolist(), folderName = folderName, vectorizerFile = 'FeatureTransformer.p', pickler = None)
                                dfProb = skl.run_text_execute_api_bulkDoc(transformedXTest, folderName = folderName, modelFile = 'Classifier.p', pickler=None, modelType = 'classification',usecase='ltv')

                                #dfTest = df_chat_data.ix[df_chat_data['ltv'].apply(lambda x : len(x)!=0)]

                                each_line = []  # default when no predictions are returned
                                if len(dfProb) != 0:
                                        dfProb['lineNum'] = pd.Series(range(1, dfProb.shape[0]+1))
                                        threshold = 0.2
                                        dfMerged = pd.merge(df_chat_data, dfProb, how='left', left_on='lineNum', right_on='lineNum',suffixes = ('_x','_y'))
                                        dfMerged = dfMerged.ix[:,['lineNum','y_pred','y_prob_max']]
                                        dfMerged['flag'] = dfMerged.apply(lambda row: row['y_pred'] == 1  and row['y_prob_max'] > threshold,axis = 1) 
                                        ltv_agg = 1 if dfMerged['flag'].any() else 0
                                        jsonTemp['chat_ltv'] = {'category':ltv_agg}
                                        jsonObj = dfMerged.to_json(orient = 'records')
                                        
                                        each_line = json.loads(json.dumps(jsonObj))

                                jsonTemp['each_line'] = each_line
                                results.append(jsonTemp)

                        return HttpResponse(json.dumps(results))
                except Exception as e:
                        return HttpResponse("<h1 style='color: red'>Error Occurred: %s</h1>" % e)
            else:
                return HttpResponse("<h1 style='color: red'>Please provide the proper usecase.</h1>")
            #return HttpResponse(json.dumps(results))
        else:
            return HttpResponse("<h1>Please provide the modelId in URL</h1>")
Example #16
 def get_city_thresholds(self):
     for city in self.senitiment_cities:
         city_sentiment = Sentiment(city, self.to_tokenize, self.document_type)
         city_sentiment.run()
Example #17
 def get_word_frequency(self):
     for city in self.senitiment_cities:
         city_sentiment = Sentiment(city, self.to_tokenize, self.document_type)
         city_sentiment.get_word_frequency('biomass')
Example #18
 def get_corpus_size(self):
     total_number_of_docs = 0
     for city in self.senitiment_cities:
         city_sentiment = Sentiment(city, self.to_tokenize, self.document_type)
         total_number_of_docs += len(city_sentiment.filenames)
     print(total_number_of_docs)
Example #19
from twython import Twython
from twython import TwythonStreamer
from Sentiment import Sentiment
import time

# PLEASE REPLACE THESE WITH YOUR DEV ACCOUNT
APP_KEY = 'YOUR_APP_KEY'
APP_SECRET = 'YOUR_APP_SECRET'
OAUTH_TOKEN = 'YOUR_OAUTH_TOKEN'
OAUTH_TOKEN_SECRET = 'YOUR_OAUTH_TOKEN_SECRET'


AFINN_FILE = "AFINN-111.txt"
happyCalc = Sentiment(AFINN_FILE)
sentimentScore = 0


class MyTwythonStreamer(TwythonStreamer):
  myTweets = []

  def on_success(self, data):
    if 'text' in data:
      tweet = data['text'].encode('utf-8')
      self.add_tweets(tweet)
      print tweet
      self.disconnect()


  def on_error(self, status_code, data):
    print status_code
    print data
Example #20
from Sentiment import Sentiment

sentiment = Sentiment()

model = sentiment.train_model(load_data=True,nb_epoch=1,old_weight_path="output/weights.13-0.20.hdf5")


tests = ['发货太慢了,商家服务太差.','脑白金强势登陆央视']  # "Shipping is too slow and the seller's service is terrible." / "Naobaijin makes a strong debut on CCTV"

labels = sentiment.predict_label(model,tests)

print(labels)
Example #21
# Add spacy word analyzer
import spacy
from spacy.tokens import Token
nlp = spacy.load('en_core_web_sm')
from Sentiment import Sentiment,News
import glob,os,sys
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from ReadData import ALPACA_REST,runTicker,ConfigTable,ALPHA_TIMESERIES,GetTimeSlot,SQL_CURSOR
from alpaca_trade_api.rest import TimeFrame
import pmdarima as pmd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
s=Sentiment()
debug=False
# create sentiment analyzer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()
import statsmodels.api as sm1
from statsmodels.sandbox.regression.predstd import wls_prediction_std

# univariate stacked lstm example
from numpy import array
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.utils import plot_model 

api = ALPACA_REST()
inputTxt='Honest Company reports Q1 EPS (13c) vs. 1c last year'
inputTxt='Lennar reports Q2 adjusted EPS $2.95, consensus $2.36'
Example #22
def ImpactTraining(docPath, lexPath, lexiconID):
    """
    Final score of the review is calculated as follows:
    (Score1*Multiplier1 + Score2*Multiplier2 ... ScoreN*MultiplierN) * BaseMultiplier = ReviewScore
    Ignoring BaseMultiplier for this training, assuming it has minimal impact (TODO: test this impact).
    ScoreK*MultiplierK/ReviewScore * 100 = PercentImpact (percentage impact of word K on the final score)
    TotalAdjustment = ExpectedScore - FinalScore
    AdjustmentK = word K's PercentImpact share of TotalAdjustment (the total adjustment needed for word K)
    Adjustment on word K for this review = AdjustmentK/MultiplierK
    Take the mean of all adjustments for each word and apply it to the lexicon to get the new lexicon.
    Repeat the process as long as accuracy keeps improving.
    """
    oldAccuracy = 0
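    # Editor's sketch with hypothetical numbers (not from the original source), to make the
    # docstring's adjustment math concrete: suppose a review has two scored words with
    # (Score, Multiplier) = (3, 1) and (2, 2), so ReviewScore = 3*1 + 2*2 = 7. If the
    # expected score is 10, TotalAdjustment = 10 - 7 = 3; word 1's adjustment is
    # (3/7 * 3) / 1 ~= 1.29 and word 2's is (4/7 * 3) / 2 ~= 0.86. The loop below averages
    # such per-review adjustments per word before updating the lexicon.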
    oldAngel = None
    se = PerformanceTest(lexPath, docPath)
    while True:
        adjustments = defaultdict(list)
        newAngel = Angel(se.lexicon, smallReviews=True)
        expectedSentiment, predictedOverall = [], []
        se.ResetIterator()
        while True:
            try:
                sentences, expectedLabel, notCount, docId = se.NextElement()
                expectedSentiment.append(expectedLabel)
                predictedScore = newAngel.PredictReviewScore(
                    sentences, expectedLabel)
                predictedLabel = Sentiment.GetSentimentClass(predictedScore)
                predictedOverall.append(predictedLabel)
                if oldAngel is not None:
                    oldPredictedLabel = Sentiment.GetSentimentClass(
                        oldAngel.PredictReviewScore(sentences, expectedLabel))
                    if oldPredictedLabel != predictedLabel:
                        oldAngel.DumpDetails(sentences, expectedLabel)
                        newAngel.DumpDetails(sentences, expectedLabel)
                totalImpact, impactTable = newAngel.GetImpact(sentences)
                if totalImpact == 0:
                    continue
                totalAdjustment = expectedLabel * 10 - predictedScore
                for word, (wordScore, multiplier) in impactTable.iteritems():
                    if multiplier == 0:
                        continue
                    wordAdjustment = ((wordScore / totalImpact) *
                                      totalAdjustment) / multiplier
                    if wordAdjustment != 0:
                        adjustments[word].append(wordAdjustment)
            except StopIteration:
                break
        newAccuracy = util.accuracy(predictedOverall, expectedSentiment)
        print "Accuracy:", oldAccuracy, "--->", newAccuracy
        if newAccuracy <= oldAccuracy:
            break
        for word in adjustments:
            se.lexicon[word] = str(
                float(se.lexicon[word]) + numpy.mean(adjustments[word]))
        oldAngel = newAngel
        oldAccuracy = newAccuracy

    filename = "../files/lexicons/" + lexiconID + ".csv"
    handle = open(filename, 'wb')
    wr = csv.writer(handle)
    for key, value in sorted(oldAngel.lexicon.items()):
        row = [key, value]
        wr.writerow(row)
    handle.close()
Example #23
    def PerformTest(self):
        """
        This method loads the test data file, and tests how good the prediction is.
        It also prints the precision, recall and F1 scores.
        """
        angel = Angel(self.lexicon, True)
        angel.SetDumpParameters(7, -7)
        posx, negx, neutx, accx, = 0, 0, 0, 0
        maxnegf1 = maxneutf1 = maxposf1 = maxacc = 0
        for threshold in range(1, 0, -1):
            predictedOverall = []
            expectedSentiment = []
            demons = TotPos = TotNeg = TotNeut = 0
            while True:
                try:
                    sentences, label, notCount, docId = self.NextElement()
                    if not sentences:
                        continue
                    if label == 'NULL':
                        break
                    label = int(label)
                    expectedSentiment.append(label)
                    predicted = angel.PredictReviewScore(sentences, label)
                    predictedOverall.append(Sentiment.GetSentimentClass(predicted, threshold))
                    if label == Sentiment.POSITIVE:
                        TotPos += 1
                    elif label == Sentiment.NEGATIVE:
                        TotNeg += 1
                    else:
                        TotNeut += 1
                    if angel.DumpRequested(predicted, label):
                        print "ID", docId, "\n"
                        demons += 1
                except StopIteration:
                    break

            print "Demons:", demons
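            # Editor's note (an assumption, not verified against util's source): the
            # util.*_with_class(predicted, expected, c) helpers below are taken to be
            # one-vs-rest metrics for class c in {1, -1, 0}, i.e.
            #   precision_c = TP_c / (TP_c + FP_c)
            #   recall_c    = TP_c / (TP_c + FN_c)
            #   f1_c        = 2 * precision_c * recall_c / (precision_c + recall_c)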
            pos_prec = util.precision_with_class(predictedOverall, expectedSentiment, 1)
            neg_prec = util.precision_with_class(predictedOverall, expectedSentiment, -1)
            neut_prec = util.precision_with_class(predictedOverall, expectedSentiment, 0)
            pos_rec = util.recall_with_class(predictedOverall, expectedSentiment, 1)
            neg_rec = util.recall_with_class(predictedOverall, expectedSentiment, -1)
            neut_rec = util.recall_with_class(predictedOverall, expectedSentiment, 0)
            pos_f1 = util.f1_with_class(predictedOverall, expectedSentiment, 1)
            neg_f1 = util.f1_with_class(predictedOverall, expectedSentiment, -1)
            neut_f1 = util.f1_with_class(predictedOverall, expectedSentiment, 0)
            accuracy = util.accuracy(predictedOverall, expectedSentiment)
            print "Current Positive stats (", threshold, "): ","\t", '{:.2%}'.format(pos_prec), \
                "\t", '{:.2%}'.format(pos_rec), "\t", '{:.2%}'.format(pos_f1)
            print "Current Negative stats (", threshold, "): ", "\t", '{:.2%}'.format(neg_prec), "\t", \
                '{:.2%}'.format(neg_rec), "\t", '{:.2%}'.format(neg_f1)
            print "Current Neutral stats (", threshold, "): ", "\t", '{:.2%}'.format(neut_prec), "\t", \
                '{:.2%}'.format(neut_rec), "\t", '{:.2%}'.format(neut_f1)
            cprint("Current Accuracy ( " + str(threshold) + " ):\t\t\t" + '{:.2%}'.format(accuracy), 'red')
            if pos_f1 > maxposf1:
                maxposf1 = pos_f1
                posx = threshold
            if neg_f1 > maxnegf1:
                maxnegf1 = neg_f1
                negx = threshold
            if neut_f1 > maxneutf1:
                maxneutf1 = neut_f1
                neutx = threshold
            if accuracy > maxacc:
                maxacc = accuracy
                accx = threshold
        print "Maximum Positive F1: ", '{:.2%}'.format(maxposf1), "at", posx
        print "Maximum Negative F1: ", '{:.2%}'.format(maxnegf1), "at", negx
        print "Maximum Neutral F1: ", '{:.2%}'.format(maxneutf1), "at", neutx
        cprint("Maximum Accuracy: " + '{:.2%}'.format(maxacc) + " at " + str(accx), 'red')
Example #24
# Add spacy word analyzer
import spacy
from spacy.tokens import Token
nlp = spacy.load('en_core_web_sm')
from Sentiment import Sentiment, News
import glob, os, sys
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from ReadData import ALPACA_REST, runTicker, ConfigTable, ALPHA_TIMESERIES, GetTimeSlot, SQL_CURSOR
from alpaca_trade_api.rest import TimeFrame
from statsmodels.tsa.arima.model import ARIMA
import pmdarima as pmd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
s = Sentiment()
debug = False
# create sentiment analyzer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()
import statsmodels.api as sm1
from statsmodels.sandbox.regression.predstd import wls_prediction_std
import AnaSignal

api = ALPACA_REST()
inputTxt = 'Honest Company reports Q1 EPS (13c) vs. 1c last year'
#inputTxt='Lennar reports Q2 adjusted EPS $2.95, consensus $2.36'
#inputTxt='Cognyte reports Q1 EPS (20c), consensus (15c)'
#inputTxt='Brookdale Senior Living resumed with a Buy at Stifel'
#inputTxt='Anglo American price target raised to 3,670 GBp from 3,500 GBp at Morgan Stanley'
#inputTxt='GMS Inc. reports Q4 adjusted EPS $1.07, consensus 82c'
##inputTxt='CalAmp reports Q1 adjusted EPS 8c, consensus 7c'
Example #25
    def getSentiments(self, path):
        dirs = os.listdir(path)

        for file in dirs:
            filename = path + file
            self.positive.clear()
            self.negative.clear()
            self.neutral.clear()
            self.compound.clear()
            self.type.clear()
            self.text.clear()
            self.score.clear()
            self.originalScore.clear()
            self.calculatedScore.clear()
            self.formatedText.clear()
            self.correctScore = 0

            print(filename)
            count = 0
            with open(filename) as csvfile:
                print("here")
                spamreader = csv.reader(csvfile, delimiter=',')
                for row in spamreader:
                    count = count + 1
                    if count > 1:
                        self.type.append(row[0])
                        self.text.append(row[1])
                        self.score.append((row[2]))

                for i in range(len(self.text)):

                    findSentimentText = self.text[i]
                    #print(findSentimentText)
                    findSentimentText = encoding.smart_str(findSentimentText,
                                                           encoding='ascii',
                                                           errors='ignore')

                    findSentimentText = findSentimentText.lower()
                    findSentimentText = re.sub(
                        '((www\.[^\s]+)|(https?://[^\s]+))', 'URL',
                        findSentimentText)
                    findSentimentText = re.sub('@[^\s]+', 'AT_USER',
                                               findSentimentText)
                    findSentimentText = re.sub('[\s]+', ' ', findSentimentText)
                    findSentimentText = re.sub(r'#([^\s]+)', r'\1',
                                               findSentimentText)
                    findSentimentText = findSentimentText.strip('\'"')
                    findSentimentText = re.sub('\\\[^\s]+', 'special_symbol',
                                               findSentimentText)
                    findSentimentText = re.sub('\\\[^\s]+', 'special_symbol',
                                               findSentimentText)

                    sentiment = Sentiment()
                    scoreCal = sentiment.getSentimentNLTK(findSentimentText)
                    self.positive.append(scoreCal[2])
                    self.negative.append(scoreCal[1])
                    self.neutral.append(scoreCal[0])
                    self.compound.append(scoreCal[3])

                    if (scoreCal[3] > 0.5):
                        self.calculatedScore.append(1)
                    else:
                        self.calculatedScore.append(-1)

            self.calculateSentiment()
            total = len(self.score)
            print(total)
            print(self.correctScore)
            accuracy = (self.correctScore / float(total))
            print(accuracy)
Example #26
 def __init__(self):
     Sentiment.__init__(self, testMode=False)