def push_data_to_mongoDB(articles, search_query_id):
    """Store news articles and their search-theme references in MongoDB.

    Every article whose URL is not yet known (according to
    ``check_if_news_exists``) receives a fresh hex ``id`` and is inserted
    into ``newsAPIArticles``. A reference document linking the article URL
    to ``search_query_id`` is then inserted into ``themeArticleMap``.

    Args:
        articles: Iterable of article dicts providing ``url`` and
            ``publishedAt``. New articles are mutated in place (an ``id``
            key is added).
        search_query_id: Value stored as ``search_theme_id`` on each
            reference document.
    """
    db_connection = mongo.getDBCon()
    try:
        db = db_connection.production
        news_articles_collection = db.newsAPIArticles
        theme_article_collection = db["themeArticleMap"]

        for article in articles:
            if not check_if_news_exists(article["url"], db):
                # Best effort: a failed article insert is reported but does
                # not stop the remaining articles from being processed.
                try:
                    article["id"] = uuid.uuid4().hex
                    result = news_articles_collection.insert_one(article)
                    print(result.inserted_id)
                except Exception as exc:
                    print("Error in Inserting to Mongo", exc)

            # NOTE(review): the reference is written for every article,
            # including ones that already existed - confirm this matches
            # the intended nesting.
            input_dict = {
                "pinalpha_news_id": uuid.uuid4().hex,
                "search_theme_id": search_query_id,
                "url": article["url"],
                # First ten characters of publishedAt (presumably YYYY-MM-DD).
                "date": article["publishedAt"][0:10],
            }
            print(input_dict)
            theme_article_collection.insert_one(input_dict)
    finally:
        db_connection.close()
def get_sentiments(article_ids):
    """Collect sentiment scores for the given article ids.

    For each id the article is looked up in ``newsAPIArticles``; its URL is
    resolved through ``themeArticleMap`` to ``pinalpha_news_id`` values, and
    every matching document in ``newsArticleSentimentValues`` contributes
    one row.

    Args:
        article_ids: Iterable of values matched against the ``id`` field.

    Returns:
        DataFrame with columns ``date``, ``sentiment`` (taken from
        ``google_score``) and ``news_id``. When nothing matches, an empty
        frame with only ``date`` and ``sentiment`` columns is returned.
    """
    rows = []
    mongoCon = mc.getDBCon()
    try:
        db = mongoCon.production
        article_collection = db.newsAPIArticles
        theme_map_collection = db.themeArticleMap
        sentiment_collection = db.newsArticleSentimentValues

        for item in article_ids:
            for document in article_collection.find({"id": item}):
                for doc in theme_map_collection.find({"url": document['url']}):
                    sentiment_cursor = sentiment_collection.find(
                        {"pinalpha_news_id": doc['pinalpha_news_id']})
                    for sents in sentiment_cursor:
                        rows.append({
                            "date": sents['date'],
                            "news_id": item,
                            "sentiment": sents['google_score'],
                        })
    finally:
        mongoCon.close()

    if not rows:
        return pd.DataFrame({'date': [], 'sentiment': []})
    # Build the frame once instead of appending row by row.
    return pd.DataFrame(rows, columns=['date', 'sentiment', 'news_id'])
def get_content_from_articles(phrase, theme, YMdate):
    """Summarise the first matching article and pass it to ``insert_mongo``.

    An article matches when its ``content`` matches both ``phrase`` and
    ``theme`` as regular expressions and its ``publishedAt`` matches
    ``YMdate``. Only the first match is processed.

    Args:
        phrase: Regex matched against ``content``. Any value other than
            "trade war" is stored as type "SG Banks".
        theme: Second regex matched against ``content``.
        YMdate: Regex matched against ``publishedAt``; also stored as the
            record's ``date``.

    NOTE(review): ``insert_mongo`` is called with one argument, but the text
    under review also defines a three-argument ``insert_mongo`` - confirm
    which one is in scope here.
    """
    query = {
        "$and": [
            {"content": {"$regex": phrase}},
            {"content": {"$regex": theme}},
            {"publishedAt": {"$regex": YMdate}},
        ]
    }
    mongoCon = mc.getDBCon()
    try:
        newsAPIArticles_collection = mongoCon.production.newsAPIArticles
        item = newsAPIArticles_collection.find_one(query)
        if item is not None:
            extracted = summary.ExtractSummary(item['content'])
            type_label = phrase if phrase == "trade war" else "SG Banks"
            record = {
                "date": YMdate,
                "type": type_label,
                "news_id": item['id'],
                "sentence": extracted,
            }
            print(record)
            insert_mongo(record)
    finally:
        mongoCon.close()
def getnews_daily(company):
    """Return the ``companyTenMostRecent`` documents of a company, serialised.

    Args:
        company: Value matched against the ``company_name`` field.

    Returns:
        The result of ``dumps`` applied to the query cursor (presumably a
        JSON string).
    """
    mongoCon = mc.getDBCon()
    try:
        collection = mongoCon.production.companyTenMostRecent
        result = collection.find({"company_name": company})
        # Serialise while the connection is still open; it is closed on the
        # way out of this function.
        return dumps(result)
    finally:
        mongoCon.close()
def get_sentnces_daily(theme,date):
    """Return the ``dailyThemeSentence`` documents for a theme and date.

    Args:
        theme: Value matched against the ``theme`` field.
        date: Value matched against the ``date`` field.

    Returns:
        The result of ``dumps`` applied to the query cursor (presumably a
        JSON string).
    """
    mongoCon = mc.getDBCon()
    try:
        collection = mongoCon.production.dailyThemeSentence
        result = collection.find({"theme": theme, "date": date})
        # Serialise while the connection is still open; it is closed on the
        # way out of this function.
        return dumps(result)
    finally:
        mongoCon.close()
def get_sentences_with_theme(company, theme):
    """Print every ``sentence_article_map`` document of a company on a theme.

    Args:
        company: Value matched against the ``company`` field.
        theme: Regular expression matched against the ``sentence`` field.

    Returns:
        None; matching documents are only printed.
    """
    query = {"$and": [{"company": company}, {"sentence": {"$regex": theme}}]}
    mongoCon = mc.getDBCon()
    try:
        sentence_collection = mongoCon.production.sentence_article_map
        for document in sentence_collection.find(query):
            print(document)
    finally:
        mongoCon.close()
    return None
def put_themes_data_to_db():
    """Return the themes loaded by ``get_themes_from_file``.

    Despite its name, this function does not write anything to MongoDB. The
    previous version opened a connection and looked up the ``themes``
    collection but never used or closed either, so that dead code has been
    removed.

    Returns:
        Whatever ``get_themes_from_file()`` returns.
    """
    # TODO(review): persisting the themes to the production ``themes``
    # collection is not implemented.
    return get_themes_from_file()
def delete_tradwar():
    """Delete all ``dailyThemeImpact`` documents whose theme is "DBS" or "dbs".

    NOTE(review): despite the function name, the themes removed are the two
    spellings of DBS, not a trade-war theme.
    """
    mongoCon = mc.getDBCon()
    try:
        collection = mongoCon.production.dailyThemeImpact
        for theme in ("DBS", "dbs"):
            collection.delete_many({"theme": theme})
    finally:
        mongoCon.close()
def read_sentiment_from_mongo(theme):
    """Load the date/sentiment pairs recorded for a theme.

    Args:
        theme: Value matched against the ``theme`` field of
            ``themeSentimentArticlesMap``.

    Returns:
        DataFrame with ``date`` and ``sentiment`` columns, one row per
        matching document; an empty, column-less DataFrame when nothing
        matches.
    """
    mongoCon = mc.getDBCon()
    try:
        collection = mongoCon.production.themeSentimentArticlesMap
        rows = [
            {"date": item['date'], "sentiment": item['sentiment']}
            for item in collection.find({"theme": theme})
        ]
    finally:
        mongoCon.close()
    # Build the frame once instead of appending row by row.
    return pd.DataFrame(rows)
def get_article_ids(theme,date):
    """Return the distinct ``news_id`` values recorded for a theme and date.

    Args:
        theme: Value matched against the ``theme`` field.
        date: Value matched against the ``date`` field.

    Returns:
        List of unique ``news_id`` values from ``themeSentimentArticlesMap``,
        in first-seen order.
    """
    query = {"$and": [{"theme": theme}, {"date": date}]}
    mongoCon = mc.getDBCon()
    try:
        collection = mongoCon.production.themeSentimentArticlesMap
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        article_ids = list(
            dict.fromkeys(item["news_id"] for item in collection.find(query)))
    finally:
        mongoCon.close()
    return article_ids
def update_mongo_sentiment(sentiment_list):
    """Insert sentiment documents into the ``articleSentiment`` collection.

    Args:
        sentiment_list: Either a single document (dict) or an iterable of
            documents to insert.

    Insert failures are reported on stdout and otherwise ignored; the
    connection is closed in every case.
    """
    mongoCon = mc.getDBCon()
    try:
        collection = mongoCon.production.articleSentiment
        try:
            # The legacy insert() accepted one document or many; keep both.
            if isinstance(sentiment_list, dict):
                print(collection.insert_one(sentiment_list).inserted_id)
            else:
                print(collection.insert_many(sentiment_list).inserted_ids)
        except Exception as exc:
            print("Insert Error - sentiment", exc)
    finally:
        mongoCon.close()
    return
def get_impact_of_theme_date(theme, date):
    """Classify the stored impact of a theme on a date as a sentiment label.

    Args:
        theme: Value matched against the ``theme`` field.
        date: Value matched against the ``date`` field.

    Returns:
        "Negative" when the impact is below zero or no document matches,
        otherwise "Positive". When several documents in
        ``dailyThemeImpactIntermediate`` match, the last one read decides.
    """
    sentimentVal = "Negative"
    mongoCon = mc.getDBCon()
    try:
        collection = mongoCon.production.dailyThemeImpactIntermediate
        for item in collection.find({"theme": theme, "date": date}):
            sentimentVal = "Negative" if item['impact'] < 0 else "Positive"
    finally:
        mongoCon.close()
    return sentimentVal
def get_content(articles):
    """Fetch the stored ``content`` of each given article by URL.

    Args:
        articles: Iterable of mappings that each provide a ``url`` key.

    Returns:
        List with the ``content`` of every ``newsAPIArticles`` document
        whose ``url`` matches, in the order the articles were given.
    """
    contents = []
    mongoCon = mc.getDBCon()
    try:
        collection = mongoCon.production.newsAPIArticles
        for article in articles:
            matches = collection.find({"url": article['url']})
            contents.extend(item['content'] for item in matches)
    finally:
        mongoCon.close()
    return contents
def get_data(searchTheme, startDate):
    """Return the ``themeArticleMap`` documents for a search theme and date.

    Args:
        searchTheme: Value matched against ``search_theme_id``.
        startDate: Value matched against ``date``.

    Returns:
        List of matching documents. The previous version returned the query
        cursor after closing its connection; the results are now read while
        the connection is still open, so callers can always iterate them.
    """
    searchQuery = {
        "$and": [
            {"date": startDate},
            {"search_theme_id": searchTheme},
        ]
    }
    mongoCon = mc.getDBCon()
    try:
        collection = mongoCon.production.themeArticleMap
        # Read everything before the connection goes away.
        news_article = list(collection.find(searchQuery))
    finally:
        mongoCon.close()
    return news_article
def get_sentence_summary(list_news):
    """Build a summary for every requested news article.

    Args:
        list_news: Iterable of values matched against the ``id`` field of
            ``newsAPIArticles``.

    Returns:
        Dict mapping each id that matched to the result of
        ``summary.ExtractSummary`` on the article content. If several
        documents share an id, the last one read wins.
    """
    summary_list = {}
    mongoCon = mc.getDBCon()
    try:
        collection = mongoCon.production.newsAPIArticles
        for news_id in list_news:
            for article in collection.find({"id": news_id}):
                summary_list[news_id] = summary.ExtractSummary(
                    article['content'])
    finally:
        mongoCon.close()
    return summary_list
def get_company_data(company):
    """Load the daily impact values stored for a company theme.

    Args:
        company: Value matched against the ``theme`` field of
            ``dailyThemeImpact``.

    Returns:
        DataFrame with ``date`` and ``impact`` columns, one row per matching
        document; an empty, column-less DataFrame when nothing matches.
    """
    mongoCon = mc.getDBCon()
    try:
        collection = mongoCon.production.dailyThemeImpact
        rows = [
            {"date": item['date'], "impact": item['impact']}
            for item in collection.find({"theme": company})
        ]
    finally:
        mongoCon.close()
    # Build the frame once instead of appending row by row.
    return pd.DataFrame(rows)
def get_tradewar_data():
    """Load the intermediate daily impact values of the "trade_war" theme.

    Returns:
        DataFrame with ``date`` and ``impact`` columns read from
        ``dailyThemeImpactIntermediate``; an empty, column-less DataFrame
        when nothing matches.
    """
    mongoCon = mc.getDBCon()
    try:
        collection = mongoCon.production.dailyThemeImpactIntermediate
        rows = [
            {"date": item['date'], "impact": item['impact']}
            for item in collection.find({"theme": "trade_war"})
        ]
    finally:
        mongoCon.close()
    # Build the frame once instead of appending row by row.
    return pd.DataFrame(rows)
def testQuery():
    """Print the stored sentences of two hard-coded news ids.

    Queries ``newsSentenceSentiments`` for either id and prints
    "<news_id> : <sentence>" for each match. Intended as a manual check.
    """
    findQuery = {
        "$or": [
            {"news_id": "c125c6c1c65249e0959c463d4b4d40f8"},
            {"news_id": "fa374d6c476346eb84f92438d4f873a1"},
        ]
    }
    mongoCon = mc.getDBCon()
    try:
        collection = mongoCon.production.newsSentenceSentiments
        for item in collection.find(findQuery):
            print(f"{item['news_id']} : {item['sentence']}")
    finally:
        mongoCon.close()
def get_theme_impact(theme, date):
    """Load the impact values stored for a theme on a date.

    Each matching ``dailyThemeImpact`` document is printed and contributes
    one row.

    Args:
        theme: Value matched against the ``theme`` field.
        date: Value matched against the ``date`` field.

    Returns:
        DataFrame with ``date`` and ``impact`` columns; an empty,
        column-less DataFrame when nothing matches.
    """
    rows = []
    mongoCon = mc.getDBCon()
    try:
        collection = mongoCon.production.dailyThemeImpact
        for item in collection.find({"theme": theme, "date": date}):
            print(item)
            rows.append({"date": item['date'], "impact": item['impact']})
    finally:
        mongoCon.close()
    # Build the frame once instead of appending row by row.
    return pd.DataFrame(rows)
def insert_mongo(impactQuery):
    """Insert a document into ``dailyThemeImpact`` unless it already exists.

    Existence is decided by ``check_if_impact_exist(db, impactQuery)``.
    Insert failures are reported on stdout and otherwise ignored.

    Args:
        impactQuery: Document to insert; also used as the existence query.

    Returns:
        Always True.

    NOTE(review): the text under review defines another ``insert_mongo``
    with three parameters further down; if both live in one module, the
    later definition replaces this one.
    """
    mongoCon = mc.getDBCon()
    try:
        db = mongoCon.production
        print(impactQuery)
        if check_if_impact_exist(db, impactQuery):
            print("Sentence Exists")
        else:
            try:
                db.dailyThemeImpact.insert_one(impactQuery)
                print("Insert Done")
            except Exception as exc:
                print("Insert Error for Sentences", exc)
    finally:
        mongoCon.close()
    return True
def delete_duplicates():
    """Create a unique index on (sentence, company) in ``sentence_article_map``.

    The previous version called ``ensureIndex``, which is the mongo-shell
    name rather than a PyMongo ``Collection`` method, so the call always
    failed and only "Delete Error" was printed. ``create_index`` is the
    PyMongo equivalent.

    NOTE(review): the old ``dropDups`` option is not supported by MongoDB
    3.0 and newer, so it is no longer passed. On such servers the index
    build fails while duplicates exist; they have to be removed first.

    Returns:
        None. The index name is printed on success, the error otherwise.
    """
    mongoCon = mc.getDBCon()
    try:
        sentence_collection = mongoCon.production.sentence_article_map
        try:
            result = sentence_collection.create_index(
                [("sentence", 1), ("company", 1)], unique=True)
            print(result)
        except Exception as exc:
            print("Delete Error", exc)
    finally:
        mongoCon.close()
    return None
def get_sentences_based_sentiment(list_news):
    """Load the sentence-level sentiments of the given news ids.

    Args:
        list_news: List of values matched against ``news_id`` with ``$in``.

    Returns:
        DataFrame with columns ``sentence``, ``sentiment`` and ``score``
        (taken from ``google_score``), one row per matching
        ``newsSentenceSentiments`` document; an empty, column-less
        DataFrame when nothing matches.
    """
    mongoCon = mc.getDBCon()
    try:
        collection = mongoCon.production.newsSentenceSentiments
        rows = [
            {
                "sentence": sentence['sentence'],
                "sentiment": sentence['sentiment'],
                "score": sentence['google_score'],
            }
            for sentence in collection.find({"news_id": {"$in": list_news}})
        ]
    finally:
        mongoCon.close()
    # Build the frame once instead of appending row by row.
    return pd.DataFrame(rows)
def get_impact(company,date,companyList):
    """Return a company's impact and the industry average for a date.

    Args:
        company: Theme whose own impact is looked up.
        date: Value matched against the ``date`` field.
        companyList: Themes that make up the industry. The summed impact of
            their matching documents is divided by ``len(companyList)``.

    Returns:
        ``[company_impact, industry_impact]``. ``company_impact`` is 0 when
        no document matches (the last one read wins if several do).
        ``industry_impact`` is 0 when ``companyList`` is empty, which
        previously raised ``ZeroDivisionError``.
    """
    mongoCon = mc.getDBCon()
    try:
        collection = mongoCon.production.dailyThemeImpact

        company_impact = 0
        for item in collection.find({"theme": company, "date": date}):
            company_impact = item['impact']

        industry_query = {"theme": {"$in": companyList}, "date": date}
        industry_total = sum(
            item['impact'] for item in collection.find(industry_query))
        industry_impact = (
            industry_total / len(companyList) if companyList else 0)
    finally:
        mongoCon.close()
    return [company_impact, industry_impact]
def insert_mongo(company,date,impactList):
    """Store a company's impact and industry average for a date.

    The document goes into ``dailyThemeImpactIndustry`` unless
    ``check_if_impact_exist(db, {"company": ..., "date": ...})`` reports an
    existing entry. Insert failures are reported on stdout and otherwise
    ignored.

    Args:
        company: Company name stored on the document.
        date: Date stored on the document.
        impactList: Sequence whose first element is the company impact and
            whose second element is the industry average.

    Returns:
        Always True.

    NOTE(review): the text under review also defines a one-argument
    ``insert_mongo`` earlier; if both live in one module this definition
    replaces it.
    """
    mongoCon = mc.getDBCon()
    try:
        db = mongoCon.production
        findQuery = {"company": company, "date": date}
        print(findQuery)
        if check_if_impact_exist(db, findQuery):
            print("Sentence Exists")
        else:
            try:
                # The old body read an undefined ``impact_list``; the
                # resulting NameError was swallowed, so nothing was stored.
                query = {
                    "date": date,
                    "company": company,
                    "impact": impactList[0],
                    "industry_average": impactList[1],
                }
                db.dailyThemeImpactIndustry.insert_one(query)
                print("Insert Done")
            except Exception as exc:
                print("Insert Error for Sentences", exc)
    finally:
        mongoCon.close()
    return True
def map_article_sentences(all_articles, companyName):
    """Split articles into sentences and store them in MongoDB.

    For each article, ``sa.get_Sentences`` is called with the article text,
    ``_id``, date and ``companyName``. Non-empty results are inserted into
    ``sentence_article_map`` and their inserted ids are printed.

    Args:
        all_articles: Iterable of mappings providing ``article``, ``_id``
            and ``date``.
        companyName: Passed through to ``sa.get_Sentences``.

    Returns:
        List of ``{"article_id": ..., "sentences": ...}`` dicts, one per
        article.

    NOTE(review): the source formatting lost its indentation; this assumes
    only the insert is guarded by the non-empty check and every article gets
    an entry in the result - confirm.
    """
    sentences_list = []
    mongoCon = mc.getDBCon()
    try:
        sentence_collection = mongoCon.production.sentence_article_map
        for news in all_articles:
            sentences = sa.get_Sentences(news['article'], news['_id'],
                                         news['date'], companyName)
            # insert_many rejects an empty list, so skip it in that case.
            if len(sentences) > 0:
                result = sentence_collection.insert_many(sentences)
                print(result.inserted_ids)
            sentences_list.append({
                "article_id": news['_id'],
                "sentences": sentences,
            })
    finally:
        mongoCon.close()
    return sentences_list
def write_mongo(theme,df):
    """Store one ``dailyThemeImpact`` document per row of ``df``.

    A row is skipped when ``check_if_impact_exist`` reports an existing
    entry for the theme and the row's date. Insert failures are reported on
    stdout and otherwise ignored.

    Args:
        theme: Theme stored on every document.
        df: DataFrame with ``date`` and ``sentiment`` columns; the sentiment
            value is stored under the ``impact`` key.

    Returns:
        Always True.

    NOTE(review): the text under review defines another ``write_mongo``
    with a (theme, date) signature further down; if both live in one
    module, the later definition replaces this one.
    """
    mongoCon = mc.getDBCon()
    try:
        db = mongoCon.production
        collection = db.dailyThemeImpact
        for _, item in df.iterrows():
            findQuery = {"theme": theme, "date": item['date']}
            if check_if_impact_exist(db, findQuery):
                print("Theme Impact for Day Exists")
                continue
            query = {
                "theme": theme,
                "date": item['date'],
                "impact": item['sentiment'],
            }
            try:
                collection.insert_one(query)
                print("Insert Done")
            except Exception as exc:
                print("Insert Error for Sentences", exc)
    finally:
        mongoCon.close()
    return True
def read_tradewar_articles_bulk():
    """Load every stored article linked to the "trade war" search theme.

    ``themeArticleMap`` entries with ``search_theme_id`` "trade war" are
    resolved, by URL, to their ``newsAPIArticles`` documents. Each returned
    article carries the ``pinalpha_news_id`` of the map entry it was found
    through.

    Returns:
        List of article documents.
    """
    article_list = []
    mongoCon = mc.getDBCon()
    try:
        db = mongoCon.production
        theme_map_collection = db.themeArticleMap
        news_collection = db.newsAPIArticles
        for link in theme_map_collection.find({"search_theme_id": "trade war"}):
            # Process one mapped URL at a time.
            for item in news_collection.find({"url": link["url"]}):
                item["pinalpha_news_id"] = link['pinalpha_news_id']
                article_list.append(item)
    finally:
        mongoCon.close()
    return article_list
def read_data(themeList):
    """Load the raw sentiment records of several themes.

    Args:
        themeList: Iterable of values matched against the ``theme`` field
            of ``themeSentimentArticlesMap``.

    Returns:
        List with one DataFrame per theme, in the order given. Each frame
        has ``date``, ``news_id`` and ``sentiment`` columns, or is empty and
        column-less when the theme has no documents.
    """
    RawSentimentData = []
    mongoCon = mc.getDBCon()
    try:
        collection = mongoCon.production.themeSentimentArticlesMap
        for theme in themeList:
            rows = [
                {
                    "date": item['date'],
                    # The old code stored the whole document here instead
                    # of its news_id field.
                    "news_id": item['news_id'],
                    "sentiment": item['sentiment'],
                }
                for item in collection.find({"theme": theme})
            ]
            # Build each frame once instead of appending row by row.
            RawSentimentData.append(pd.DataFrame(rows))
    finally:
        mongoCon.close()
    return RawSentimentData
def write_mongo(theme, date):
    """Store a fixed impact value for a theme and date, if none exists yet.

    The impact written to ``dailyThemeImpact`` is 37 for the "loan_growth"
    theme and 55 for any other theme. Nothing is written when
    ``check_if_impact_exist`` reports an existing entry. Insert failures are
    reported on stdout and otherwise ignored.

    Args:
        theme: Theme stored on the document.
        date: Date stored on the document.

    Returns:
        Always True.

    NOTE(review): the impact values are hard-coded and their origin is not
    shown here. The text under review also defines a ``write_mongo`` with a
    (theme, df) signature earlier, which this definition would replace.
    """
    mongoCon = mc.getDBCon()
    try:
        db = mongoCon.production
        findQuery = {"theme": theme, "date": date}
        if check_if_impact_exist(db, findQuery):
            print("Theme Impact for Day Exists")
        else:
            impact = 37 if theme == "loan_growth" else 55
            query = {"theme": theme, "date": date, "impact": impact}
            try:
                db.dailyThemeImpact.insert_one(query)
                print("Insert Done")
            except Exception as exc:
                print("Insert Error for Sentences", exc)
    finally:
        mongoCon.close()
    return True
def insert_to_mongo(
        theme,
        df_sentiment,
):
    """Store per-article sentiment rows for a theme.

    Each row of ``df_sentiment`` becomes a ``themeSentimentArticlesMap``
    document unless ``check_if_impact_exist`` reports that it already
    exists. Insert failures are reported on stdout and otherwise ignored.

    Args:
        theme: Theme stored on every document.
        df_sentiment: DataFrame with ``date``, ``news_id`` and ``sentiment``
            columns.
    """
    mongoCon = mc.getDBCon()
    try:
        db = mongoCon.production
        collection = db.themeSentimentArticlesMap
        for _, item in df_sentiment.iterrows():
            query = {
                "theme": theme,
                "date": item['date'],
                "news_id": item['news_id'],
                "sentiment": item['sentiment'],
            }
            if check_if_impact_exist(db, query):
                print("Sentiment Exists")
                continue
            try:
                result = collection.insert_one(query)
                print(result.inserted_id)
            except Exception as exc:
                print("insertError", exc)
    finally:
        mongoCon.close()