Ejemplo n.º 1
0
def push_data_to_mongoDB(articles, search_query_id):
    """Store news articles and map each one to a search theme.

    Each article is inserted into ``newsAPIArticles`` unless
    ``check_if_news_exists`` reports its URL as already present. A mapping
    row linking the article URL to ``search_query_id`` is then written to
    ``themeArticleMap`` for every article, whether or not it was new.

    Args:
        articles: Iterable of article dicts; each must provide ``"url"``
            and ``"publishedAt"``. New articles get an ``"id"`` key added.
        search_query_id: Value stored as ``search_theme_id`` in the map.
    """
    db_connection = mongo.getDBCon()
    try:
        db = db_connection.production
        news_articles_collection = db.newsAPIArticles
        theme_article_collection = db["themeArticleMap"]
        for article in articles:
            # Only push the article body when its URL is not stored yet.
            if not check_if_news_exists(article["url"], db):
                try:
                    article["id"] = uuid.uuid4().hex
                    result = news_articles_collection.insert_one(article)
                    print(result.inserted_id)
                except Exception as exc:
                    # Best effort: report the failure and still write the
                    # theme mapping below.
                    print("Error in Inserting to Mongo:", exc)
            # The mapping row gets its own freshly generated news id.
            input_dict = {
                "pinalpha_news_id": uuid.uuid4().hex,
                "search_theme_id": search_query_id,
                "url": article["url"],
                # First ten characters of publishedAt are kept as the date.
                "date": article["publishedAt"][0:10],
            }
            print(input_dict)
            theme_article_collection.insert_one(input_dict)
    finally:
        db_connection.close()
Ejemplo n.º 2
0
def get_sentiments(article_ids):
    """Collect sentiment scores for the given article ids.

    For each id the lookup chain is ``newsAPIArticles`` (by ``id``) ->
    ``themeArticleMap`` (by ``url``) -> ``newsArticleSentimentValues``
    (by ``pinalpha_news_id``). Every sentiment document found contributes
    one row.

    Args:
        article_ids: Iterable of article ``id`` values.

    Returns:
        A DataFrame with ``date``, ``sentiment`` (the ``google_score``) and
        ``news_id`` columns; an empty ``date``/``sentiment`` frame when
        nothing matched.
    """
    rows = []
    mongoCon = mc.getDBCon()  # connection
    try:
        db = mongoCon.production  # database
        article_collection = db.newsAPIArticles
        themeArticleMap_Collection = db.themeArticleMap
        article_sentiment_collection = db.newsArticleSentimentValues
        for item in article_ids:
            for document in article_collection.find({"id": item}):
                theme_cursor = themeArticleMap_Collection.find(
                    {"url": document['url']})
                for doc in theme_cursor:
                    sentiment_cursor = article_sentiment_collection.find(
                        {"pinalpha_news_id": doc['pinalpha_news_id']})
                    for sents in sentiment_cursor:
                        rows.append({
                            "date": sents['date'],
                            "news_id": item,
                            "sentiment": sents['google_score'],
                        })
    finally:
        mongoCon.close()
    if not rows:
        return pd.DataFrame({'date': [], 'sentiment': []})
    # Build the frame once instead of re-allocating it per row.
    return pd.DataFrame(rows, columns=["date", "sentiment", "news_id"])
def get_content_from_articles(phrase, theme, YMdate):
    """Summarise the first matching article and hand it to ``insert_mongo``.

    An article matches when its ``content`` matches both ``phrase`` and
    ``theme`` as regexes and its ``publishedAt`` matches ``YMdate``. Only
    the first match is processed.

    Args:
        phrase: Regex applied to the article content; also decides the
            stored ``type`` ("trade war" is kept, anything else becomes
            "SG Banks").
        theme: Second regex applied to the article content.
        YMdate: Regex applied to ``publishedAt``; stored as ``date``.
    """
    client = mc.getDBCon()  # connection
    articles = client.production.newsAPIArticles  # collection
    criteria = {
        "$and": [
            {"content": {"$regex": phrase}},
            {"content": {"$regex": theme}},
            {"publishedAt": {"$regex": YMdate}},
        ]
    }
    for doc in articles.find(criteria):
        extracted = summary.ExtractSummary(doc['content'])
        # Any phrase other than "trade war" is recorded as "SG Banks".
        phrase = phrase if phrase == "trade war" else "SG Banks"
        record = {
            "date": YMdate,
            "type": phrase,
            "news_id": doc['id'],
            "sentence": extracted
        }
        print(record)
        insert_mongo(record)
        # Stop after the first article.
        break
    client.close()
Ejemplo n.º 4
0
def getnews_daily(company):
    """Return the ``companyTenMostRecent`` documents for a company.

    Args:
        company: Value matched against ``company_name``.

    Returns:
        The matching documents serialised with ``dumps``.
    """
    query = {"company_name": company}
    mongoCon = mc.getDBCon()  # connection
    try:
        db = mongoCon.production  # database
        result = db.companyTenMostRecent.find(query)
        # Serialise before the connection is released in ``finally``.
        return dumps(result)
    finally:
        mongoCon.close()
Ejemplo n.º 5
0
def get_sentnces_daily(theme,date):
    """Return the ``dailyThemeSentence`` documents for a theme and date.

    Args:
        theme: Value matched against ``theme``.
        date: Value matched against ``date``.

    Returns:
        The matching documents serialised with ``dumps``.
    """
    query = {"theme": theme, "date": date}
    mongoCon = mc.getDBCon()  # connection
    try:
        db = mongoCon.production  # database
        result = db.dailyThemeSentence.find(query)
        # Serialise before the connection is released in ``finally``.
        return dumps(result)
    finally:
        mongoCon.close()
Ejemplo n.º 6
0
def get_sentences_with_theme(company, theme):
    """Print every stored sentence of a company that matches a theme.

    Args:
        company: Value matched against ``company``.
        theme: Regex applied to the ``sentence`` field.

    Returns:
        None; matching ``sentence_article_map`` documents are only printed.
    """
    mongoCon = mc.getDBCon()  # connection
    try:
        sentence_collection = mongoCon.production.sentence_article_map
        query = {
            "$and": [{"company": company}, {"sentence": {"$regex": theme}}]
        }
        for x in sentence_collection.find(query):
            print(x)
    finally:
        mongoCon.close()
    return None
Ejemplo n.º 7
0
def put_themes_data_to_db():
    """Load the company themes and return them.

    Despite the name, nothing is written to MongoDB: the function only
    returns whatever ``get_themes_from_file()`` yields.

    Returns:
        The result of ``get_themes_from_file()``.
    """
    # No MongoDB client is opened here because no write happens yet; an
    # unused, never-closed client would only leak a connection.
    # NOTE(review): presumably the themes come from
    # "./Data/CompanyThemes.csv" — confirm inside get_themes_from_file.
    return get_themes_from_file()
Ejemplo n.º 8
0
def delete_tradwar():
    """Delete all ``dailyThemeImpact`` documents with theme "DBS" or "dbs"."""
    client = mc.getDBCon()  # connection
    impacts = client.production.dailyThemeImpact
    # One delete per spelling, upper-case first.
    for theme_name in ("DBS", "dbs"):
        impacts.delete_many({"theme": theme_name})
    client.close()
Ejemplo n.º 9
0
def read_sentiment_from_mongo(theme):
    """Read the date/sentiment pairs stored for a theme.

    Args:
        theme: Value matched against ``theme`` in
            ``themeSentimentArticlesMap``.

    Returns:
        A DataFrame with ``date`` and ``sentiment`` columns, one row per
        matching document; an empty DataFrame when nothing matched.
    """
    mongoCon = mc.getDBCon()  # connection
    try:
        collection = mongoCon.production.themeSentimentArticlesMap
        rows = [
            {"date": item['date'], "sentiment": item['sentiment']}
            for item in collection.find({"theme": theme})
        ]
    finally:
        mongoCon.close()
    # Build the frame once instead of re-allocating it per row.
    return pd.DataFrame(rows)
Ejemplo n.º 10
0
def get_article_ids(theme,date):
    """Return the distinct ``news_id`` values stored for a theme on a date.

    Args:
        theme: Value matched against ``theme``.
        date: Value matched against ``date``.

    Returns:
        A list of unique ids from ``themeSentimentArticlesMap``, in no
        particular order.
    """
    criteria = {"$and": [{"theme": theme}, {"date": date}]}
    client = mc.getDBCon()  # connection
    mapping = client.production.themeSentimentArticlesMap
    # A set drops ids that occur in more than one document.
    unique_ids = {doc["news_id"] for doc in mapping.find(criteria)}
    client.close()
    return list(unique_ids)
Ejemplo n.º 11
0
def update_mongo_sentiment(sentiment_list):
    """Insert sentiment document(s) into ``articleSentiment``.

    Failures are reported on stdout rather than raised.

    Args:
        sentiment_list: A single document (mapping) or an iterable of
            documents.
    """
    from collections.abc import Mapping

    mongoCon = mc.getDBCon()  # connection
    try:
        articleSentiment_collection = mongoCon.production.articleSentiment
        # The legacy ``insert`` accepted both shapes; dispatch explicitly.
        if isinstance(sentiment_list, Mapping):
            result = articleSentiment_collection.insert_one(sentiment_list)
            print(result.inserted_id)
        else:
            result = articleSentiment_collection.insert_many(sentiment_list)
            print(result.inserted_ids)
    except Exception as exc:
        print("Insert Error - sentiment:", exc)
    finally:
        mongoCon.close()
    return
Ejemplo n.º 12
0
def get_impact_of_theme_date(theme, date):
    """Classify the stored impact of a theme on a date.

    Args:
        theme: Value matched against ``theme``.
        date: Value matched against ``date``.

    Returns:
        "Negative" when the last matching ``dailyThemeImpactIntermediate``
        document has ``impact`` below zero or when nothing matches,
        otherwise "Positive".
    """
    mongoCon = mc.getDBCon()  # connection
    try:
        collection = mongoCon.production.dailyThemeImpactIntermediate
        sentimentVal = "Negative"
        # With several matches, the last document decides the result.
        for item in collection.find({"theme": theme, "date": date}):
            sentimentVal = "Negative" if item['impact'] < 0 else "Positive"
    finally:
        mongoCon.close()
    return sentimentVal
Ejemplo n.º 13
0
def get_content(articles):
    """Fetch the stored content of each article, looked up by URL.

    Args:
        articles: Iterable of mappings that provide a ``'url'`` key.

    Returns:
        A list with the ``content`` of every ``newsAPIArticles`` document
        whose ``url`` matches, in input order.
    """
    client = mc.getDBCon()  # connection
    stored_articles = client.production.newsAPIArticles  # collection
    contents = []
    for article in articles:
        matches = stored_articles.find({"url": article['url']})
        contents.extend(doc['content'] for doc in matches)
    client.close()
    return contents
Ejemplo n.º 14
0
def get_data(searchTheme, startDate):
    """Query ``themeArticleMap`` for a search theme on a given date.

    Args:
        searchTheme: Value matched against ``search_theme_id``.
        startDate: Value matched against ``date``.

    Returns:
        The cursor produced by ``find`` (not a materialised list).
    """
    client = mc.getDBCon()  # connection
    theme_map = client.production.themeArticleMap  # collection
    matches = theme_map.find({
        "$and": [
            {"date": startDate},
            {"search_theme_id": searchTheme},
        ]
    })
    # NOTE(review): the client is closed before the caller iterates the
    # cursor; whether iteration still works depends on the driver
    # re-opening the connection — confirm.
    client.close()
    return matches  # this is a mongodb cursor
Ejemplo n.º 15
0
def get_sentence_summary(list_news):
    """Build a summary for each article id.

    Args:
        list_news: Iterable of article ``id`` values.

    Returns:
        A dict mapping each id that has a ``newsAPIArticles`` document to
        ``summary.ExtractSummary`` of its content. When several documents
        share an id, the last one wins.
    """
    client = mc.getDBCon()  # connection
    articles = client.production.newsAPIArticles
    summaries = {}
    for news_id in list_news:
        for doc in articles.find({"id": news_id}):
            summaries[news_id] = summary.ExtractSummary(doc['content'])
    client.close()
    return summaries
Ejemplo n.º 16
0
def get_company_data(company):
    """Read every stored daily impact for a company.

    Args:
        company: Value matched against ``theme`` in ``dailyThemeImpact``.

    Returns:
        A DataFrame with ``date`` and ``impact`` columns, one row per
        matching document; an empty DataFrame when nothing matched.
    """
    mongoCon = mc.getDBCon()  # connection
    try:
        collection = mongoCon.production.dailyThemeImpact
        rows = [
            {"date": item['date'], "impact": item['impact']}
            for item in collection.find({"theme": company})
        ]
    finally:
        mongoCon.close()
    # Build the frame once instead of re-allocating it per row.
    return pd.DataFrame(rows)
Ejemplo n.º 17
0
def get_tradewar_data():
    """Read every stored intermediate impact for the "trade_war" theme.

    Returns:
        A DataFrame with ``date`` and ``impact`` columns, one row per
        matching ``dailyThemeImpactIntermediate`` document; an empty
        DataFrame when nothing matched.
    """
    mongoCon = mc.getDBCon()  # connection
    try:
        collection = mongoCon.production.dailyThemeImpactIntermediate
        rows = [
            {"date": item['date'], "impact": item['impact']}
            for item in collection.find({"theme": "trade_war"})
        ]
    finally:
        mongoCon.close()
    # Build the frame once instead of re-allocating it per row.
    return pd.DataFrame(rows)
Ejemplo n.º 18
0
def testQuery():
    """Print the stored sentences of two hard-coded news ids."""
    wanted_ids = (
        "c125c6c1c65249e0959c463d4b4d40f8",
        "fa374d6c476346eb84f92438d4f873a1",
    )
    criteria = {"$or": [{"news_id": news_id} for news_id in wanted_ids]}
    client = mc.getDBCon()  # connection
    sentiments = client.production.newsSentenceSentiments
    for doc in sentiments.find(criteria):
        print(doc['news_id'] + " : " + doc['sentence'])
    client.close()
Ejemplo n.º 19
0
def get_theme_impact(theme, date):
    """Read the stored impact of a theme on a date.

    Every matching ``dailyThemeImpact`` document is also printed.

    Args:
        theme: Value matched against ``theme``.
        date: Value matched against ``date``.

    Returns:
        A DataFrame with ``date`` and ``impact`` columns, one row per
        matching document; an empty DataFrame when nothing matched.
    """
    rows = []
    mongoCon = mc.getDBCon()  # connection
    try:
        collection = mongoCon.production.dailyThemeImpact
        for item in collection.find({"theme": theme, "date": date}):
            print(item)
            # Keys are already in sorted column order (date, impact).
            rows.append({"date": item['date'], "impact": item['impact']})
    finally:
        mongoCon.close()
    # Build the frame once instead of re-allocating it per row.
    return pd.DataFrame(rows)
Ejemplo n.º 20
0
def insert_mongo(impactQuery):
    """Insert a document into ``dailyThemeImpact`` unless it already exists.

    Existence is decided by ``check_if_impact_exist(db, impactQuery)``.
    Insert failures are reported on stdout rather than raised.

    Args:
        impactQuery: The document to store; it is also used as the
            existence filter.

    Returns:
        Always True.
    """
    mongoCon = mc.getDBCon()  # connection
    try:
        db = mongoCon.production  # database
        print(impactQuery)
        if check_if_impact_exist(db, impactQuery):
            print("Sentence Exists")
        else:
            try:
                db.dailyThemeImpact.insert_one(impactQuery)
                print("Insert Done")
            except Exception as exc:
                print("Insert Error for Sentences:", exc)
    finally:
        mongoCon.close()
    return True
Ejemplo n.º 21
0
def delete_duplicates():
    """Enforce uniqueness of (sentence, company) in ``sentence_article_map``.

    Creates a unique compound index via PyMongo's ``create_index``. The
    shell-style ``ensureIndex`` name is not a PyMongo collection method, so
    calling it can only fail. Failures (for example an index build rejected
    because duplicates already exist) are reported on stdout, not raised.

    NOTE(review): the ``dropDups`` option is deliberately not passed;
    MongoDB removed it in 3.0, so existing duplicates are not dropped by
    this call — confirm whether an explicit clean-up step is needed.

    Returns:
        None.
    """
    mongoCon = mc.getDBCon()  # connection
    try:
        sentence_collection = mongoCon.production.sentence_article_map
        result = sentence_collection.create_index(
            [("sentence", 1), ("company", 1)], unique=True)
        print(result)
    except Exception as exc:
        print("Delete Error:", exc)
    finally:
        mongoCon.close()
    return None
Ejemplo n.º 22
0
def get_sentences_based_sentiment(list_news):
    """Read the sentence-level sentiments of the given news ids.

    Args:
        list_news: List of ``news_id`` values used in an ``$in`` filter on
            ``newsSentenceSentiments``.

    Returns:
        A DataFrame with ``sentence``, ``sentiment`` and ``score`` (the
        ``google_score``) columns; an empty DataFrame when nothing matched.
    """
    query = {"news_id": {"$in": list_news}}
    mongoCon = mc.getDBCon()  # connection
    try:
        collection = mongoCon.production.newsSentenceSentiments
        rows = [
            {
                "sentence": sentence['sentence'],
                "sentiment": sentence['sentiment'],
                "score": sentence['google_score'],
            }
            for sentence in collection.find(query)
        ]
    finally:
        mongoCon.close()
    # Build the frame once instead of re-allocating it per row.
    return pd.DataFrame(rows)
Ejemplo n.º 23
0
def get_impact(company,date,companyList):
    """Return a company's impact and the industry average for a date.

    Args:
        company: Theme whose own impact is looked up.
        date: Value matched against ``date``.
        companyList: List of themes that make up the industry.

    Returns:
        ``[company_impact, industry_impact]``. The company impact is taken
        from the last matching ``dailyThemeImpact`` document (0 when none
        match). The industry impact is the sum of matching impacts divided
        by ``len(companyList)`` — not by the number of matches — and is 0
        for an empty list.
    """
    mongoCon = mc.getDBCon()  # connection
    try:
        collection = mongoCon.production.dailyThemeImpact

        company_impact = 0
        for item in collection.find({"theme": company, "date": date}):
            company_impact = item['impact']

        industry_query = {"theme": {"$in": companyList}, "date": date}
        industry_total = sum(
            item['impact'] for item in collection.find(industry_query))
    finally:
        mongoCon.close()

    company_count = len(companyList)
    # Guard against dividing by zero when no companies were supplied.
    industry_impact = industry_total / company_count if company_count else 0
    return [company_impact, industry_impact]
Ejemplo n.º 24
0
def insert_mongo(company,date,impactList):
    """Store a company's impact and industry average for a date.

    Nothing is written when ``check_if_impact_exist`` reports the
    company/date pair as present. Insert failures are reported on stdout
    rather than raised.

    Args:
        company: Company name stored under ``company``.
        date: Value stored under ``date``.
        impactList: Sequence whose first element is stored as ``impact``
            and second as ``industry_average``.

    Returns:
        Always True.
    """
    mongoCon = mc.getDBCon()  # connection
    try:
        db = mongoCon.production  # database
        findQuery = {"company": company, "date": date}
        print(findQuery)
        if check_if_impact_exist(db, findQuery):
            print("Sentence Exists")
        else:
            try:
                # Read from the ``impactList`` parameter; the misspelled
                # ``impact_list`` raised a NameError on every call.
                query = {
                    "date": date,
                    "company": company,
                    "impact": impactList[0],
                    "industry_average": impactList[1],
                }
                db.dailyThemeImpactIndustry.insert_one(query)
                print("Insert Done")
            except Exception as exc:
                print("Insert Error for Sentences:", exc)
    finally:
        mongoCon.close()
    return True
Ejemplo n.º 25
0
def map_article_sentences(all_articles, companyName):
    """Split articles into sentences and store them in MongoDB.

    For each article, ``sa.get_Sentences`` is called with its text, id,
    date and the company name. Non-empty results are bulk-inserted into
    ``sentence_article_map`` and the inserted ids are printed.

    Args:
        all_articles: Iterable of mappings providing ``'article'``,
            ``'_id'`` and ``'date'``.
        companyName: Passed through to ``sa.get_Sentences``.

    Returns:
        A list of ``{"article_id": ..., "sentences": ...}`` dicts, one per
        article that produced at least one sentence.
    """
    mongoCon = mc.getDBCon()
    try:
        sentence_collection = mongoCon.production.sentence_article_map
        sentences_list = []
        for news in all_articles:
            sentences = sa.get_Sentences(news['article'], news['_id'],
                                         news['date'], companyName)
            if len(sentences) > 0:
                result = sentence_collection.insert_many(sentences)
                print(result.inserted_ids)
                sentences_list.append({
                    "article_id": news['_id'],
                    "sentences": sentences,
                })
    finally:
        mongoCon.close()
    return sentences_list
Ejemplo n.º 26
0
def write_mongo(theme,df):
    """Store one daily impact per DataFrame row for a theme.

    A row is skipped when ``check_if_impact_exist`` reports the theme/date
    pair as present. Insert failures are reported on stdout rather than
    raised.

    Args:
        theme: Value stored under ``theme``.
        df: DataFrame with ``date`` and ``sentiment`` columns; the
            sentiment is stored as ``impact`` in ``dailyThemeImpact``.

    Returns:
        Always True.
    """
    mongoCon = mc.getDBCon()  # connection
    try:
        db = mongoCon.production  # database
        dailyThemeImpact_collection = db.dailyThemeImpact
        for _, item in df.iterrows():
            findQuery = {"theme": theme, "date": item['date']}
            if check_if_impact_exist(db, findQuery):
                print("Theme Impact for Day Exists")
                continue
            query = {
                "theme": theme,
                "date": item['date'],
                "impact": item['sentiment'],
            }
            try:
                dailyThemeImpact_collection.insert_one(query)
                print("Insert Done")
            except Exception as exc:
                print("Insert Error for Sentences:", exc)
    finally:
        mongoCon.close()
    return True
Ejemplo n.º 27
0
def read_tradewar_articles_bulk():
    """Load every stored article mapped to the "trade war" search theme.

    Returns:
        A list of ``newsAPIArticles`` documents, each extended with the
        ``pinalpha_news_id`` of the ``themeArticleMap`` row that pointed
        to it by URL.
    """
    client = mc.getDBCon()  # connection
    db = client.production  # database
    theme_links = db.themeArticleMap.find({"search_theme_id": "trade war"})
    news_articles = db.newsAPIArticles
    collected = []
    for link in theme_links:
        # Resolve each mapping row to its article document(s) by URL.
        for doc in news_articles.find({"url": link["url"]}):
            doc["pinalpha_news_id"] = link['pinalpha_news_id']
            collected.append(doc)
    client.close()
    return collected
Ejemplo n.º 28
0
def read_data(themeList):
    """Read the raw sentiment rows of several themes.

    Args:
        themeList: Iterable of theme names matched against ``theme`` in
            ``themeSentimentArticlesMap``.

    Returns:
        A list with one DataFrame per theme, in input order, with ``date``,
        ``news_id`` and ``sentiment`` columns (an empty DataFrame for a
        theme without documents).
    """
    RawSentimentData = []
    mongoCon = mc.getDBCon()  # connection
    try:
        collection = mongoCon.production.themeSentimentArticlesMap
        for theme in themeList:
            rows = [
                {
                    "date": item['date'],
                    # Store the document's id, not the whole document.
                    "news_id": item['news_id'],
                    "sentiment": item['sentiment'],
                }
                for item in collection.find({"theme": theme})
            ]
            # Build each frame once instead of re-allocating it per row.
            RawSentimentData.append(pd.DataFrame(rows))
    finally:
        mongoCon.close()
    return RawSentimentData
Ejemplo n.º 29
0
def write_mongo(theme, date):
    """Store a fixed impact value for a theme on a date.

    The impact is 37 for the "loan_growth" theme and 55 for any other.
    Nothing is written when ``check_if_impact_exist`` reports the
    theme/date pair as present. Insert failures are reported on stdout
    rather than raised.

    Args:
        theme: Value stored under ``theme``.
        date: Value stored under ``date``.

    Returns:
        Always True.
    """
    mongoCon = mc.getDBCon()  # connection
    try:
        db = mongoCon.production  # database
        findQuery = {"theme": theme, "date": date}
        if check_if_impact_exist(db, findQuery):
            print("Theme Impact for Day Exists")
        else:
            impact = 37 if theme == "loan_growth" else 55
            query = {"theme": theme, "date": date, "impact": impact}
            try:
                db.dailyThemeImpact.insert_one(query)
                print("Insert Done")
            except Exception as exc:
                print("Insert Error for Sentences:", exc)
    finally:
        mongoCon.close()
    return True
Ejemplo n.º 30
0
def insert_to_mongo(
    theme,
    df_sentiment,
):
    """Store per-article sentiments of a theme.

    Each row becomes a ``themeSentimentArticlesMap`` document unless
    ``check_if_impact_exist`` reports that exact document as present.
    Insert failures are reported on stdout rather than raised.

    Args:
        theme: Value stored under ``theme``.
        df_sentiment: DataFrame with ``date``, ``news_id`` and
            ``sentiment`` columns.
    """
    mongoCon = mc.getDBCon()  # connection
    try:
        db = mongoCon.production  # database
        themeSentArticleMap_collection = db.themeSentimentArticlesMap
        for _, item in df_sentiment.iterrows():
            query = {
                "theme": theme,
                "date": item['date'],
                "news_id": item['news_id'],
                "sentiment": item['sentiment'],
            }
            if check_if_impact_exist(db, query):
                print("Sentiment Exists")
                continue
            try:
                result = themeSentArticleMap_collection.insert_one(query)
                print(result.inserted_id)
            except Exception as exc:
                print("insertError:", exc)
    finally:
        mongoCon.close()