Exemples Python de GoogleNews (pygooglenews.GoogleNews)

Exemple #1

0

Afficher le fichier

def get_news():
    """Collect Google News entries for every query in ``NEWS_SEARCHES``.

    Each query is searched with the ``NEWS_PERIOD`` window using a client
    configured from ``NEWS_LANG`` / ``NEWS_COUNTRY``. After every query the
    first accumulated entry (if any) is printed and the function sleeps for
    100 seconds.

    Returns:
        list: all entries gathered across the queries, in query order.
    """
    import time

    r = []
    gn = GoogleNews(lang=NEWS_LANG, country=NEWS_COUNTRY)
    for s in NEWS_SEARCHES:
        search = gn.search(s, when=NEWS_PERIOD)
        # ``get`` returns None when the key is absent; ``r += None`` would
        # raise TypeError, so fall back to an empty list.
        r += search.get('entries') or []
        # Guard the debug print: indexing an empty list raised IndexError
        # whenever nothing had been found yet.
        if r:
            print(r[0])
        # NOTE(review): long pause between queries, presumably to throttle
        # requests; confirm it is still wanted.
        time.sleep(100)
    return r

Exemple #2

0

Afficher le fichier

def getNewsClimate(max_articles=20):
    """Return title and link of the first headlines for a fixed topic.

    Fetches the headlines for the hard-coded topic id (presumably the
    climate topic, judging by the function name) from the en/US edition.

    Args:
        max_articles: maximum number of entries to keep.

    Returns:
        dict: maps the entry position (0-based) to a dict with the keys
        ``'title'`` and ``'href'`` (the href of the entry's first link).
    """
    client = GoogleNews(lang='en', country='US')
    feed = client.topic_headlines('CAAqBwgKMKeh0wEw-sE1')
    articles = feed['entries'][:max_articles]
    return {
        position: {
            'title': article.title,
            'href': article['links'][0]['href'],
        }
        for position, article in enumerate(articles)
    }

Exemple #3

0

Afficher le fichier

def getNews(query, max_articles=20, time_period='1m'):
    """Search the en/US edition of Google News and summarise the results.

    Args:
        query: search expression passed to ``GoogleNews.search``.
        max_articles: maximum number of entries to keep.
        time_period: value forwarded as the ``when`` argument of the search.

    Returns:
        dict: maps the entry position (0-based) to a dict with the keys
        ``'title'`` and ``'href'`` (the href of the entry's first link).
    """
    client = GoogleNews(lang='en', country='US')
    results = client.search(query, when=time_period)
    articles = results['entries'][:max_articles]
    return {
        position: {
            'title': article.title,
            'href': article['links'][0]['href'],
        }
        for position, article in enumerate(articles)
    }

Exemple #4

0

Afficher le fichier

Fichier : helper.py Projet : Bruck1701/NewsDiscordBot

def get_articles(lang, country, number_articles):
    """Return up to ``number_articles`` top-news entries for a locale.

    The client is created with the lower-cased language and upper-cased
    country, and its ``BASE_URL`` is extended with ``hl``/``gl`` query
    parameters taken from the client before the top news are requested.

    Args:
        lang: language code (lower-cased before use).
        country: country code (upper-cased before use).
        number_articles: number of entries to keep when the feed has more.

    Returns:
        list: the feed entries, truncated when the feed is longer than
        ``number_articles``.
    """
    client = GoogleNews(lang.lower(), country.upper())
    locale_query = "?hl={}&gl={}".format(client.lang, client.country)
    client.BASE_URL = client.BASE_URL + locale_query

    articles = client.top_news()["entries"]
    if number_articles >= len(articles):
        return articles
    return articles[:number_articles]

Exemple #5

0

Afficher le fichier

def main():
    """Print a greeting followed by up to ten top-news entries.

    Returns:
        int: always 0.
    """
    print("hello world")

    gn = GoogleNews()
    top = gn.top_news()

    # Slice rather than index 0..9 so a feed with fewer than ten entries
    # no longer raises IndexError.
    for entry in top['entries'][:10]:
        print(entry)

    return 0

Exemple #6

0

Afficher le fichier

Fichier : main.py Projet : AlexGustafsson/irc-news-bot

def handle_news_request(irc: IRC, nick: str, target: str,
                        message: IRCMessage) -> None:
    """Parse a news command addressed to the bot and dispatch it.

    The text is expected to contain, after the ``<nick>:`` prefix is removed,
    at least three words: command, country and language. Any remaining words
    form the parameter passed to the topic/location/search handlers.

    Args:
        irc: connection used to send replies.
        nick: the bot's nickname; ``"<nick>:"`` is stripped from the text.
        target: where replies are sent.
        message: incoming message; its ``message`` attribute holds the text.
    """
    prefix = "{}:".format(nick)
    words = message.message.replace(prefix, "").strip().split()
    if len(words) < 3:
        irc.send_message(target, "Bad command. See help message")
        return

    command, country, language = words[:3]
    parameter = " ".join(words[3:])

    logger.info("Handling news request. Command=%s, country=%s, language=%s",
                command, country, language)
    google_news = GoogleNews(country=country, lang=language)

    try:
        if command == "top":
            # The only command that takes no parameter.
            handle_top(irc, google_news, target)
        elif command in ("topic", "location", "search"):
            handlers = {
                "topic": handle_topic,
                "location": handle_location,
                "search": handle_search,
            }
            handlers[command](irc, google_news, target, parameter)
        else:
            irc.send_message(target, "I don't recognize that command")
    except Exception:  # pylint: disable=broad-except
        # Any handler failure is logged and reported back to the requester.
        logger.error("Unable to fetch news", exc_info=True)
        irc.send_message(target, "I was unable to fetch news")

Exemple #7

0

Afficher le fichier

Fichier : main.py Projet : AlexGustafsson/irc-news-bot

def handle_top(irc: IRC, google_news: GoogleNews, target: str) -> None:
    """Handle a top-news request: send the first five top headlines."""
    irc.send_message(target, "Working on it")
    logger.info("Fetching top news")
    headlines = google_news.top_news()["entries"][:5]
    for headline in headlines:
        irc.send_message(target, headline["title"])

Exemple #8

0

Afficher le fichier

Fichier : main.py Projet : AlexGustafsson/irc-news-bot

def handle_search(irc: IRC, google_news: GoogleNews, target: str,
                  query: str) -> None:
    """Handle a search request: send the first five matching headlines."""
    irc.send_message(target, "Working on it")
    logger.info("Fetching news for query %s", query)
    headlines = google_news.search(query)["entries"][:5]
    for headline in headlines:
        irc.send_message(target, headline["title"])

Exemple #9

0

Afficher le fichier

Fichier : main.py Projet : AlexGustafsson/irc-news-bot

def handle_location(irc: IRC, google_news: GoogleNews, target: str,
                    location: str) -> None:
    """Handle a location request: send the first five local headlines."""
    irc.send_message(target, "Working on it")
    logger.info("Fetching news for location %s", location)
    headlines = google_news.geo_headlines(location)["entries"][:5]
    for headline in headlines:
        irc.send_message(target, headline["title"])

Exemple #10

0

Afficher le fichier

Fichier : data.py Projet : CDORID/MPG

        def get_news(self):
            """Search Google News for the player and return table data.

            The query is built from the ``info_lastname`` value of the first
            row of ``self.data_player``, ``self.player_team`` and the word
            "football".

            Returns:
                tuple: ``(table, columns)`` where ``table`` is a list of
                records with the keys ``Date``, ``Title`` and ``Link`` (at
                most 30 rows) and ``columns`` is a list of
                ``{"name": ..., "id": ...}`` dicts, one per column.
            """
            gn = GoogleNews()
            first_row = self.data_player.iloc[0, :]
            keys = str(first_row['info_lastname'] + ' ' +
                       str(self.player_team) + ' football')
            search = gn.search(keys)
            news = pd.DataFrame(search['entries'])

            def _day_month_year(parsed):
                # Render the parsed publication time as d/m/yyyy, unpadded.
                parts = (parsed.tm_mday, parsed.tm_mon, parsed.tm_year)
                return '/'.join(str(part) for part in parts)

            news['Date'] = news['published_parsed'].apply(_day_month_year)
            news = (news[['Date', 'title', 'link']]
                    .head(30)
                    .rename(columns={'title': 'Title', 'link': 'Link'}))
            columns = [{"name": col, "id": col} for col in news.columns]
            table = news.to_dict('records')
            return table, columns

Exemple #11

0

Afficher le fichier

    def scrape_google_news(self, company_name, start_date, end_date, bodies):
        """Search Google News for a company within a date range.

        Args:
            company_name: search query; also copied into every record.
            start_date: passed as ``from_`` to the search.
            end_date: passed as ``to_`` to the search.
            bodies: when truthy, each record also carries a ``"body"`` key
                holding the entry summary with its HTML stripped.

        Returns:
            list: one dict per entry with the keys ``headline``, optionally
            ``body``, ``source``, ``published_date`` and ``company_name``.

        Any exception raised while searching or building the records is
        handed to ``abort(400, ...)``.
        """
        try:
            client = GoogleNews(lang=self.language, country='US')
            feed = client.search(company_name,
                                 helper=True,
                                 when=None,
                                 from_=start_date,
                                 to_=end_date,
                                 proxies=None,
                                 scraping_bee=None)
            # Only the first 16 characters of ``published`` are parsed.
            date_chars = slice(0, 16)
            include_body = bool(bodies)

            records = []
            for entry in feed['entries']:
                record = {"headline": entry['title']}
                if include_body:
                    summary_html = BeautifulSoup(entry['summary'],
                                                 "html.parser")
                    record["body"] = summary_html.get_text()
                record["source"] = entry['link']
                record["published_date"] = format_date(
                    entry['published'][date_chars],
                    to_datetime=True,
                    current_format='%a, %d %b %Y')
                record["company_name"] = company_name
                records.append(record)
            return records

        except Exception as e:
            abort(400, e)

Exemple #12

0

Afficher le fichier

class NewsSpider(scrapy.Spider):
    """Spider that visits Google News search results and stores paragraph text.

    The start URLs are computed in the class body, so the Google News search
    runs when the class is defined.
    """
    name = 'news'
    # Build the list of news links with the GoogleNews module.
    gn = GoogleNews(lang='en', country='US')
    s = gn.search('Russia', from_='2020-10-08', to_='2020-11-08')
    # Keep at most the first 100 links. Indexing 0..99 directly raised
    # IndexError whenever the search returned fewer than 100 entries.
    links = [entry['link'] for entry in s['entries'][:100]]

    start_urls = links

    def parse(self, response):
        """Append the text of all ``<p>`` elements of the page to News.txt."""
        with open('News.txt', 'a') as file:
            file.write(str(response.css('p::text').getall()))

Exemple #13

0

Afficher le fichier

def news_sentiment_analysis(word, n):  #query, num articles
    """Download articles for a query and compute keywords and sentiment.

    Args:
        word: Google News search query.
        n: maximum number of successfully processed articles to return.

    Returns:
        pandas.DataFrame: one row per processed article with the columns
        ``title``, ``text``, ``keywords``, ``polarity`` and ``subjectivity``.
        Articles that fail to download, parse or analyse are skipped.
    """
    titles = []
    texts = []
    keywords = []
    polarities = []
    subjectivities = []
    gn = GoogleNews(lang='en', country='US')
    s = gn.search(word)
    processed = 0
    for article in s['entries']:
        if processed >= n:
            break
        try:
            a = Article(url=article['link'])
            a.download()
            a.parse()
            a.nlp()
            sentiment = TextBlob(a.text).sentiment
            row = (article['title'], a.text, a.keywords,
                   sentiment.polarity, sentiment.subjectivity)
        except Exception:
            # Best effort: skip articles that cannot be processed. Catching
            # Exception (not a bare except) lets Ctrl-C still interrupt.
            continue
        # Append only once every value is available. Appending step by step
        # left the lists with different lengths when a later step failed,
        # which made the DataFrame construction below raise.
        titles.append(row[0])
        texts.append(row[1])
        keywords.append(row[2])
        polarities.append(row[3])
        subjectivities.append(row[4])
        processed += 1
    di = {
        'title': titles,
        'text': texts,
        'keywords': keywords,
        'polarity': polarities,
        'subjectivity': subjectivities
    }
    return pd.DataFrame.from_dict(di)

Exemple #14

0

Afficher le fichier

Fichier : google_news_daily.py Projet : feluelle/finance-data-builder

 def extract_news_data(company: str, ds: str, next_ds: str,
                       **kwargs: dict) -> str:
     """Download Google News results for a company and store them as JSON.

     The results for the window ``ds`` .. ``next_ds`` are written to
     ``<LOCAL_STORAGE>/google_news/<company>/<ds>.json``; the directory is
     created when missing.

     Args:
         company: search query and name of the target sub-directory.
         ds: passed as ``from_`` to the search; also the file stem.
         next_ds: passed as ``to_`` to the search.
         **kwargs: accepted and ignored.

     Returns:
         The path of the written file as a string.
     """
     target_dir = Path(f'{LOCAL_STORAGE}/google_news/{company}')
     # Make sure the destination directory exists before downloading.
     target_dir.mkdir(parents=True, exist_ok=True)
     news = GoogleNews().search(query=company, from_=ds, to_=next_ds)
     destination = target_dir.joinpath(f'{ds}.json')
     with open(destination, 'w') as handle:
         json.dump(news, handle)
     return str(destination)

Exemple #15

0

Afficher le fichier

class API:
    """Callable wrapper that searches Google News for a list of terms."""

    def __init__(self, terms, lang, country):
        """Store the search terms and create the GoogleNews client.

        Args:
            terms: iterable of search queries.
            lang: language passed to ``GoogleNews``.
            country: country passed to ``GoogleNews``.
        """
        self.terms = terms
        self.client = GoogleNews(lang, country)

    @staticmethod
    def format(x):
        """Return the entry title passed through ``unidecode``.

        Returns an empty string when the entry has no ``'title'`` key.
        """
        try:
            title = x['title']
            return unidecode(title)
        except KeyError:
            return ''

    def __call__(self, from_, to_):
        """Search every term in the given window and return unique titles.

        Args:
            from_: passed as ``from_`` to each search.
            to_: passed as ``to_`` to each search.

        Returns:
            list: de-duplicated formatted titles, in order of first
            appearance.
        """
        results = []
        for q in self.terms:
            results += self.client.search(query=q, from_=from_,
                                          to_=to_)['entries']
        # dict.fromkeys de-duplicates while keeping first-seen order;
        # list(set(...)) returned the titles in a nondeterministic order.
        return list(dict.fromkeys(API.format(r) for r in results))

Exemple #16

0

Afficher le fichier

Fichier : main.py Projet : AlexGustafsson/irc-news-bot

def handle_topic(irc: IRC, google_news: GoogleNews, target: str,
                 topic: str) -> None:
    """Handle a topic request: send the first five headlines of a topic.

    When fetching fails with the message "unsupported topic", the list of
    supported topics is sent instead; any other exception is re-raised.
    """
    irc.send_message(target, "Working on it")
    logger.info("Fetching news for topic %s", topic)
    try:
        headlines = google_news.topic_headlines(topic)["entries"][:5]
        for headline in headlines:
            irc.send_message(target, headline["title"])
    except Exception as exception:  # pylint: disable=broad-except
        if str(exception) != "unsupported topic":
            raise exception
        # See: https://github.com/kotartemiy/pygooglenews#stories-by-topic-1
        irc.send_message(
            target, "That topic is not supported. Supported topics are:")
        irc.send_message(
            target,
            "world, nation, business, technology, entertainment, science, sports, health"
        )

Exemple #17

0

Afficher le fichier

from pygooglenews import GoogleNews
import streamlit as st
import pandas as pd
f'''
-----------------------------------

# Google News Feed Searcher

-----------------------------------
'''

# Google News client created with its default arguments.
gn = GoogleNews()

# Streamlit inputs: the search term (default 'Trump') and the look-back
# window in days (slider from 1 to 365, initial value 1).
search_term = st.text_input('Search Term:', 'Trump')
search_range = st.slider('Search Range (days):', 1, 365, 1)

# Search with the window expressed as "<days>d".
search = gn.search(search_term, when=f'{search_range}d')

# Tabulate the returned entries for the display code that follows.
data = pd.DataFrame.from_dict(search['entries'])
f'''
# {search_term} News Articles
Last *{search_range} day/s*

------------------------------------------- 
'''

# Display articles found
for row in range(1, data.shape[1]):
    f'''
    ## {data['published'].iloc[row]}
    ### {data['title'].iloc[row]}

Exemple #18

0

Afficher le fichier

Fichier : news_classifier.py Projet : Priyanshu-Shrivastava/News_Article-Classifier

# Plot one ROC curve per class together with the chance diagonal.
# figsize must be a (width, height) pair; the original one-element tuple
# (8, ) is not a valid figure size. NOTE(review): the height of 6 is a
# guess; adjust to the intended aspect ratio.
plt.figure(figsize=(8, 6))
for i in range(n_classes):
    # NOTE(review): ``.Category`` here is a pandas selection, not a scalar,
    # so the legend shows its repr; confirm whether a single name was meant.
    plt.plot(fpr[i], tpr[i], label='ROC curve of {0} (area = {1:0.9f})'
                                   ''.format(category_id_df[category_id_df['category_id'] == i].Category, roc_auc[i]))
# Dashed black diagonal from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.show()

pip install pygooglenews

from pygooglenews import GoogleNews
# GoogleNews instance for the English-language US edition
gn = GoogleNews(lang = 'en', country = 'US')

# Headlines for the 'BUSINESS' topic, requested without proxies or ScrapingBee.
business = gn.topic_headlines('BUSINESS', proxies=None, scraping_bee = None)

pip install newsapi-python

from newsapi import NewsApiClient

# NOTE(review): the API key is hard-coded in source; consider loading it from
# configuration or the environment, and rotating this key.
api = NewsApiClient(api_key='b1b6e4269a814e8bbe8a477b3226e918')

# Top headlines per category (English, US). Note that ``business`` rebinds
# the name assigned from GoogleNews.topic_headlines earlier in this script.
business = api.get_top_headlines(category = 'business', language = 'en', country='us')

sport = api.get_top_headlines(category = 'sports', language = 'en', country='us')

enter = api.get_top_headlines(category = 'entertainment', language = 'en', country='us')

Exemple #19

0

Afficher le fichier

 def _get_api(self):
     """Return a new ``GoogleNews`` client created with no arguments."""
     api = GoogleNews()
     return api

Exemple #20

0

Afficher le fichier

Fichier : django_auto_add.py Projet : SayC0der/News_feed_blog

from pygooglenews import GoogleNews
import time
from linkpreview import link_preview
import os
import django

# Search the Arabic edition of Google News; the query is Arabic for "Android".
gn = GoogleNews(lang='ar')
keyword_results = gn.search('أندرويد')
data = keyword_results['entries']

# Configure Django before importing any model: point it at the project
# settings (unless already set in the environment) and initialise it.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'news_feed.settings')
django.setup()

# Imported after django.setup() above.
from articles.models import article as classy

# Initialised here; not used by the lines visible in this excerpt.
all_news = []
test_counter = 0

for i in data:
    try:
        preview = link_preview(i.link)
        components = {'title': i.title, 'link': i.link, 'image': preview.image}

        if str(components['image']) == 'None':
            pass
        else:
            new_data = classy(title=components['title'],
                              link=components['link'],
                              image=components['image'])
            new_data.save()
            print('\n')

Exemple #21

0

Afficher le fichier

from pygooglenews import GoogleNews

# Fetch the top news with a Portuguese-language client.
gn = GoogleNews(lang='pt')
top = gn.top_news()

# Print the title of every entry, one per line.
headlines = top['entries']
for item in headlines:
    print(item.title)

Exemple #22

0

Afficher le fichier

Fichier : scrape.py Projet : AeroLad/eduroundup

# Configuration object carrying the browser user agent; strUserAgent is
# defined outside this excerpt.
objConfig = Config()
objConfig.browser_user_agent = strUserAgent

# NOTE(review): the engine URL is an empty string here — presumably redacted
# or filled in elsewhere; confirm.
objEngine = create_engine('', echo=False)
objSession = sessionmaker(bind=objEngine)()
objToday = datetime.date.today()
# NOTE(review): despite its name, this date is two days before today.
objYesterday = objToday - datetime.timedelta(days=2)
# Both dates formatted as YYYY-MM-DD for the search window below.
strToday = objToday.strftime("%Y-%m-%d")
strYesterday = objYesterday.strftime("%Y-%m-%d")

# No proxies are used for the search.
proxyDict = None

objWPClient = WPClient()

print("Obtaining news")
# English-language client with country 'pk'; search restricted to the window
# from strYesterday to strToday.
objGnews = GoogleNews(lang='en', country='pk')
objSearch = objGnews.search('education school university pakistan',
                            proxies=proxyDict,
                            from_=strYesterday,
                            to_=strToday)

arrResults = objSearch['entries']

# Replace each entry's 'published' string with a date object, in place.
# NOTE(review): dateparser.parse presumably returns None for text it cannot
# parse, which would make .date() fail — confirm.
for dictResult in arrResults:
    dictResult['published'] = dateparser.parse(dictResult['published']).date()
print("Obtained news: {0} items".format(len(arrResults)))

# Keyword list; note that "school" appears twice.
arrKeywords = [
    "school", "education", "school", "university", "class", "grade",
    "kindergarten", "student", "institute"
]

Exemple #23

0

Afficher le fichier

Fichier : customCyberFeed.py Projet : n0x-L/Daily_Cyber_News_Feed

Default accepted topics are: 
world, nation, business, technology, entertainment
science, sports, health 
 
"""

from pygooglenews import GoogleNews
import smtplib

from datetime import datetime
from pytz import timezone

from email.mime.text import MIMEText

# Create the Google News client (English language, country 'CA')
gn = GoogleNews(lang='en', country='CA')

# Timezone objects used for time conversion
mountain = timezone('America/Edmonton')
gmt = timezone('GMT')

# Text of the e-mail message, initially empty
email_message = ''


# Some helpful functions
def print_titles(results):
    """Print the title of every entry in ``results['entries']``.

    Each title is preceded by a blank line, and a final newline string is
    printed after the last one.
    """
    entries = results['entries']
    for entry in entries:
        headline = '\n' + entry['title']
        print(headline)
    print('\n')

Exemple #24

0

Afficher le fichier

             "Sandro Alex","Sargento Fahur","Sebastião Oliveira","Sérgio Brito","Sergio Souza","Sérgio Toledo",
             "Sérgio Vidigal","Severino Pessôa","Shéridan","Sidney Leite","Silas Câmara","Silvia Cristina",
             "Sílvio Costa Filho","Soraya Santos","Sóstenes Cavalcanti","Stefano Aguiar","Subtenente Gonzaga",
             "Tabata Amaral","Tadeu Alencar","Talíria Petrone","Tereza Cristina","Tereza Nelma","Tiago Dimas",
             "Tiago Mitraud","Tiririca","Tito","Toninho Wandscheer","Túlio Gadêlha","Uldurico Júnior",
             "Vaidon Oliveira","Valdevan Noventa","Valmir Assunção","Vanda Milani","Vander Loubet","Vanderlei Macris",
             "Vavá Martins","Vermelho","Vicentinho","Vicentinho Júnior","Vilson da FETAEMG","Vinicius Carvalho",
             "Vinícius Farah","Vinícius Gurgel","Vinicius Poit","Vitor Hugo","Vítor Lippi","Wagner Montes",
             "Waldenor Pereira","Walter Alves","Weliton Prado","Wellington Roberto","Wilson Santiago",
             "Wladimir Garotinho","Wolney Queiroz","Zé Carlos","Zé Mário","Zé Neto","Zé Silva","Zé Vitor","Zeca Dirceu"]

from pandas import DataFrame
from pygooglenews import GoogleNews


# Portuguese-language client for Brazil; the top news are fetched once.
gn = GoogleNews(lang = 'pt', country = 'BR')
top = gn.top_news()

# Collected rows: [name, title, link, published, source href, source title].
news = []
# Number of names processed so far (printed as a progress indicator).
contagem = 0

for dep, name in enumerate(deputados_manchetes):
    # Headlines from the last 336 hours whose title contains the name.
    s = gn.search('intitle:{}'.format(name), when = '336h')

    for x, entry in enumerate(s.get('entries')):
        source = entry.get('source')
        news.append([
            name,
            entry.get('title'),
            entry.get('link'),
            entry.get('published'),
            source.get('href'),
            source.get('title'),
        ])

    contagem += 1
    print(contagem)

Exemple #25

0

Afficher le fichier

Fichier : Covid-19NewsMain.py Projet : sghosh0365/Covid-19NewsFeed

			f_issue = open(Issue_counter_filename, "r")
			var_issue_counter = int(f_issue.readline())
			f_issue.close()
			var_issue_counter += 1
			f_issue = open(Issue_counter_filename, "w")
			f_issue.write(str(var_issue_counter))
			f_issue.close()
		else:
			var_issue_counter = 1
			f_issue = open(Issue_counter_filename, "w")
			f_issue.write(str(var_issue_counter))
			f_issue.close()
			issue_counter_flg=1
    today = date.today()
    issue_date = today.strftime("%B %d %Y")
    gn = GoogleNews(lang='en', country='IN')
    covid_news = gn.topic_headlines('CAAqIggKIhxDQkFTRHdvSkwyMHZNREZqY0hsNUVnSmxiaWdBUAE')
    covid_news_local=gn.search(f'coronavirus {loc}')
    f_more = open(more_stories_filename, encoding='utf-8', mode="a")
    f_more.write('<!DOCTYPE html>')
    f_more.write('<html>')
    f_more.write('<body>')
    f = open(html_filename, encoding='utf-8', mode="a")
    f.write('<!DOCTYPE html>')
    f.write('<html>')
    f.write('<body>')
    f.write('<div>')
    cover_image_idx=np.random.randint(0,7)
    cover_image=coverpic_li[cover_image_idx]
    f.write(f'<img src={cover_image} width="1000" height="400">')
    f.write(

Exemple #26

0

Afficher le fichier

 def __init__(self, terms, lang, country):
     """Store the search terms and create the GoogleNews client.

     Args:
         terms: search terms kept on the instance as ``self.terms``.
         lang: first positional argument passed to ``GoogleNews``.
         country: second positional argument passed to ``GoogleNews``.
     """
     self.terms = terms
     news_client = GoogleNews(lang, country)
     self.client = news_client

Exemple #27

0

Afficher le fichier

def execute_interview_request(ir_object):
    LOGGER.info(
        f'[tag:INTRUNTER10] tasks.execute_interview_request: received execute request for ir_id: {ir_object.id}'
    )

    alphabet_list = ['A', 'B', 'C', 'D', 'E', 'F']

    tag_dict = {
        'funding': {
            'keywords': [
                'funding', 'investor', 'valuation', 'term sheet',
                'venture capital', 'venture debt'
            ],
            'mail_tag_line':
            'Type {}: Funding'
        },
        'acquisition': {
            'keywords': ['acquisition', 'acquired'],
            'mail_tag_line': 'Type {}: Acquisition'
        },
        'collabaration': {
            'keywords': ['collabarate', 'collabaration'],
            'mail_tag_line': 'Type {}: Collabaration'
        },
        'social good': {
            'keywords': ['donate'],
            'mail_tag_line': 'Type {}: Strategic initiative'
        },
        'covid': {
            'keywords': ['covid'],
            'mail_tag_line': 'Type {}: Covid'
        },
    }

    # ir_object = InterviewRequest.objects.get(id=ir_id)
    irr_object = get_object_or_None(InterviewRequestResult,
                                    type_form_id=ir_object.type_form_id,
                                    interview_request_id=ir_object.id,
                                    company_id=ir_object.company.id,
                                    user=ir_object.user.id)
    if not irr_object:
        irr_object = InterviewRequestResult(
            type_form_id=ir_object.type_form_id,
            is_published=False,
            interview_request_id=ir_object.id,
            company_id=ir_object.company.id,
            user_id=ir_object.user.id,
        )
        irr_object.save()

    user_name = ir_object.user.first_name
    company_name = ir_object.company.name
    result_data = dict()
    try:
        post_log("Getting news from google", 'STARTED')
        attachment_file_list = []
        user_tag_list = []
        from pygooglenews import GoogleNews
        gn = GoogleNews()
        s = gn.search(company_name.lower())
        final_data = []
        for news in s['entries']:
            new_dict = {
                'title': news['title'],
                'link': news['link'],
                'published': news['published']
            }
            summary_texts = []
            tags = []
            try:
                soup = BeautifulSoup(
                    requests.get(news['link'], timeout=300).content,
                    "html.parser")
                for p in soup.findAll('p'):
                    # print(p.text)
                    dummy_text = p.text
                    tags.extend(get_tag(dummy_text))
                    if "“" in dummy_text:
                        summary_texts.append(dummy_text)
                        # break
                if summary_texts:
                    new_dict['summary'] = summary_texts
                    new_dict['tags'] = list(set(tags))
                    user_tag_list.extend(new_dict['tags'])
                    final_data.append(new_dict)
            except Exception as e:
                print(f"{e} : {news}")
        result_data['news_data'] = final_data
        user_email = ir_object.user.email
        post_log(f"Getting news from google for {user_email}", 'COMPLETED')
        # creating a Dataframe object
        news_df = pd.DataFrame(final_data)
        news_df['Date'] = pd.to_datetime(news_df['published'], errors='coerce')
        news_df.sort_values(by=['Date'], inplace=True, ascending=False)
        del news_df['Date']
        file_name = f'{company_name}_Scrapped News.csv'
        news_df.to_csv(f'{DEFAULT_PATH}/{file_name}')
        post_log(f"File creation for the scrapped news for {user_email}",
                 'COMPLETED')
        attachment_file_list.append(file_name)
        google_play_app_id = ir_object.company.google_play_app_id
        if google_play_app_id:
            post_log(f"Srapping reviews for the app for {user_email}",
                     'STARTED')
            result = reviews_all(
                google_play_app_id,
                sleep_milliseconds=0,  # defaults to 0
                lang='en',  # defaults to 'en'
                country='us',  # defaults to 'us'
                sort=Sort.NEWEST  # defaults to Sort.MOST_RELEVANT
                # filter_score_with=5 # defaults to None(means all score)
            )
            post_log(f"Srapping reviews for the app for {user_email}",
                     'COMPLETED')

            df = pd.DataFrame(result)
            # df = pd.read_csv('{DEFAULT_PATH}/Netflix_all_reviews.csv')
            # print(df.head())
            # Product Scores
            # post_log(f"Histogram creation for the app reviews for {user_email}", 'STARTED')
            # fig = px.histogram(df, x="score")
            # fig.update_traces(marker_color="turquoise", marker_line_color='rgb(8,48,107)',
            #                   marker_line_width=1.5)
            # fig.update_layout(title_text='Product Score')
            # HTML(fig.to_html())
            # fig.write_image(f"{DEFAULT_PATH}/{company_name}_playstore_ratings.png")
            # plt.show()
            # plt.savefig(f'{DEFAULT_PATH}/{company_name}_playstore_ratings.png')
            # attachment_file_list.append(f"{company_name}_playstore_ratings.png")
            # post_log(f"Histogram creation for the app reviews for {user_email}", 'COMPLETED')
            reviews_df = df
            # reviews_df["review"] = reviews_df["content"].apply(lambda x: x.replace("No Negative", "").replace("No Positive", ""))
            reviews_df["is_bad_review"] = reviews_df["score"].apply(
                lambda x: 1 if x < 3 else 0)
            # select only relevant columnss
            reviews_df = reviews_df[[
                "content", "reviewCreatedVersion", "at", "is_bad_review"
            ]]
            # reviews_df.head()
            reviews_df["review"] = reviews_df["content"]
            # reviews_df
            post_log(f"Sentiment analysis for {user_email}", 'STARTED')
            # return the wordnet object value corresponding to the POS tag

            # clean text data
            reviews_df["review_clean"] = reviews_df["review"].apply(
                lambda x: clean_text(x))
            # add sentiment anaylsis columns

            sid = SentimentIntensityAnalyzer()
            reviews_df["sentiments"] = reviews_df["review"].apply(
                lambda x: sid.polarity_scores(str(x)))
            reviews_df = pd.concat([
                reviews_df.drop(['sentiments'], axis=1),
                reviews_df['sentiments'].apply(pd.Series)
            ],
                                   axis=1)
            # add number of characters column
            reviews_df["nb_chars"] = reviews_df["review"].apply(
                lambda x: len(str(x)))

            # add number of words column
            reviews_df["nb_words"] = reviews_df["review"].apply(
                lambda x: len(str(x).split(" ")))
            # create doc2vec vector columns

            documents = [
                TaggedDocument(doc, [i])
                for i, doc in enumerate(reviews_df["review_clean"].apply(
                    lambda x: str(x).split(" ")))
            ]

            # train a Doc2Vec model with our text data
            model = Doc2Vec(documents,
                            vector_size=5,
                            window=2,
                            min_count=1,
                            workers=4)

            # transform each document into a vector data
            doc2vec_df = reviews_df["review_clean"].apply(
                lambda x: model.infer_vector(str(x).split(" "))).apply(
                    pd.Series)
            doc2vec_df.columns = [
                "doc2vec_vector_" + str(x) for x in doc2vec_df.columns
            ]
            reviews_df = pd.concat([reviews_df, doc2vec_df], axis=1)
            # add tf-idfs columns
            tfidf = TfidfVectorizer(min_df=10)
            tfidf_result = tfidf.fit_transform(
                reviews_df["review_clean"]).toarray()
            tfidf_df = pd.DataFrame(tfidf_result,
                                    columns=tfidf.get_feature_names())
            tfidf_df.columns = ["word_" + str(x) for x in tfidf_df.columns]
            tfidf_df.index = reviews_df.index
            reviews_df = pd.concat([reviews_df, tfidf_df], axis=1)
            # show is_bad_review distribution
            # reviews_df["sentiment"].value_counts(normalize = True)
            post_log(f"Sentiment analysis for {user_name}", 'COMPLETED')

            # print wordcloud
            post_log(f"Creating word cloud for {user_name}", 'STARTED')
            wc_name = show_wordcloud(reviews_df["review"], company_name)
            attachment_file_list.append(wc_name)
            post_log(f"Creating word cloud for {user_name}", 'COMPLETED')
            # highest positive sentiment reviews (with more than 5 words)
            reviews_df[reviews_df["nb_words"] >= 5].sort_values(
                "pos", ascending=False)[["review", "pos"]].head(10)

            # show is_bad_review distribution
            reviews_df["is_bad_review"].value_counts(normalize=True)

            # lowest negative sentiment reviews (with more than 5 words)
            post_log(f"Creating negative reviews csv for {user_name}",
                     'STARTED')
            negative_df = reviews_df[reviews_df["nb_words"] >= 5].sort_values(
                "neg", ascending=False)[["content", "neg"]].head(50)
            negative_df.to_csv(
                f'{DEFAULT_PATH}/{company_name}_negative_reviews.csv',
                columns=["content"])
            attachment_file_list.append(f'{company_name}_negative_reviews.csv')
            negative_reviews_data = negative_df.to_json(orient="split")
            parsed = json.loads(negative_reviews_data)
            result_data['negative_reviews'] = parsed
            post_log(f"Creating negative reviews csv for {user_name}",
                     'COMPLETED')
        else:
            attachment_file_list.extend(
                ['app_playstore.png', 'app_word_cloud.png'])
        # gbrowniepoint
        post_log(f"Creation of email body for {user_name}", 'STARTED')

        # Set Global Variables
        gmail_user = '******'
        gmail_password = GMAIL_PASSWORD

        fromaddr = "*****@*****.**"
        toaddr = "*****@*****.**"

        # instance of MIMEMultipart
        msg = MIMEMultipart()

        # storing the senders email address
        msg['From'] = fromaddr

        # storing the receivers email address
        msg['To'] = toaddr

        # storing the subject
        msg['Subject'] = f"Interview Brownie : {user_name}'s report"

        # string to store the body of the mail
        body = f'''
                <p>Hi {user_name},</p>
                <div dir="ltr"><br />Here is your report<br /><br /><strong><u>1. PR synthesis</u></strong>&nbsp;<br /><br />
              '''
        # print(f'body before adding tags : {body}')
        # print(user_tag_list)
        for index, tag in enumerate(list(set(user_tag_list))):
            tag_data = get_first_tag_quotes(tag, final_data)
            type_str = tag_dict[tag]['mail_tag_line'].format(
                alphabet_list[index])
            summary = '<br />'.join(map(str, tag_data["summary"]))
            body = body + f'<u>{type_str}</u><br /><br />Quote:<br />&nbsp;&ldquo;{summary}<br />Source: <a href="{tag_data["link"]}" target="_blank">{tag_data["title"]}</a><br /><br />'

        # print(f'body after adding tags : {body}')

        body = body + f'''
              <p><strong><u><em>How do you use these insights in your interview?<br /></em></u></strong><br />
              Interviewer - Do you have any questions for us?<br />{user_name} - Yes, I read about the launch of ASAP - how do people get assigned to such projects internally?<br /><br />
              From Type A.<br /><br />Another one,<br />{user_name} - I also read about the platform for data collaboration for covid - amazing to see the pace of execution on that one, how is that going?<br /><br />
              From type B<br /><br />{user_name} - There were 40 million raised for the clinical analysis, do we raise money for specific projects / verticals or was this a covid specific development?<br /><br />
              From type C.<br /><br />Now remember, these are just examples and you should be able to come up with genuine talking points, questions, things that you can relate to now with minimal effort of going through the links <br /><br />You can also find a consolidated list of all public mentions of {company_name} in the past year attached.<br /><u></u></p><div dir="ltr">&nbsp;</div>
              '''
        if google_play_app_id:
            body = body + f'''<div dir="ltr"><strong><u>2. End user understanding</u></strong></div>
                  <ul>
                  <li>A significant chunk of the bad ratings of the app are generic bad reviews, investing in talking to these consumers might uncover issues yet unknown</li>
                  <li>1 peculiar thing was the mention of cbse in a cluster of reviews, the CBSE learning experience might have some issues in particular</li>
                  </ul>
                  &nbsp;</div>
                  <div>This is a word cloud from all the positive reviews,<br />
                  <br><img src="cid:1"><br>
                  <ul>
                  <li>The trend of generic reviews continues here as well, 1 suggestion could be to request reviewers to write a few lines describing what they loved about their experience</li>
                  </ul>
                  <div>Thanks for trying out the beta, please feel free to revert with any questions, suggestions/ feedback etc and it will be super helpful to us if you can share this in your network - a linkedin post talking about your experience will help us reach more people<br /><br />If you don't have anything to ask or say, please revert with your rating on 5 on how useful did you find this tool, it will help us gauge it's efficacy&nbsp;<br /><br />Cheers,</div>
                  </div>
                  <p>--</p>
                  <div dir="ltr" data-smartmail="gmail_signature">
                  <div dir="ltr">
                  <div>
                  <div dir="ltr">
                  <div dir="ltr">
                  <div dir="ltr">
                  <div>Gaurav Dagde and Gagan Gehani</div>
                  </div>
                  </div>
                  </div>
                  </div>
                  </div>
                  </div>'''
        else:
            body = body + f'''
                <div><strong><u>2. End user understanding<br /></u></strong></div>
                <div><br />Playstore reviews - Our system couldn't find {company_name} app on the playstore.
                 Nonetheless, I am attaching screenshots of the output of another beta tester to give you a taste of what you can expect from this section</div>
                <br><img src="cid:0"><br>
                <br><img src="cid:1"><br>
                <p>If you don't have anything to ask or say, please revert with your rating on 5 on how useful did you find this tool, it will help us gauge it's efficacy
                <br /><br />All the best for your interview!</p>
                <div>Thanks for trying out the beta, please feel free to revert with any questions, suggestions/ feedback etc and it will be super helpful to us if you can share this in your network - a linkedin post talking about your experience will help us reach more people<br /><br />If you don't have anything to ask or say, please revert with your rating on 5 on how useful did you find this tool, it will help us gauge it's efficacy&nbsp;<br /><br />Cheers,</div>
                </div>
                <p>--</p>
                <div dir="ltr" data-smartmail="gmail_signature">
                <div dir="ltr">
                <div>
                <div dir="ltr">
                <div dir="ltr">
                <div dir="ltr">
                <div>Gaurav Dagde and Gagan Gehani</div>
                </div>
                </div>
                </div>
                </div>
                </div>
                </div
                '''

        result_data['mail_body'] = body
        # attach the body with the msg instance
        msg.attach(MIMEText(body, 'html', 'utf-8'))
        # file_list = ['ps_image.png',file_name]
        img_count = 0
        for attach_file in attachment_file_list:
            # open the file to be sent
            # filename = file_name
            attachment = open(f'/app/mail_content/{attach_file}', "rb")
            # to add an attachment is just add a MIMEBase object to read a picture locally.
            post_log(f"filename : {attach_file}", "IN_PROGRESS")
            if '.png' in attach_file:
                # post_log(f"In PNG block", "IN_PROGRESS")
                # with open(f'/app/mail_content/{attach_file}', 'rb') as attachment:
                # set attachment mime and file name, the image type is png
                mime = MIMEBase('image', 'png', filename=attach_file)
                # add required header data:
                mime.add_header('Content-Disposition',
                                'attachment',
                                filename=attach_file)
                mime.add_header('X-Attachment-Id', '{}'.format(img_count))
                mime.add_header('Content-ID', '<{}>'.format(img_count))
                # read attachment file content into the MIMEBase object
                mime.set_payload(attachment.read())
                # encode with base64
                encoders.encode_base64(mime)
                # add MIMEBase object to MIMEMultipart object
                msg.attach(mime)
                img_count += 1
            else:
                # post_log(f"In else block", "IN_PROGRESS")
                # instance of MIMEBase and named as p
                p = MIMEBase('application', 'octet-stream')

                # To change the payload into encoded form
                p.set_payload(attachment.read())

                # encode into base64
                encoders.encode_base64(p)

                p.add_header('Content-Disposition',
                             "attachment; filename= %s" % attach_file)

                # attach the instance 'p' to instance 'msg'
                msg.attach(p)

        # creates SMTP session
        s = smtplib.SMTP('smtp.gmail.com', 587)

        # start TLS for security
        s.starttls()

        # Authentication
        s.login(fromaddr, gmail_password)

        # Converts the Multipart msg into a string
        text = msg.as_string()
        post_log(f"Creation of email body for {user_name}", 'COMPLETED')
        # sending the mail
        s.sendmail(fromaddr, toaddr, text)
        post_log(f"Email sending for the user : {user_name}", 'COMPLETED')
        # terminating the session
        s.quit()

        # updating object value
        ir_object.is_visited_by_cron = True
        ir_object.save()
        # updating result
        irr_object.is_published = True
        irr_object.data = result_data
        irr_object.save()
    except Exception as e:
        # df.to_csv(f'{company_name}_all_reviews.csv')
        # traceback.print_exc()
        post_log(f"{e} : for user : {user_name}", "ERROR")
        irr_object.data = result_data
        irr_object.save()

    LOGGER.info(
        f'[tag:INTRUNTER20] tasks.execute_interview_request: finished execution for ir_id: {ir_object.id}'
    )

Exemple #28

0

Afficher le fichier

Fichier : ticker_news.py Projet : fraserprice/TickerNews

def get_articles_info(company_name, start=None, end=None):
    """Search Google News for *company_name* and return the result entries.

    Args:
        company_name: Query string handed to ``GoogleNews.search``.
        start: Forwarded unchanged as the ``from_`` keyword of the search
            (presumably a lower date bound; accepted format is whatever
            ``GoogleNews.search`` accepts -- confirm against the library).
        end: Forwarded unchanged as the ``to_`` keyword of the search
            (presumably an upper date bound -- confirm against the library).

    Returns:
        The value stored under the ``'entries'`` key of the search result.
    """
    news_client = GoogleNews()
    search_result = news_client.search(company_name, from_=start, to_=end)
    return search_result['entries']