Example #1
import csv
import json
from os import path

from google_play_scraper import Sort, reviews_all


def main():
    app_domain = 'com.grubhub.android'

    date_range = [
        '2020-05-01', '2020-05-02', '2020-05-03', '2020-05-04', '2020-05-05',
        '2020-05-06', '2020-05-07', '2020-05-08', '2020-05-09', '2020-05-10'
    ]

    app_reviews = []

    if not path.exists('data.json'):
        result = reviews_all(
            app_domain,
            sleep_milliseconds=0,  # defaults to 0
            lang='en',  # defaults to 'en'
            country='us',  # defaults to 'us'
            sort=Sort.MOST_RELEVANT,  # defaults to Sort.MOST_RELEVANT
            # Comment next line out to get all reviews of all scores
            filter_score_with=1  # defaults to None(means all score)
        )

        for review in result:
            temp = review['at']
            temp = temp.strftime('%Y-%m-%d')

            for date in date_range:
                print(temp, date)
                if temp == date:
                    print("True")
                    app_reviews.append(review)

        with open('data.json', 'w') as outfile:
            json.dump(app_reviews, outfile, default=myconverter)

    with open('data.json', 'r') as json_file:
        data = json.load(json_file)

    # now we will open a file for writing
    csv_file = open('data.csv', 'w', encoding='utf-8')

    # create the csv writer object
    csv_writer = csv.writer(csv_file)

    # Counter variable used for writing
    # headers to the CSV file
    count = 0

    for review in data:
        if count == 0:
            # Writing headers of CSV file
            header = review.keys()
            csv_writer.writerow(header)
            count += 1

        # Writing data of CSV file
        csv_writer.writerow(review.values())
    csv_file.close()
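
Example #1 serializes the review dicts with json.dump(..., default=myconverter) but never defines myconverter. A minimal sketch, assuming it only needs to make the datetime values (such as the 'at' field) JSON-serializable, the same pattern Example #10 uses in its default() helper:

import datetime

def myconverter(o):
    # Hypothetical helper: convert datetime objects to ISO strings for json.dump.
    if isinstance(o, (datetime.date, datetime.datetime)):
        return o.isoformat()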
Example #2
def __scrap_reviews(self, score):
    result = reviews_all(
        self.package_name,
        sleep_milliseconds=0,  # defaults to 0
        lang=self.lang,  # defaults to 'en'
        country=self.country,  # defaults to 'us'
        sort=Sort.MOST_RELEVANT,  # defaults to Sort.MOST_RELEVANT
        filter_score_with=score,  # defaults to None(means all score)
    )
    self.reviews[score] = result
Example #3
def persist_reviews_for_bundle_ids(session):
    bundle_ids = session.query(BundleId).all()
    for index, bundle_id in tqdm(enumerate(bundle_ids)):
        try:
            app_data = app(bundle_id.native_id)
            app_row_id = persist_app_data(session, app_data)
            if app_row_id > 0:
                review_data = reviews_all(bundle_id.native_id, count=5)
                persist_review_data(session, app_row_id, review_data)
        except Exception as e:
            logging.exception(e)
Example #4
def crawl_all_app_reviews(app_id, **kwargs):
    app_reviews = reviews_all(
        app_id,
        sleep_milliseconds=0,
        lang=kwargs.get('lang', 'en'),
        country=kwargs.get('country', 'us'),
        sort=Sort.MOST_RELEVANT,
        filter_score_with=kwargs.get('score', 5),
    )

    app_object = App.objects.save_app(app_id)
    app_reviews = filter_new_reviews(app_reviews)
    save_app_reviews(app_reviews, app_object)
Example #5
def main(args):
    logger.info(f"About to get reviews for {args.app_id}.")

    result = reviews_all(
        args.app_id,
        lang="pt",
        country="br",
        sort=Sort.NEWEST,
        sleep_milliseconds=150,
    )

    if not result:
        logger.info(f"Could not retrieve any review for {args.app_id}.")
        return

    dataset_location = os.path.join(args.output_dir, args.app_id)
    dataset_file = os.path.join(dataset_location, "reviews.txt")

    logger.info(f"Found {len(result)}. Dumping to {dataset_file}.")

    os.makedirs(dataset_location, exist_ok=True)

    with open(dataset_file, "w+") as output_file:
        columns = ["score", "content", "at", "reviewCreatedVersion"]
        skipped = defaultdict(int)

        for r_id, review in enumerate(result):
            if ("reviewCreatedVersion" not in review
                    or not review["reviewCreatedVersion"]):
                skipped["no_version"] += 1
                continue  # We skip reviews without a version

            row = [review[c] for c in columns]

            if not all(row):
                skipped["empty_field"] += 1
                continue  # We skip any review with empty fields

            line = "******".join(map(handle_review_field, row))

            output_file.write(line)
            output_file.write("\n")

        logger.info(f"Skipped reviews: {skipped}")

    logger.info(f"Done writing.")
Example #6
import csv

from google_play_scraper import reviews_all


def play_store_scraper(package):
    results = reviews_all(package)

    # Creates or updates the CSV with this name
    with open(r'google_reviews.csv', 'w', newline='') as file:
        fieldnames = ['username', 'review', 'score']
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()

        # Adds the fields to the CSV
        for x, item in enumerate(results):
            username = item['userName']
            score = item['score']
            review = item['content']

            try:
                print(f'{x}: {username} says: {review}')
                writer.writerow({'username': username, 'review': review, 'score': score})
            except Exception:
                print('Failed to add entry XXX')
Example #7
from google_play_scraper import reviews_all
import json

result = reviews_all(
    'ca.gc.cbsa.coronavirus',
    lang='en',  # defaults to 'en'
    country='us'  # defaults to 'us'
)

for i in result:
    if i.get('repliedAt', 0):
        i['repliedAt'] = i['repliedAt'].strftime("%m/%d/%Y, %H:%M:%S")
    if i.get('at', 0):
        i['at'] = i['at'].strftime("%m/%d/%Y, %H:%M:%S")

with open('google.json', 'w', encoding='utf-8') as f:
    json.dump(result, f, ensure_ascii=False, indent=4)
Example #8
def get_reviews():
  result = reviews_all(
      APP_PACKAGE,
      lang=LANG
  )
  return [review for review in result if (DATE_TO_LAST_REVIEWS < review[AT_FIELD])]
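
Example #8 relies on module-level constants that the snippet does not show. A minimal illustrative sketch, assuming AT_FIELD names the 'at' timestamp key of each review dict and DATE_TO_LAST_REVIEWS is a datetime cutoff; the concrete values below are made up:

import datetime

APP_PACKAGE = 'com.example.app'  # hypothetical package id
LANG = 'en'
AT_FIELD = 'at'  # key of the review timestamp in each result dict
DATE_TO_LAST_REVIEWS = datetime.datetime(2020, 5, 1)  # keep only reviews newer than this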
Example #9
from google_play_scraper import app
from google_play_scraper import Sort, reviews_all
import pandas as pd
import numpy as np

us_reviews = reviews_all(
    'co.digithera.v2.quitgenius',
    sleep_milliseconds=0,  # defaults to 0
    lang='en',  # defaults to 'en'
    country='us',  # defaults to 'us'
    sort=Sort.MOST_RELEVANT,  # defaults to Sort.MOST_RELEVANT
)

df_busu = pd.DataFrame(np.array(us_reviews), columns=['review'])

df_busu = df_busu.join(pd.DataFrame(df_busu.pop('review').tolist()))

df_busu.head()

Example #10
from google_play_scraper import app, reviews_all, reviews, Sort
import datetime
import json


def default(o):
    if isinstance(o, (datetime.date, datetime.datetime)):
        return o.isoformat()


results = reviews_all(
    'com.innersloth.spacemafia',
    lang='en',  # defaults to 'en'
    country='us',  # defaults to 'us'
)
#print(results)
with open('scrape-amongus.json', 'w') as f:
    json.dump(results, f, default=default)
Example #11
from google_play_scraper import Sort, reviews_all
import pandas as pd

result = reviews_all(
    'com.tencent.ig',
    sleep_milliseconds=0, # defaults to 0
    lang='id', # defaults to 'en'
    country='id', # defaults to 'us'
    sort=Sort.MOST_RELEVANT, # defaults to Sort.MOST_RELEVANT
    filter_score_with=None # defaults to None(means all score)
)

df = pd.DataFrame(result)
dataset = df[['userName', 'content', 'score']]
dataset.to_csv('pubg_review.csv')
Example #12
"""

# If you pass `continuation_token` as an argument to the reviews function at this point,
# it will crawl the items after 3 review items.

result, _ = reviews(
    'com.activision.callofduty.shooter',
    continuation_token=continuation_token # defaults to None(load from the beginning)
)
"""
from google_play_scraper import Sort, reviews_all

result = reviews_all(
    'com.activision.callofduty.shooter',
    sleep_milliseconds=0,  # defaults to 0
    lang='en',  # defaults to 'en'
    country='us',  # defaults to 'us'
    sort=Sort.MOST_RELEVANT,  # defaults to Sort.MOST_RELEVANT
    filter_score_with=None  # defaults to None(means all score)
)

print(result[1]["userName"])

y = []
for a in result:
    info = {'Name': a["userName"], 'review': a['content'], 'score': a['score']}
    y.append(info)

import csv
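
The docstring fragment at the top of Example #12 refers to the paginated reviews() API that reviews_all wraps. A minimal sketch of that two-step pattern, assuming the standard google_play_scraper signature in which reviews() returns a (result, continuation_token) tuple:

from google_play_scraper import Sort, reviews

# First batch: fetch a few reviews plus a continuation token.
result, continuation_token = reviews(
    'com.activision.callofduty.shooter',
    lang='en',
    country='us',
    sort=Sort.MOST_RELEVANT,
    count=3,
)

# Next batch: pass the token back to continue after the first 3 items.
more_results, _ = reviews(
    'com.activision.callofduty.shooter',
    continuation_token=continuation_token,
)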
Example #13
from csv import DictReader, DictWriter

from google_play_scraper import reviews_all

IN_FILE = 'gps_app_review_test_data.csv'

# read app name and id from csv file
apps = []
with open(IN_FILE, newline='', encoding='utf-8') as file:
    reader = DictReader(file)
    for row in reader:
        apps.append({'app_id': row['app_id']})

# for each app, fetch all reviews and save them to a separate csv file
for app in apps:
    reviews = reviews_all(app['app_id'])
    app['reviews'] = reviews  # save list of reviews to existing dictionary
    if app.get('reviews'):  # Not all apps have reviews
        # use the app_name to name the output file
        outfile = app['app_id'].replace('.', '_') + '_reviews.csv'
        with open(outfile, 'w', encoding='utf-8', newline='') as file:
            dict_writer = DictWriter(file, app['reviews'][0].keys())
            dict_writer.writeheader()
            dict_writer.writerows(app['reviews'])
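
Example #13 expects gps_app_review_test_data.csv to contain an app_id column (one Google Play package id per row). A minimal sketch that writes such a file, using package ids that appear elsewhere on this page as illustrative values:

from csv import DictWriter

rows = [{'app_id': 'com.grubhub.android'}, {'app_id': 'com.innersloth.spacemafia'}]
with open('gps_app_review_test_data.csv', 'w', newline='', encoding='utf-8') as file:
    writer = DictWriter(file, fieldnames=['app_id'])
    writer.writeheader()
    writer.writerows(rows)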
Example #14
from google_play_scraper import reviews_all
import re

scrape_result = reviews_all(
    'com.marktguru.mg2.de',
    lang='de',  # defaults to 'en'
    country='us',  # defaults to 'us'
)

print("-----------")
print(" Marktguru ")
print("-----------")
print("")
print("Length = " + str(len(scrape_result)))
print("")

reviews = [e["content"] for e in scrape_result]

patterns = dict()


def add_pattern(s):
    patterns[s] = re.compile(s)


add_pattern("[tT]ablet|[iI][pP]ad")
add_pattern("[cC]oin|[cC]ashback|[gG]utschein")
add_pattern("[lL]ist|[sS]peichern")
add_pattern("[bB]riefkast")
add_pattern("[lL]ieblings|[fF]avorit")
add_pattern(
Example #15
def fn_run():
    #getting parameter value
    app_name = request.args.get('app_name')
    #print(str(datetime.datetime.now()) +'-->Fetching '+app_name)
    #getting the review
    googleplay_app_review = reviews_all(
        app_name,  #'com.fecredit.cards',
        sleep_milliseconds=10,  # defaults to 0
        lang='en',  # defaults to 'en'
        country='us',  # defaults to 'us'
        sort=Sort.MOST_RELEVANT,  # defaults to Sort.MOST_RELEVANT; other values can be NEWEST, RATING
        # Known issue: MOST_RELEVANT does not return a consistent number of reviews.
        # https://githubmemory.com/repo/JoMingyu/google-play-scraper/activity?page=5
        # Press 'show more' in the review list until it can no longer be loaded, and compare it to
        # the return of the reviews method, and you will see the same thing. Since Google Play
        # doesn't provide the full review list, it's not a bug in this library.
        # filter_score_with=5  # defaults to None (means all scores)
    )
    googleplay_app_review = json.dumps(googleplay_app_review,
                                       default=myconverter,
                                       ensure_ascii=False)

    #now add sentiment score and tokenization
    #convert a json_object_string to a dictionary
    googleplay_app_review_dict = json.loads(googleplay_app_review)
    #print(str(datetime.datetime.now()) +'-->No of review '+str(len(googleplay_app_review_dict)))
    for i in googleplay_app_review_dict:
        try:
            review_id = 'GooglePlayReview' + i["reviewId"]
            review_date = i['at']
            review_content = i['content']
            if review_content is not None:
                try:
                    review_content = make_text_cleaner(review_content)
                except Exception as e:
                    print("Except 4" + str(e))
                    pass
            else:
                #go for next round
                review_content = ''
            rating = i['score']
            replyContent = i['replyContent']
            if replyContent is not None:
                try:
                    replyContent = make_text_cleaner(replyContent)
                except Exception as e:
                    print("Except 3" + str(e))
            else:
                #go for next round
                replyContent = ""

            repliedAt = i['repliedAt']
            if repliedAt is None:
                repliedAt = " "
            #Steven TEMP starts for add in translated English
            #sentiment_score, translated_review = translate_gen_sa_score(review_content)
            sentiment_score = gen_sa_score(review_content)
            username = i['userName']

            if username is not None:
                try:
                    username = make_text_cleaner(username)
                except Exception as e:
                    print("Except 5" + str(e))
                    pass
            else:
                #go for next round
                username = ''

            #now for tokenized review file and clean the comment
            #Steven 08JUL2021 starts
            #list_tokenized_words = word_tokenize(clean_text(i['content']))
            list_tokenized_words = word_tokenize(clean_text(review_content))
            #Steven 08JUL2021 ends
            # Remove stopwords, maybe this not apply for vnese, can be commented off
            list_tokenized_words = [
                w for w in list_tokenized_words if w not in sw
            ]

            i['tokenized_words'] = list_tokenized_words
            i['sentiment_score'] = sentiment_score

        except Exception as e:
            print("Except 2" + str(e) + "---" + i['content'] + "---" +
                  review_content)
            pass


    # repack it
    updated_googleplay_app_review = json.dumps(googleplay_app_review_dict,
                                               default=myconverter,
                                               ensure_ascii=False)
    #print(str(datetime.datetime.now()) +'-->Completed scraping')
    return updated_googleplay_app_review
Example #16
def execute_interview_request(ir_object):
    LOGGER.info(
        f'[tag:INTRUNTER10] tasks.execute_interview_request: received execute request for ir_id: {ir_object.id}'
    )

    alphabet_list = ['A', 'B', 'C', 'D', 'E', 'F']

    tag_dict = {
        'funding': {
            'keywords': [
                'funding', 'investor', 'valuation', 'term sheet',
                'venture capital', 'venture debt'
            ],
            'mail_tag_line':
            'Type {}: Funding'
        },
        'acquisition': {
            'keywords': ['acquisition', 'acquired'],
            'mail_tag_line': 'Type {}: Acquisition'
        },
        'collabaration': {
            'keywords': ['collabarate', 'collabaration'],
            'mail_tag_line': 'Type {}: Collabaration'
        },
        'social good': {
            'keywords': ['donate'],
            'mail_tag_line': 'Type {}: Strategic initiative'
        },
        'covid': {
            'keywords': ['covid'],
            'mail_tag_line': 'Type {}: Covid'
        },
    }

    # ir_object = InterviewRequest.objects.get(id=ir_id)
    irr_object = get_object_or_None(InterviewRequestResult,
                                    type_form_id=ir_object.type_form_id,
                                    interview_request_id=ir_object.id,
                                    company_id=ir_object.company.id,
                                    user=ir_object.user.id)
    if not irr_object:
        irr_object = InterviewRequestResult(
            type_form_id=ir_object.type_form_id,
            is_published=False,
            interview_request_id=ir_object.id,
            company_id=ir_object.company.id,
            user_id=ir_object.user.id,
        )
        irr_object.save()

    user_name = ir_object.user.first_name
    company_name = ir_object.company.name
    result_data = dict()
    try:
        post_log("Getting news from google", 'STARTED')
        attachment_file_list = []
        user_tag_list = []
        from pygooglenews import GoogleNews
        gn = GoogleNews()
        s = gn.search(company_name.lower())
        final_data = []
        for news in s['entries']:
            new_dict = {
                'title': news['title'],
                'link': news['link'],
                'published': news['published']
            }
            summary_texts = []
            tags = []
            try:
                soup = BeautifulSoup(
                    requests.get(news['link'], timeout=300).content,
                    "html.parser")
                for p in soup.findAll('p'):
                    # print(p.text)
                    dummy_text = p.text
                    tags.extend(get_tag(dummy_text))
                    if "“" in dummy_text:
                        summary_texts.append(dummy_text)
                        # break
                if summary_texts:
                    new_dict['summary'] = summary_texts
                    new_dict['tags'] = list(set(tags))
                    user_tag_list.extend(new_dict['tags'])
                    final_data.append(new_dict)
            except Exception as e:
                print(f"{e} : {news}")
        result_data['news_data'] = final_data
        user_email = ir_object.user.email
        post_log(f"Getting news from google for {user_email}", 'COMPLETED')
        # creating a Dataframe object
        news_df = pd.DataFrame(final_data)
        news_df['Date'] = pd.to_datetime(news_df['published'], errors='coerce')
        news_df.sort_values(by=['Date'], inplace=True, ascending=False)
        del news_df['Date']
        file_name = f'{company_name}_Scrapped News.csv'
        news_df.to_csv(f'{DEFAULT_PATH}/{file_name}')
        post_log(f"File creation for the scrapped news for {user_email}",
                 'COMPLETED')
        attachment_file_list.append(file_name)
        google_play_app_id = ir_object.company.google_play_app_id
        if google_play_app_id:
            post_log(f"Srapping reviews for the app for {user_email}",
                     'STARTED')
            result = reviews_all(
                google_play_app_id,
                sleep_milliseconds=0,  # defaults to 0
                lang='en',  # defaults to 'en'
                country='us',  # defaults to 'us'
                sort=Sort.NEWEST  # defaults to Sort.MOST_RELEVANT
                # filter_score_with=5 # defaults to None(means all score)
            )
            post_log(f"Srapping reviews for the app for {user_email}",
                     'COMPLETED')

            df = pd.DataFrame(result)
            # df = pd.read_csv('{DEFAULT_PATH}/Netflix_all_reviews.csv')
            # print(df.head())
            # Product Scores
            # post_log(f"Histogram creation for the app reviews for {user_email}", 'STARTED')
            # fig = px.histogram(df, x="score")
            # fig.update_traces(marker_color="turquoise", marker_line_color='rgb(8,48,107)',
            #                   marker_line_width=1.5)
            # fig.update_layout(title_text='Product Score')
            # HTML(fig.to_html())
            # fig.write_image(f"{DEFAULT_PATH}/{company_name}_playstore_ratings.png")
            # plt.show()
            # plt.savefig(f'{DEFAULT_PATH}/{company_name}_playstore_ratings.png')
            # attachment_file_list.append(f"{company_name}_playstore_ratings.png")
            # post_log(f"Histogram creation for the app reviews for {user_email}", 'COMPLETED')
            reviews_df = df
            # reviews_df["review"] = reviews_df["content"].apply(lambda x: x.replace("No Negative", "").replace("No Positive", ""))
            reviews_df["is_bad_review"] = reviews_df["score"].apply(
                lambda x: 1 if x < 3 else 0)
            # select only relevant columns
            reviews_df = reviews_df[[
                "content", "reviewCreatedVersion", "at", "is_bad_review"
            ]]
            # reviews_df.head()
            reviews_df["review"] = reviews_df["content"]
            # reviews_df
            post_log(f"Sentiment analysis for {user_email}", 'STARTED')
            # return the wordnet object value corresponding to the POS tag

            # clean text data
            reviews_df["review_clean"] = reviews_df["review"].apply(
                lambda x: clean_text(x))
            # add sentiment anaylsis columns

            sid = SentimentIntensityAnalyzer()
            reviews_df["sentiments"] = reviews_df["review"].apply(
                lambda x: sid.polarity_scores(str(x)))
            reviews_df = pd.concat([
                reviews_df.drop(['sentiments'], axis=1),
                reviews_df['sentiments'].apply(pd.Series)
            ],
                                   axis=1)
            # add number of characters column
            reviews_df["nb_chars"] = reviews_df["review"].apply(
                lambda x: len(str(x)))

            # add number of words column
            reviews_df["nb_words"] = reviews_df["review"].apply(
                lambda x: len(str(x).split(" ")))
            # create doc2vec vector columns

            documents = [
                TaggedDocument(doc, [i])
                for i, doc in enumerate(reviews_df["review_clean"].apply(
                    lambda x: str(x).split(" ")))
            ]

            # train a Doc2Vec model with our text data
            model = Doc2Vec(documents,
                            vector_size=5,
                            window=2,
                            min_count=1,
                            workers=4)

            # transform each document into a vector data
            doc2vec_df = reviews_df["review_clean"].apply(
                lambda x: model.infer_vector(str(x).split(" "))).apply(
                    pd.Series)
            doc2vec_df.columns = [
                "doc2vec_vector_" + str(x) for x in doc2vec_df.columns
            ]
            reviews_df = pd.concat([reviews_df, doc2vec_df], axis=1)
            # add tf-idfs columns
            tfidf = TfidfVectorizer(min_df=10)
            tfidf_result = tfidf.fit_transform(
                reviews_df["review_clean"]).toarray()
            tfidf_df = pd.DataFrame(tfidf_result,
                                    columns=tfidf.get_feature_names())
            tfidf_df.columns = ["word_" + str(x) for x in tfidf_df.columns]
            tfidf_df.index = reviews_df.index
            reviews_df = pd.concat([reviews_df, tfidf_df], axis=1)
            # show is_bad_review distribution
            # reviews_df["sentiment"].value_counts(normalize = True)
            post_log(f"Sentiment analysis for {user_name}", 'COMPLETED')

            # print wordcloud
            post_log(f"Creating word cloud for {user_name}", 'STARTED')
            wc_name = show_wordcloud(reviews_df["review"], company_name)
            attachment_file_list.append(wc_name)
            post_log(f"Creating word cloud for {user_name}", 'COMPLETED')
            # highest positive sentiment reviews (with more than 5 words)
            reviews_df[reviews_df["nb_words"] >= 5].sort_values(
                "pos", ascending=False)[["review", "pos"]].head(10)

            # show is_bad_review distribution
            reviews_df["is_bad_review"].value_counts(normalize=True)

            # lowest negative sentiment reviews (with more than 5 words)
            post_log(f"Creating negative reviews csv for {user_name}",
                     'STARTED')
            negative_df = reviews_df[reviews_df["nb_words"] >= 5].sort_values(
                "neg", ascending=False)[["content", "neg"]].head(50)
            negative_df.to_csv(
                f'{DEFAULT_PATH}/{company_name}_negative_reviews.csv',
                columns=["content"])
            attachment_file_list.append(f'{company_name}_negative_reviews.csv')
            negative_reviews_data = negative_df.to_json(orient="split")
            parsed = json.loads(negative_reviews_data)
            result_data['negative_reviews'] = parsed
            post_log(f"Creating negative reviews csv for {user_name}",
                     'COMPLETED')
        else:
            attachment_file_list.extend(
                ['app_playstore.png', 'app_word_cloud.png'])
        # gbrowniepoint
        post_log(f"Creation of email body for {user_name}", 'STARTED')

        # Set Global Variables
        gmail_user = '******'
        gmail_password = GMAIL_PASSWORD

        fromaddr = "*****@*****.**"
        toaddr = "*****@*****.**"

        # instance of MIMEMultipart
        msg = MIMEMultipart()

        # storing the senders email address
        msg['From'] = fromaddr

        # storing the receivers email address
        msg['To'] = toaddr

        # storing the subject
        msg['Subject'] = f"Interview Brownie : {user_name}'s report"

        # string to store the body of the mail
        body = f'''
                <p>Hi {user_name},</p>
                <div dir="ltr"><br />Here is your report<br /><br /><strong><u>1. PR synthesis</u></strong>&nbsp;<br /><br />
              '''
        # print(f'body before adding tags : {body}')
        # print(user_tag_list)
        for index, tag in enumerate(list(set(user_tag_list))):
            tag_data = get_first_tag_quotes(tag, final_data)
            type_str = tag_dict[tag]['mail_tag_line'].format(
                alphabet_list[index])
            summary = '<br />'.join(map(str, tag_data["summary"]))
            body = body + f'<u>{type_str}</u><br /><br />Quote:<br />&nbsp;&ldquo;{summary}<br />Source: <a href="{tag_data["link"]}" target="_blank">{tag_data["title"]}</a><br /><br />'

        # print(f'body after adding tags : {body}')

        body = body + f'''
              <p><strong><u><em>How do you use these insights in your interview?<br /></em></u></strong><br />
              Interviewer - Do you have any questions for us?<br />{user_name} - Yes, I read about the launch of ASAP - how do people get assigned to such projects internally?<br /><br />
              From Type A.<br /><br />Another one,<br />{user_name} - I also read about the platform for data collaboration for covid - amazing to see the pace of execution on that one, how is that going?<br /><br />
              From type B<br /><br />{user_name} - There were 40 million raised for the clinical analysis, do we raise money for specific projects / verticals or was this a covid specific development?<br /><br />
              From type C.<br /><br />Now remember, these are just examples and you should be able to come up with genuine talking points, questions, things that you can relate to now with minimal effort of going through the links <br /><br />You can also find a consolidated list of all public mentions of {company_name} in the past year attached.<br /><u></u></p><div dir="ltr">&nbsp;</div>
              '''
        if google_play_app_id:
            body = body + f'''<div dir="ltr"><strong><u>2. End user understanding</u></strong></div>
                  <ul>
                  <li>A significant chunk of the bad ratings of the app are generic bad reviews, investing in talking to these consumers might uncover issues yet unknown</li>
                  <li>1 peculiar thing was the mention of cbse in a cluster of reviews, the CBSE learning experience might have some issues in particular</li>
                  </ul>
                  &nbsp;</div>
                  <div>This is a word cloud from all the positive reviews,<br />
                  <br><img src="cid:1"><br>
                  <ul>
                  <li>The trend of generic reviews continues here as well, 1 suggestion could be to request reviewers to write a few lines describing what they loved about their experience</li>
                  </ul>
                  <div>Thanks for trying out the beta, please feel free to revert with any questions, suggestions/ feedback etc and it will be super helpful to us if you can share this in your network - a linkedin post talking about your experience will help us reach more people<br /><br />If you don't have anything to ask or say, please revert with your rating on 5 on how useful did you find this tool, it will help us gauge it's efficacy&nbsp;<br /><br />Cheers,</div>
                  </div>
                  <p>--</p>
                  <div dir="ltr" data-smartmail="gmail_signature">
                  <div dir="ltr">
                  <div>
                  <div dir="ltr">
                  <div dir="ltr">
                  <div dir="ltr">
                  <div>Gaurav Dagde and Gagan Gehani</div>
                  </div>
                  </div>
                  </div>
                  </div>
                  </div>
                  </div>'''
        else:
            body = body + f'''
                <div><strong><u>2. End user understanding<br /></u></strong></div>
                <div><br />Playstore reviews - Our system couldn't find {company_name} app on the playstore.
                 Nonetheless, I am attaching screenshots of the output of another beta tester to give you a taste of what you can expect from this section</div>
                <br><img src="cid:0"><br>
                <br><img src="cid:1"><br>
                <p>If you don't have anything to ask or say, please revert with your rating on 5 on how useful did you find this tool, it will help us gauge it's efficacy
                <br /><br />All the best for your interview!</p>
                <div>Thanks for trying out the beta, please feel free to revert with any questions, suggestions/ feedback etc and it will be super helpful to us if you can share this in your network - a linkedin post talking about your experience will help us reach more people<br /><br />If you don't have anything to ask or say, please revert with your rating on 5 on how useful did you find this tool, it will help us gauge it's efficacy&nbsp;<br /><br />Cheers,</div>
                </div>
                <p>--</p>
                <div dir="ltr" data-smartmail="gmail_signature">
                <div dir="ltr">
                <div>
                <div dir="ltr">
                <div dir="ltr">
                <div dir="ltr">
                <div>Gaurav Dagde and Gagan Gehani</div>
                </div>
                </div>
                </div>
                </div>
                </div>
                </div>
                '''

        result_data['mail_body'] = body
        # attach the body with the msg instance
        msg.attach(MIMEText(body, 'html', 'utf-8'))
        # file_list = ['ps_image.png',file_name]
        img_count = 0
        for attach_file in attachment_file_list:
            # open the file to be sent
            # filename = file_name
            attachment = open(f'/app/mail_content/{attach_file}', "rb")
            # to add an attachment is just add a MIMEBase object to read a picture locally.
            post_log(f"filename : {attach_file}", "IN_PROGRESS")
            if '.png' in attach_file:
                # post_log(f"In PNG block", "IN_PROGRESS")
                # with open(f'/app/mail_content/{attach_file}', 'rb') as attachment:
                # set attachment mime and file name, the image type is png
                mime = MIMEBase('image', 'png', filename=attach_file)
                # add required header data:
                mime.add_header('Content-Disposition',
                                'attachment',
                                filename=attach_file)
                mime.add_header('X-Attachment-Id', '{}'.format(img_count))
                mime.add_header('Content-ID', '<{}>'.format(img_count))
                # read attachment file content into the MIMEBase object
                mime.set_payload(attachment.read())
                # encode with base64
                encoders.encode_base64(mime)
                # add MIMEBase object to MIMEMultipart object
                msg.attach(mime)
                img_count += 1
            else:
                # post_log(f"In else block", "IN_PROGRESS")
                # instance of MIMEBase and named as p
                p = MIMEBase('application', 'octet-stream')

                # To change the payload into encoded form
                p.set_payload(attachment.read())

                # encode into base64
                encoders.encode_base64(p)

                p.add_header('Content-Disposition',
                             "attachment; filename= %s" % attach_file)

                # attach the instance 'p' to instance 'msg'
                msg.attach(p)

        # creates SMTP session
        s = smtplib.SMTP('smtp.gmail.com', 587)

        # start TLS for security
        s.starttls()

        # Authentication
        s.login(fromaddr, gmail_password)

        # Converts the Multipart msg into a string
        text = msg.as_string()
        post_log(f"Creation of email body for {user_name}", 'COMPLETED')
        # sending the mail
        s.sendmail(fromaddr, toaddr, text)
        post_log(f"Email sending for the user : {user_name}", 'COMPLETED')
        # terminating the session
        s.quit()

        # updating object value
        ir_object.is_visited_by_cron = True
        ir_object.save()
        # updating result
        irr_object.is_published = True
        irr_object.data = result_data
        irr_object.save()
    except Exception as e:
        # df.to_csv(f'{company_name}_all_reviews.csv')
        # traceback.print_exc()
        post_log(f"{e} : for user : {user_name}", "ERROR")
        irr_object.data = result_data
        irr_object.save()

    LOGGER.info(
        f'[tag:INTRUNTER20] tasks.execute_interview_request: finished execution for ir_id: {ir_object.id}'
    )