Example No. 1
def getDataViztype0(business_id):
    # do web scraping
    yelpScraperResult = yelpScraper(business_id)
    if yelpScraperResult.empty:
        return {}

    # viztype0
    df_positive, df_negative = getReviewPosNegPhrases(yelpScraperResult)
    # viztype3
    df_bydate = getYelpWordsReviewFreq(yelpScraperResult)
 
    # API data formatting
    results = {'viztype0':
                    {'positive': [{'term': pos_term, 'score': pos_score} 
                                  for pos_term, pos_score in zip(df_positive['term'], df_positive['score'])], 
                     'negative': [{'term': neg_term, 'score': neg_score} 
                                  for neg_term, neg_score in zip(df_negative['term'], df_negative['score'])]},
               'viztype3':
                    {'star_data': [{'date': row[0], 'cumulative_avg_rating': row[1], 'weekly_avg_rating': row[2]}
                                   for row in df_bydate[['date_of_week', 'cumulative_avg_rating', 'stars']].values]
                    }
              }
    del df_positive, df_negative, df_bydate

    return results
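
The dictionary built above serializes directly to JSON, so it can be exposed through a thin Django view like the ones in the later examples. The sketch below is only an illustration: it assumes a Django project where getDataViztype0 is importable, and the view name is hypothetical.

from django.http import JsonResponse

def viztype0_api(request, business_id):
    # hypothetical wrapper view: runs the example above and returns its dict as JSON
    results = getDataViztype0(business_id)
    if not results:
        # an empty dict means the scraper found no reviews for this business
        return JsonResponse({'error': 'no reviews found for this business'}, status=404)
    return JsonResponse(results)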
Example No. 2
def home(request, business_id):
    result = "This is Yelp Analytics home page."
    viztype = request.GET.get('viztype')
    if viztype == '1':
        result = "this is a line chart."
    else:
        result = json.dumps(yelpScraper(business_id))
Example No. 3
def task_yelpScraper(business_ids=None, job_type=0):

    if business_ids is None:
        business_ids = getTallyBusiness()  # return a list of strings

    for business_id in business_ids:
        print(f"scraping business ID {business_id}...")

        ## get review date range to scrape, e.g.
        # date_range = (datetime.strptime('2018-06-28', '%Y-%m-%d'),
        #               datetime.strptime('2018-07-01', '%Y-%m-%d'))
        yelp_review_log = getLatestYelpReviewLog(business_id)
        if not yelp_review_log:
            date_range = None
            m1 = "for all dates"
        else:
            date_range = (yelp_review_log[0][0], datetime.now())
            m1 = f"from {date_range[0].strftime('%Y-%m-%d')} to {date_range[1].strftime('%Y-%m-%d')}"
        print(f"scraping {m1}")

        # scrape Yelp reviews
        status_code, data = yelpScraper(business_id, date_range=date_range)
        if status_code == 200:
            returncode = updateYelpReviews(business_id, data)
            job_message = f"status code {status_code}, scraped total {len(data)} reviews, {m1}"
            insertJobLogs(business_id, job_type, returncode, job_message)
            if len(data) > 0:
                insertYelpReviewLog(business_id, data[0][0])  # date
        else:
            job_message = f"status code {status_code}"
            if status_code == 503:  # this is a special case for web scraping...
                job_message += " Wasn't able to assign an unblocked proxy IP"
            insertJobLogs(business_id, job_type, 1, job_message)
        print(job_message)
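
The task can be run either against the default list from getTallyBusiness() or against an explicit set of business IDs. A minimal invocation sketch follows; the IDs are placeholders and the alternate job_type value is an assumption, not taken from the original code.

if __name__ == '__main__':
    # scrape every business returned by getTallyBusiness()
    task_yelpScraper()

    # or scrape specific businesses as a one-off run (placeholder IDs, assumed job_type)
    task_yelpScraper(business_ids=['example-coffee-shop', 'example-taco-stand'], job_type=1)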
Example No. 4
def home(request, business_id):
    result = "This is Yelp Analytics home page."
    viztype = request.GET.get('viztype')
    if viztype == '1':
        result = json.dumps(yelpTrendyPhrases(business_id))
    else:
        result = json.dumps(yelpScraper(business_id))
    return HttpResponse(result)
Example No. 5
def getPosNegPhrases(request, business_id):
    yelpScraperResult = yelpScraper(business_id)
    # result = json.dumps(getYelpWords(yelpScraperResult))
    result = json.dumps(getYelpPhrases(yelpScraperResult))
    return HttpResponse(result)