def task_getVizdata():
    '''
    Background job: pre-compute visualization data for every tally business
    so that end users do not have to wait for it.

    For each business ID returned by getTallyBusiness() and for each of the
    viztypes 0, 1 and 2, the data is rebuilt, stored and logged whenever
    checkVizdataTimestamp(business_id, viztype, 14) returns 0. One job-log
    entry is written per business afterwards.
    '''
    # NOTE(review): a second `task_getVizdata` is defined later in this file;
    # at import time that later definition replaces this one.

    # viztype -> function that produces its payload.
    # 2020-01-22 viztype 0 and 3 are sharing an API for historical reasons.
    # if have time, please change it
    producers = (
        (0, getDataViztype0),          # viztype 0 and 3
        (1, yelpTrendyPhrases),
        (2, yelpReviewCountMonthly),
    )

    for business_id in getTallyBusiness():  # return a list of strings
        for viztype, build_payload in producers:
            # presumably 14 is a freshness window in days - TODO confirm
            if checkVizdataTimestamp(business_id, viztype, 14) != 0:
                continue
            payload = build_payload(business_id)
            vizdata = json.dumps(payload, sort_keys=False)
            updateVizdata(business_id, viztype, vizdata)
            insertVizdataLog(business_id, viztype,
                             triggeredby=0)  # triggered by job

        # insert a log for the task
        job_message = "Updated viztype 0,1,2,3"
        insertJobLogs(business_id, 1, 0, job_message)
def _refresh_vizdata_if_stale(business_id, viztype, build_payload,
                              timestamp_yelpreview):
    '''
    Regenerate and store one viztype of one business when it is outdated.

    The stored data counts as outdated when getVizdataTimestamp() returns no
    row, or when the date of its first row is earlier than the date of
    ``timestamp_yelpreview``.

    business_id          -- business whose data is checked
    viztype              -- visualization type number handed to the DB helpers
    build_payload        -- callable(business_id) returning JSON-serializable
                            data
    timestamp_yelpreview -- timestamp of the latest Yelp review log entry
    '''
    data = getVizdataTimestamp(business_id, viztype)
    if data:
        timestamp_vizdata = data[0][0]
        # If don't get .date(), it will raise
        # TypeError: can't compare offset-naive and offset-aware datetimes
        if timestamp_vizdata.date() >= timestamp_yelpreview.date():
            return  # stored data is at least as recent as the reviews

    vizdata = json.dumps(build_payload(business_id), sort_keys=False)
    # NOTE(review): json.dumps always returns a non-empty string (an empty
    # result serializes to "null", "[]" or "{}"), so this guard never skips
    # anything. Kept to preserve behaviour - confirm whether empty payloads
    # were meant to be skipped.
    if vizdata:
        updateVizdata(business_id, viztype, vizdata)
        insertVizdataLog(business_id, viztype,
                         triggeredby=0)  # triggered by job


def task_getVizdata(business_ids=None):
    '''
    Generate visualization data by background jobs for better user experience

    business_ids -- optional iterable of business ID strings to process.
                    When omitted (None), the list comes from
                    getTallyBusiness().

    For every business the latest Yelp review log timestamp is compared with
    the timestamp of each stored viztype (0, 1, 2); outdated or missing data
    is regenerated. A business without any review log entry is skipped, and
    the remaining businesses are still processed. Returns None.
    '''
    if business_ids is None:
        # Only fall back to the full list when the caller gave none, so an
        # explicitly passed list is honoured rather than overwritten.
        business_ids = getTallyBusiness()  # return a list of strings

    # 2020-01-22 viztype 0 and 3 are sharing an API for historical reasons.
    # if have time, please change it
    builders = (
        (0, getDataViztype0),          # viztype 0 and 3
        (1, yelpTrendyPhrases),
        (2, yelpReviewCountMonthly),
    )

    for business_id in business_ids:
        print(
            f"Generating visualization data for business ID {business_id}...")
        data = getLatestYelpReviewLog(business_id)
        if not data:
            print("Visualization data are recent. No need to re-generate.")
            # no reviews to process for this business; `continue` rather
            # than `return` so the rest of the batch is not abandoned
            continue
        timestamp_yelpreview = data[0][0]

        for viztype, build_payload in builders:
            _refresh_vizdata_if_stale(business_id, viztype, build_payload,
                                      timestamp_yelpreview)

        # insert a log for the task
        job_message = "Updated viztype 0,1,2,3"
        print(job_message)
        insertJobLogs(business_id, 1, 0, job_message)  # job type 1, success
def home(request, business_id):
    '''get data for views (APIs)

    Return the visualization data selected by the ``viztype`` query
    parameter for ``business_id``.

    request     -- incoming request; ``request.GET['viztype']`` must be an
                   integer string (0, 1, 2 or 4 are supported)
    business_id -- business whose visualization data is requested

    Stored data from getLatestVizdata() is returned when available.
    Otherwise the data is computed on the fly, serialized to JSON, stored and
    logged as triggered by the end user. Any exception (including a missing
    or non-integer ``viztype``) is printed and the response body is whatever
    ``result`` held at that point, an empty string if nothing was produced.

    Returns an HttpResponse whose body is the JSON string (or "").
    '''
    result = ""
    try:
        # viztype -> function computing the payload when nothing is stored
        builders = {
            0: getDataViztype0,  # viztype0 and viztype3
            1: yelpTrendyPhrases,
            2: yelpReviewCountMonthly,
            4: yelpReviewSentiment,
        }

        # check whether the business ID has never been scraped before
        # check whether some other session(s) is scraping the same business ID
        if not isTallyBusiness(business_id) and not lock_yelpscraper.isLocked(
                business_id):
            deleteVizdata(business_id)
            # The on-demand scraper call (task_yelpScraper, job_type=1) and
            # the wait-for-lock loop that used to follow are disabled.
            insertTallyBusiness([business_id])

        viztype = int(request.GET.get('viztype'))
        data = getLatestVizdata(business_id, viztype=viztype)  # a list of tuples
        if data:
            result = data[0][0]
        elif viztype in builders:
            result = json.dumps(builders[viztype](business_id),
                                sort_keys=False)
            # update table ds_vizdata and ds_vizdata_log
            # NOTE(review): only freshly computed data is written back;
            # confirm this matches the original nesting of the update step.
            updateVizdata(business_id, viztype, result)
            insertVizdataLog(business_id, viztype,
                             triggeredby=1)  # triggered by end user
        else:
            print(f"Error: There is no viztype {str(viztype)}.")
    except Exception as e:
        # Best-effort endpoint: report the problem and still answer the
        # request with whatever result is available.
        print(e)
    return HttpResponse(result)