Exemple #1
0
def task_getVizdata():
    '''
    Background job that refreshes stored visualization data so end users
    do not have to wait for it to be computed on demand.

    Walks every business returned by getTallyBusiness() and, for viztypes
    0, 1 and 2, regenerates and stores the data whenever
    checkVizdataTimestamp() reports a count of 0 for that viztype. One job
    log entry is written per business after its viztypes were examined.
    '''
    for business_id in getTallyBusiness():  # a list of strings
        # (viztype, function producing the data for that viztype)
        # 2020-01-22 viztype 0 and 3 are sharing an API for historical
        #     reasons. If have time, please change it.
        refresh_plan = (
            (0, getDataViztype0),
            (1, yelpTrendyPhrases),
            (2, yelpReviewCountMonthly),
        )

        for viztype, build_payload in refresh_plan:
            # NOTE(review): the literal 14 is presumably a freshness window
            # in days -- confirm against checkVizdataTimestamp.
            if checkVizdataTimestamp(business_id, viztype, 14) != 0:
                continue

            payload = json.dumps(build_payload(business_id), sort_keys=False)
            updateVizdata(business_id, viztype, payload)
            # triggeredby=0 means the refresh was triggered by the job
            insertVizdataLog(business_id, viztype, triggeredby=0)

        # insert a log for the task
        insertJobLogs(business_id, 1, 0, "Updated viztype 0,1,2,3")
Exemple #2
0
def task_getVizdata(business_ids=None):
    '''
    Generate visualization data by background jobs for better user experience

    For every business, the timestamp of its latest Yelp review log is
    compared (by calendar date) with the stored timestamp of viztypes 0, 1
    and 2. A viztype is regenerated when it has no timestamp row at all or
    when its date is older than the date of the latest review log.

    Args:
        business_ids: iterable of business IDs to process. When None, the
            list returned by getTallyBusiness() is used.

    Returns:
        None
    '''
    if business_ids is None:
        business_ids = getTallyBusiness()  # return a list of strings

    for business_id in business_ids:
        print(
            f"Generating visualization data for business ID {business_id}...")

        review_rows = getLatestYelpReviewLog(business_id)
        if len(review_rows) == 0:
            print("Visualization data are recent. No need to re-generate.")
            # No review log to compare against for THIS business: skip it,
            # but keep processing the remaining businesses. (A `return`
            # here would silently abort the whole batch.)
            continue
        timestamp_yelpreview = review_rows[0][0]

        # (viztype, function producing the data for that viztype)
        # 2020-01-22 viztype 0 and 3 are sharing an API for historical
        #     reasons. If have time, please change it.
        generators = (
            (0, getDataViztype0),
            (1, yelpTrendyPhrases),
            (2, yelpReviewCountMonthly),
        )

        for viztype, generate in generators:
            vizdata_rows = getVizdataTimestamp(business_id, viztype)
            if len(vizdata_rows) > 0:
                # Compare .date() values only. Comparing the datetimes
                # directly raises
                # TypeError: can't compare offset-naive and offset-aware
                # datetimes
                timestamp_vizdata = vizdata_rows[0][0]
                if timestamp_vizdata.date() >= timestamp_yelpreview.date():
                    continue  # stored data is not older than the reviews

            vizdata = json.dumps(generate(business_id), sort_keys=False)
            # NOTE(review): json.dumps always returns a non-empty string
            # (e.g. "null" or "[]"), so this guard never filters anything;
            # it is kept to leave the stored results unchanged.
            if vizdata is not None and len(vizdata) > 0:
                updateVizdata(business_id, viztype, vizdata)
                insertVizdataLog(business_id, viztype,
                                 triggeredby=0)  # triggered by job

        # insert a log for the task
        job_message = "Updated viztype 0,1,2,3"
        print(job_message)
        insertJobLogs(business_id, 1, 0, job_message)  # job type 1, success
Exemple #3
0
def home(request, business_id):
    '''get data for views (APIs)

    Reads the integer query parameter `viztype` and responds with the
    visualization data for `business_id`:

    * if getLatestVizdata() returns rows, the first column of the first row
      is sent back as-is;
    * otherwise the data is generated for viztype 0, 1, 2 or 4, serialized
      with json.dumps, stored via updateVizdata() and logged via
      insertVizdataLog() as triggered by the end user.

    Args:
        request: the incoming request; only request.GET.get('viztype') is
            read.
        business_id: ID of the business whose data is requested.

    Returns:
        HttpResponse whose body is the visualization data. The body is an
        empty string when the viztype is missing, not an integer or
        unknown, or when an exception occurs before any data was obtained
        (the exception is printed, not raised).
    '''
    result = ""
    try:
        # check whether the business ID has never been scraped before
        # check whether some other session(s) is scraping the same business ID
        if not isTallyBusiness(business_id) and not lock_yelpscraper.isLocked(
                business_id):
            # Drop any stored data and register the business. The scraper
            # itself is not started from this request.
            deleteVizdata(business_id)
            insertTallyBusiness([business_id])

        # A missing or non-numeric viztype makes int() raise; that is
        # handled by the except clause below like any other failure.
        viztype = int(request.GET.get('viztype'))

        data = getLatestVizdata(business_id,
                                viztype=viztype)  # a list of tuples
        if len(data) > 0:
            result = data[0][0]
        else:
            # viztype -> function producing the data for that viztype
            generators = {
                0: getDataViztype0,  # viztype0 and viztype3
                1: yelpTrendyPhrases,
                2: yelpReviewCountMonthly,
                4: yelpReviewSentiment,
            }
            generate = generators.get(viztype)
            if generate is None:
                print(f"Error: There is no viztype {str(viztype)}.")
            else:
                # `result` is assigned before the storage calls so the
                # generated data is still returned if storing it fails.
                result = json.dumps(generate(business_id), sort_keys=False)

                # update table ds_vizdata and ds_vizdata_log
                updateVizdata(business_id, viztype, result)
                insertVizdataLog(business_id, viztype,
                                 triggeredby=1)  # triggered by end user
    except Exception as e:
        # Best effort: report the problem and still answer the request
        # with whatever `result` holds at this point.
        print(e)

    return HttpResponse(result)