Code Example #1
File: views.py Project: rfaulkner/WMF_Analytics
def json_out(request, utm_campaign):

    utm_campaign = MySQLdb._mysql.escape_string(str(utm_campaign))

    dl = DL.DataLoader(db='db1025')
    lptl = DL.LandingPageTableLoader(db='db1008')

    start_time = lptl.get_earliest_campaign_view(utm_campaign)
    end_time = lptl.get_latest_campaign_view(utm_campaign)
    """ 
        Get the views from the given campaign for each banner 
        =====================================================
    """

    logging.info('Determining views for campaign %s' % utm_campaign)
    sql = "select utm_source, count(*) as views from landing_page_requests where utm_campaign = '%s' and request_time >= %s and request_time <= %s group by 1" % (
        utm_campaign, start_time, end_time)
    results = dl.execute_SQL(str(sql))
    """ builf the condition string for banners to be used in SQL to retrieve impressions"""
    views = dict()
    banner_str = ''
    for row in results:
        views[str(row[0])] = int(row[1])
        banner_str_piece = "utm_source = '%s' or " % row[0]
        banner_str = banner_str + banner_str_piece
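    # strip the trailing " or " from the final banner condition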
    banner_str = banner_str[:-4]
    """ 
        Get the impressions from the given campaign for each banner 
        ===========================================================
    """

    logging.info('Determining impressions for campaign %s' % utm_campaign)
    sql = "select utm_source, sum(counts) from banner_impressions where (%s) and on_minute >= '%s' and on_minute <= '%s' group by 1" % (
        banner_str, start_time, end_time)
    results = dl.execute_SQL(str(sql))
    """ Build JSON, compute click rates """
    click_rate = dict()
    json = 'insertStatistics({ '
    err_str = ''
    for row in results:
        try:
            utm_source = row[0]
            click_rate = float(views[utm_source]) / float(int(row[1]))
            item = '"%s" : %s , ' % (utm_source, click_rate)
            json = json + item

        except:
            err_str = err_str + utm_source + ' '

    json = json[:-2] + '});'

    return render_to_response('live_results/json_out.html', {'html': json},
                              context_instance=RequestContext(request))
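Note that both queries in this example splice values into the SQL with `%` formatting, and only `utm_campaign` is run through `escape_string`. Where the underlying MySQLdb connection is reachable, driver-side parameter binding is the safer pattern, and `json.dumps` can replace the manual payload concatenation. A minimal sketch, assuming a raw MySQLdb connection `conn` rather than the project's `DataLoader` wrapper (whose `execute_SQL` may not accept bind parameters); `campaign_views` and `insert_statistics_js` are illustrative names, not project API:

import json


def campaign_views(conn, utm_campaign, start_time, end_time):
    """Per-banner view counts using driver-side parameter binding."""
    cursor = conn.cursor()
    # MySQLdb escapes each %s placeholder itself, so no manual quoting
    cursor.execute(
        "select utm_source, count(*) as views "
        "from landing_page_requests "
        "where utm_campaign = %s "
        "and request_time >= %s and request_time <= %s "
        "group by 1",
        (utm_campaign, start_time, end_time))
    return dict((row[0], int(row[1])) for row in cursor.fetchall())


def insert_statistics_js(click_rates):
    """Serialize a {banner: click_rate} dict into the insertStatistics() call."""
    return 'insertStatistics(%s);' % json.dumps(click_rates)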
Code Example #2
    def execute_process(self, key, **kwargs):

        logging.info('Commencing caching of live results data at:  %s' %
                     self.CACHING_HOME)
        shelve_key = key
        """ Find the earliest and latest page views for a given campaign  """
        lptl = DL.LandingPageTableLoader(db='db1025')

        query_name = 'report_summary_results_country.sql'
        query_name_1S = 'report_summary_results_country_1S.sql'
        campaign_regexp_filter = '^C_|^C11_'

        dl = DL.DataLoader(db='db1025')
        end_time, start_time = TP.timestamps_for_interval(
            datetime.datetime.utcnow(), 1, hours=-self.DURATION_HRS)
        """ Should a one-step query be used? """
        use_one_step = lptl.is_one_step(
            start_time, end_time, 'C11'
        )  # Assume it is a one step test if there are no impressions for this campaign in the landing page table
        """ 
            Retrieve the latest time for which impressions have been loaded
            ===============================================================
        """

        sql_stmnt = 'select max(end_time) as latest_ts from squid_log_record where log_completion_pct = 100.00'

        results = dl.execute_SQL(sql_stmnt)
        latest_timestamp = results[0][0]
        latest_timestamp = TP.timestamp_from_obj(latest_timestamp, 2, 3)
        latest_timestamp_flat = TP.timestamp_convert_format(
            latest_timestamp, 2, 1)

        ret = DR.ConfidenceReporting(query_type='', hyp_test='',
                                     db='db1025').get_confidence_on_time_range(
                                         start_time,
                                         end_time,
                                         campaign_regexp_filter,
                                         one_step=use_one_step)
        measured_metrics_counts = ret[1]
        """ Prepare Summary results """

        sql_stmnt = Hlp.file_to_string(projSet.__sql_home__ + query_name)
        sql_stmnt = sql_stmnt % (
            start_time, latest_timestamp_flat, start_time,
            latest_timestamp_flat, campaign_regexp_filter, start_time,
            latest_timestamp_flat, start_time, end_time,
            campaign_regexp_filter, start_time, end_time,
            campaign_regexp_filter, start_time, end_time,
            campaign_regexp_filter, start_time, latest_timestamp_flat,
            campaign_regexp_filter, start_time, latest_timestamp_flat,
            campaign_regexp_filter)

        logging.info('Executing report_summary_results ...')

        results = dl.execute_SQL(sql_stmnt)
        column_names = dl.get_column_names()

        if use_one_step:

            logging.info('... including one step artifacts ...')

            sql_stmnt_1S = Hlp.file_to_string(projSet.__sql_home__ +
                                              query_name_1S)
            sql_stmnt_1S = sql_stmnt_1S % (
                start_time, latest_timestamp_flat, start_time,
                latest_timestamp_flat, campaign_regexp_filter, start_time,
                latest_timestamp_flat, start_time, end_time,
                campaign_regexp_filter, start_time, end_time,
                campaign_regexp_filter, start_time, end_time,
                campaign_regexp_filter, start_time, latest_timestamp_flat,
                campaign_regexp_filter, start_time, latest_timestamp_flat,
                campaign_regexp_filter)

            results = list(results)
            results_1S = dl.execute_SQL(sql_stmnt_1S)
            """ Ensure that the results are unique """
            one_step_keys = list()
            for row in results_1S:
                one_step_keys.append(str(row[0]) + str(row[1]) + str(row[2]))

            new_results = list()
            for row in results:
                key = str(row[0]) + str(row[1]) + str(row[2])
                if not (key in one_step_keys):
                    new_results.append(row)
            results = new_results

            results.extend(list(results_1S))

        metric_legend_table = DR.DataReporting().get_standard_metrics_legend()
        conf_legend_table = DR.ConfidenceReporting(
            query_type='bannerlp',
            hyp_test='TTest').get_confidence_legend_table()
        """ Create a interval loader objects """

        sampling_interval = 5  # 5 minute sampling interval for donation plots

        ir_cmpgn = DR.IntervalReporting(query_type=FDH._QTYPE_CAMPAIGN_ +
                                        FDH._QTYPE_TIME_,
                                        generate_plot=False,
                                        db='db1025')
        ir_banner = DR.IntervalReporting(query_type=FDH._QTYPE_BANNER_ +
                                         FDH._QTYPE_TIME_,
                                         generate_plot=False,
                                         db='db1025')
        ir_lp = DR.IntervalReporting(query_type=FDH._QTYPE_LP_ +
                                     FDH._QTYPE_TIME_,
                                     generate_plot=False,
                                     db='db1025')
        """ Execute queries """
        ir_cmpgn.run(start_time, end_time, sampling_interval, 'donations', '',
                     {})
        ir_banner.run(start_time, end_time, sampling_interval, 'donations', '',
                      {})
        ir_lp.run(start_time, end_time, sampling_interval, 'donations', '', {})
        """ Prepare serialized objects """

        dict_param = dict()

        dict_param['metric_legend_table'] = metric_legend_table
        dict_param['conf_legend_table'] = conf_legend_table

        dict_param['measured_metrics_counts'] = measured_metrics_counts
        dict_param['results'] = results
        dict_param['column_names'] = column_names

        dict_param['interval'] = sampling_interval
        dict_param['duration'] = self.DURATION_HRS

        dict_param['start_time'] = TP.timestamp_convert_format(
            start_time, 1, 2)
        dict_param['end_time'] = TP.timestamp_convert_format(end_time, 1, 2)

        dict_param['ir_cmpgn_counts'] = ir_cmpgn._counts_
        dict_param['ir_banner_counts'] = ir_banner._counts_
        dict_param['ir_lp_counts'] = ir_lp._counts_

        dict_param['ir_cmpgn_times'] = ir_cmpgn._times_
        dict_param['ir_banner_times'] = ir_banner._times_
        dict_param['ir_lp_times'] = ir_lp._times_

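        # drop any stale cache entry under this key before writing fresh data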
        self.clear_cached_data(shelve_key)
        self.cache_data(dict_param, shelve_key)

        logging.info('Caching complete.')
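
The uniqueness pass above keeps a two-step row only when no one-step row shares its first three columns, then appends the one-step rows. The same merge, extracted as a standalone helper (a sketch; `merge_results` is an illustrative name, not project API), keyed on tuples so that column boundaries are preserved, unlike concatenated strings where 'a'+'bc' collides with 'ab'+'c':

def merge_results(two_step_rows, one_step_rows, key_cols=(0, 1, 2)):
    """Merge result sets, preferring one-step rows on key collisions."""

    def composite_key(row):
        # a tuple keeps column boundaries; concatenated strings do not
        return tuple(str(row[i]) for i in key_cols)

    one_step_keys = set(composite_key(row) for row in one_step_rows)
    merged = [row for row in two_step_rows
              if composite_key(row) not in one_step_keys]
    merged.extend(one_step_rows)
    return merged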
Code Example #3
def show_campaigns(request, utm_campaign, **kwargs):
    """ 
        PROCESS POST KWARGS 
        ===================
    """

    err_msg = ''
    try:
        err_msg = str(kwargs['kwargs']['err_msg'])
    except (KeyError, TypeError):
        pass

    test_type_var = ''
    try:
        test_type_override = MySQLdb._mysql.escape_string(
            request.POST['test_type_override'])

        if test_type_override == 'Banner':
            test_type_var = FDH._TESTTYPE_BANNER_
        elif test_type_override == 'Landing Page':
            test_type_var = FDH._TESTTYPE_LP_
        elif test_type_override == 'Banner and LP':
            test_type_var = FDH._TESTTYPE_BANNER_LP_

    except KeyError:
        pass

    try:
        """ Find the earliest and latest page views for a given campaign  """
        lptl = DL.LandingPageTableLoader()
        ccrml = DL.CiviCRMLoader()

        start_time = ccrml.get_earliest_donation(utm_campaign)
        end_time = ccrml.get_latest_donation(utm_campaign)

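        # one-step tests have no landing-page views, so the donation
        # timestamps bound the range; otherwise prefer the page-view range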
        one_step = lptl.is_one_step(start_time, end_time, utm_campaign)

        if not (one_step):
            start_time = lptl.get_earliest_campaign_view(utm_campaign)
            end_time = lptl.get_latest_campaign_view(utm_campaign)

        interval = 1
        """ Create reporting object to retrieve campaign data and write plots to image repo on disk """
        ir = DR.IntervalReporting(was_run=False,
                                  use_labels=False,
                                  font_size=20,
                                  plot_type='line',
                                  query_type='campaign',
                                  file_path=projSet.__web_home__ +
                                  'campaigns/static/images/')
        """ Produce analysis on the campaign view data """
        ir.run(start_time,
               end_time,
               interval,
               'views',
               utm_campaign, {},
               one_step=one_step)
        """ 
            ESTIMATE THE START AND END TIME OF THE CAMPAIGN
            ===============================================
            
            Search for the first instance when more than 10 views are observed over a sampling period
        """

        col_names = ir._data_loader_.get_column_names()

        views_index = col_names.index('views')
        ts_index = col_names.index('ts')

        row_list = list(ir._data_loader_._results_)  # copy the query results

        # fall back to the observed range if no sampling period clears the
        # 100-view threshold
        start_time_est = start_time
        end_time_est = end_time

        for row in row_list:
            if row[views_index] > 100:
                start_time_est = row[ts_index]
                break
        row_list.reverse()
        for row in row_list:
            if row[views_index] > 100:
                end_time_est = row[ts_index]
                break
        """
            BUILD THE VISUALIZATION FOR THE TEST VIEWS OF THIS CAMAPAIGN
            ============================================================        
        """
        """ Read the test name """
        ttl = DL.TestTableLoader()
        row = ttl.get_test_row(utm_campaign)
        test_name = ttl.get_test_field(row, 'test_name')
        """ Regenerate the data using the estimated start and end times """
        ir = DR.IntervalReporting(was_run=False,
                                  use_labels=False,
                                  font_size=20,
                                  plot_type='line',
                                  query_type='campaign',
                                  file_path=projSet.__web_home__ +
                                  'campaigns/static/images/')
        ir.run(start_time_est,
               end_time_est,
               interval,
               'views',
               utm_campaign, {},
               one_step=one_step)
        """ Determine the type of test (if not overridden) and retrieve the artifacts  """
        test_type, artifact_name_list = FDH.get_test_type(
            utm_campaign, start_time, end_time,
            DL.CampaignReportingLoader(query_type=''), test_type_var)

        return render_to_response('campaigns/show_campaigns.html',
                                  {'utm_campaign': utm_campaign,
                                   'test_name': test_name,
                                   'start_time': start_time_est,
                                   'end_time': end_time_est,
                                   'one_step': one_step,
                                   'artifacts': artifact_name_list,
                                   'test_type': test_type,
                                   'err_msg': err_msg},
                                  context_instance=RequestContext(request))

    except Exception as inst:

        logging.error('Failed to correctly produce campaign diagnostics.')
        logging.error(type(inst))
        logging.error(inst.args)
        logging.error(inst)
        """ Return to the index page with an error """
        err_msg = 'There is insufficient data to analyze this campaign: %s.  Check to see if the <a href="/LML/">impressions have been loaded</a>. <br><br>ERROR:<br><br>%s' % (
            utm_campaign, inst.__str__())

        return index(request, kwargs={'err_msg': err_msg})
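
The threshold scan in this example can also be pulled out into a reusable helper that makes the fallback explicit (a sketch; `estimate_active_range` is an illustrative name, and the threshold default mirrors the 100-view cutoff used above):

def estimate_active_range(rows, views_index, ts_index, fallback, threshold=100):
    """Return (start, end) timestamps bracketing the above-threshold periods.

    Falls back to the supplied (start, end) pair when no sampling period
    exceeds the threshold.
    """
    start_est, end_est = fallback
    for row in rows:
        if row[views_index] > threshold:
            start_est = row[ts_index]
            break
    for row in reversed(rows):
        if row[views_index] > threshold:
            end_est = row[ts_index]
            break
    return start_est, end_est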