def execute_process(self, key, **kwargs):

    logging.info('Commencing caching of fundraiser totals data at: %s' % self.CACHING_HOME)

    end_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)

    """ DATA CONFIG """

    """ set the metrics to plot """
    lttdl = DL.LongTermTrendsLoader(db='db1025')

    start_of_2011_fundraiser = '20111116000000'
    countries = DL.CiviCRMLoader().get_ranked_donor_countries(start_of_2011_fundraiser)
    countries.append('Total')

    """ Dictionary object storing lists of regexes - each expression must pass for a label to persist """
    year_groups = dict()
    for country in countries:
        if cmp(country, 'Total') == 0:
            year_groups['2011 Total'] = ['2011.*']
            year_groups['2010 Total'] = ['2010.*']
        else:
            year_groups['2011 ' + country] = ['2011' + country]
            year_groups['2010 ' + country] = ['2010' + country]

    metrics = 'amount'
    weights = ''
    groups = year_groups
    group_metrics = ['year', 'country']
    metric_types = DL.LongTermTrendsLoader._MT_AMOUNT_
    include_totals = False
    include_others = False
    hours_back = 0
    time_unit = TP.DAY

    """ END CONFIG """

    """ For each metric use the LongTermTrendsLoader to generate the data to plot """
    dr = DR.DataReporting()

    times, counts = lttdl.run_fundrasing_totals(end_time, metric_name=metrics, metric_type=metric_types,
                                                groups=groups, group_metric=group_metrics,
                                                include_other=include_others, include_total=include_totals,
                                                hours_back=hours_back, weight_name=weights, time_unit=time_unit)
    dict_param = dict()

    for country in countries:

        key_2011 = '2011 ' + country
        key_2010 = '2010 ' + country

        new_counts = dict()
        new_counts[key_2010] = counts[key_2010]
        new_counts[key_2011] = counts[key_2011]

        new_times = dict()
        new_times[key_2010] = times[key_2010]
        new_times[key_2011] = times[key_2011]

        dr._counts_ = new_counts
        dr._times_ = new_times

        empty_data = [0] * len(new_times[new_times.keys()[0]])
        data = list()
        data.append(dr.get_data_lists([''], empty_data))

        dict_param[country] = Hlp.combine_data_lists(data)

    self.clear_cached_data(key)
    self.cache_data(dict_param, key)

    logging.info('Caching complete.')
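
# The regex "groups" dictionaries used by the caching jobs above and below map a display
# label to a list of patterns; per the inline comments, every pattern must match for a raw
# label to be counted under that group. A minimal, self-contained sketch of that rule
# (assuming the loaders apply something like re.match per pattern - the labels and groups
# here are made up, not project data):
def _example_group_label_matching():
    """
    Illustration only - not part of the caching jobs. Shows how a raw time-series
    label is kept under a group only if it satisfies every regex in that group's list.
    """
    import re

    raw_labels = ['2011US', '2011CA', '2010US', '2009US']     # hypothetical '<year><country>' labels
    example_groups = {
        '2011 US': ['2011US'],        # single-country group
        '2011 Total': ['2011.*'],     # catch-all group for the year
    }

    kept = dict()
    for group, patterns in example_groups.items():
        # a label persists only if it matches *all* of the group's regexes
        kept[group] = [l for l in raw_labels if all(re.match(p, l) for p in patterns)]
    return kept    # {'2011 US': ['2011US'], '2011 Total': ['2011US', '2011CA']}
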
def execute_process(self, key, **kwargs):

    logging.info('Commencing caching of long term trends data at: %s' % self.CACHING_HOME)

    end_time, start_time = TP.timestamps_for_interval(datetime.datetime.utcnow(), 1,
                                                      hours=-self.VIEW_DURATION_HRS, resolution=1)

    """ DATA CONFIG """

    countries = DL.CiviCRMLoader().get_ranked_donor_countries(start_time)
    countries = countries[1:6]

    """ set the metrics to plot """
    lttdl = DL.LongTermTrendsLoader(db='storage3')

    """ Dictionary object storing lists of regexes - each expression must pass for a label to persist """
    # country_groups = {'US': ['(US)'], 'CA': ['(CA)'], 'JP': ['(JP)'], 'IN': ['(IN)'], 'NL': ['(NL)']}
    payment_groups = {'Credit Card': ['^cc$'], 'Paypal': ['^pp$']}
    currency_groups = {'USD': ['(USD)'], 'CAD': ['(CAD)'], 'JPY': ['(JPY)'], 'EUR': ['(EUR)']}
    lang_cntry_groups = {'US': ['US..', '.{4}'], 'EN': ['[^U^S]en', '.{4}']}

    top_cntry_groups = dict()
    for country in countries:
        top_cntry_groups[country] = [country, '.{2}']

    """ Alternative click-rate configuration - these assignments are superseded by the full configuration below """
    # To include click rate
    # groups = [ lang_cntry_groups]
    metrics = ['click_rate']
    metrics_index = [3]
    # group_metrics = [DL.LongTermTrendsLoader._MT_RATE_]
    metric_types = ['country', 'language']
    include_totals = [True]
    include_others = [True]

    metrics = ['impressions', 'views', 'donations', 'donations', 'amount',
               'amount', 'diff_don', 'diff_don', 'donations', 'conversion_rate']
    weights = ['', '', '', '', '', '', 'donations', 'donations', '', '']
    metrics_index = [0, 1, 2, 2, 2, 4, 5, 5, 6, 6]
    groups = [lang_cntry_groups, lang_cntry_groups, lang_cntry_groups, top_cntry_groups, lang_cntry_groups,
              currency_groups, lang_cntry_groups, lang_cntry_groups, payment_groups, payment_groups]

    """ The metrics that are used to build a group string to be qualified via regex - the values of the list metrics are concatenated """
    group_metrics = [['country', 'language'], ['country', 'language'], ['country', 'language'],
                     ['country', 'language'], ['country', 'language'], ['currency'],
                     ['country', 'language'], ['country', 'language'], ['payment_method'], ['payment_method']]

    metric_types = [DL.LongTermTrendsLoader._MT_AMOUNT_, DL.LongTermTrendsLoader._MT_AMOUNT_,
                    DL.LongTermTrendsLoader._MT_AMOUNT_, DL.LongTermTrendsLoader._MT_AMOUNT_,
                    DL.LongTermTrendsLoader._MT_AMOUNT_, DL.LongTermTrendsLoader._MT_AMOUNT_,
                    DL.LongTermTrendsLoader._MT_RATE_WEIGHTED_, DL.LongTermTrendsLoader._MT_RATE_WEIGHTED_,
                    DL.LongTermTrendsLoader._MT_AMOUNT_, DL.LongTermTrendsLoader._MT_RATE_]

    include_totals = [True, True, True, False, True, True, False, False, False, True]
    include_others = [True, True, True, False, True, True, True, True, True, False]
    hours_back = [0, 0, 0, 0, 0, 0, 24, 168, 0, 0]
    time_unit = [TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR,
                 TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR]

    data = list()

    """ END CONFIG """

    """ For each metric use the LongTermTrendsLoader to generate the data to plot """
    for index in range(len(metrics)):

        dr = DR.DataReporting()

        times, counts = lttdl.run_query(start_time, end_time, metrics_index[index], metric_name=metrics[index],
                                        metric_type=metric_types[index], groups=groups[index],
                                        group_metric=group_metrics[index], include_other=include_others[index],
                                        include_total=include_totals[index], hours_back=hours_back[index],
                                        weight_name=weights[index], time_unit=time_unit[index])
        times = TP.normalize_timestamps(times, False, time_unit[index])

        dr._counts_ = counts
        dr._times_ = times

        empty_data = [0] * len(times[times.keys()[0]])
        data.append(dr.get_data_lists([''], empty_data))

    dict_param = Hlp.combine_data_lists(data)
    dict_param['interval'] = self.VIEW_DURATION_HRS
    dict_param['end_time'] = TP.timestamp_convert_format(end_time, 1, 2)

    self.clear_cached_data(key)
    self.cache_data(dict_param, key)

    logging.info('Caching complete.')
def generate_summary(request):

    try:
        err_msg = ''

        """
            PROCESS POST DATA
            =================

            Escape all user input that can be entered in text fields
        """
        if 'utm_campaign' in request.POST:
            utm_campaign = MySQLdb._mysql.escape_string(request.POST['utm_campaign'])

        if 'start_time' in request.POST:
            start_time = MySQLdb._mysql.escape_string(request.POST['start_time'].strip())
            if not TP.is_timestamp(start_time, 1) and not TP.is_timestamp(start_time, 2):
                err_msg = 'Incorrectly formatted start timestamp.'
                raise Exception()

        if 'end_time' in request.POST:
            end_time = MySQLdb._mysql.escape_string(request.POST['end_time'].strip())
            if not TP.is_timestamp(end_time, 1) and not TP.is_timestamp(end_time, 2):
                err_msg = 'Incorrectly formatted end timestamp.'
                raise Exception()

        if 'iso_filter' in request.POST:
            country = MySQLdb._mysql.escape_string(request.POST['iso_filter'])
        else:
            country = '.{2}'

        if 'measure_confidence' in request.POST:
            measure_confidence = cmp(request.POST['measure_confidence'], 'yes') == 0
        else:
            measure_confidence = False

        if 'one_step' in request.POST:
            use_one_step = cmp(request.POST['one_step'], 'yes') == 0
        else:
            use_one_step = False

        if 'donations_only' in request.POST:
            donations_only = cmp(request.POST['donations_only'], 'yes') == 0
        else:
            donations_only = False

        """ Convert timestamp format if necessary """
        if TP.is_timestamp(start_time, 2):
            start_time = TP.timestamp_convert_format(start_time, 2, 1)
        if TP.is_timestamp(end_time, 2):
            end_time = TP.timestamp_convert_format(end_time, 2, 1)

        """
            GENERATE A REPORT SUMMARY TABLE
            ===============================
        """
        if donations_only:
            srl = DL.SummaryReportingLoader(query_type=FDH._TESTTYPE_DONATIONS_)
        else:
            srl = DL.SummaryReportingLoader(query_type=FDH._TESTTYPE_BANNER_LP_)

        srl.run_query(start_time, end_time, utm_campaign, min_views=-1, country=country)

        column_names = srl.get_column_names()
        summary_results = srl.get_results()

        if not summary_results:
            html_table = '<h3>No artifact summary data available for %s.</h3>' % utm_campaign
        else:
            summary_results_list = list()
            for row in summary_results:
                summary_results_list.append(list(row))
            summary_results = summary_results_list

            """ Format results to encode html table cell markup in results """
            if measure_confidence:

                ret = DR.ConfidenceReporting(query_type='', hyp_test='').get_confidence_on_time_range(
                    start_time, end_time, utm_campaign, one_step=use_one_step, country=country)
                conf_colour_code = ret[0]    # colour codes keyed on confidence

                for row_index in range(len(summary_results)):

                    artifact_index = summary_results[row_index][0] + '-' + \
                                     summary_results[row_index][1] + '-' + \
                                     summary_results[row_index][2]

                    for col_index in range(len(column_names)):

                        is_coloured_cell = False
                        if column_names[col_index] in conf_colour_code.keys():
                            if artifact_index in conf_colour_code[column_names[col_index]].keys():
                                summary_results[row_index][col_index] = \
                                    '<td style="background-color:' + \
                                    conf_colour_code[column_names[col_index]][artifact_index] + ';">' + \
                                    str(summary_results[row_index][col_index]) + '</td>'
                                is_coloured_cell = True

                        if not is_coloured_cell:
                            summary_results[row_index][col_index] = \
                                '<td>' + str(summary_results[row_index][col_index]) + '</td>'

                html_table = DR.DataReporting()._write_html_table(summary_results, column_names,
                                                                  use_standard_metric_names=True,
                                                                  omit_cell_markup=True)
            else:
                html_table = DR.DataReporting()._write_html_table(summary_results, column_names,
                                                                  use_standard_metric_names=True)

        """ Generate the totals table - use the donations-only total query type when requested """
        if donations_only:
            srl = DL.SummaryReportingLoader(query_type=FDH._QTYPE_TOTAL_DONATIONS_)
        else:
            srl = DL.SummaryReportingLoader(query_type=FDH._QTYPE_TOTAL_)

        srl.run_query(start_time, end_time, utm_campaign, min_views=-1, country=country)
        total_summary_results = srl.get_results()

        if not total_summary_results:
            html_table = html_table + '<div class="spacer"></div><div class="spacer"></div>' \
                '<h3>No data available for %s Totals.</h3>' % utm_campaign
        else:
            html_table = html_table + '<div class="spacer"></div><div class="spacer"></div>' + \
                DR.DataReporting()._write_html_table(total_summary_results, srl.get_column_names(),
                                                     use_standard_metric_names=True)

        metric_legend_table = DR.DataReporting().get_standard_metrics_legend()
        conf_legend_table = DR.ConfidenceReporting(query_type='bannerlp', hyp_test='TTest').get_confidence_legend_table()

        html_table = '<h4><u>Metrics Legend:</u></h4><div class="spacer"></div>' + metric_legend_table + \
            '<div class="spacer"></div><h4><u>Confidence Legend for Hypothesis Testing:</u></h4><div class="spacer"></div>' + \
            conf_legend_table + '<div class="spacer"></div><div class="spacer"></div>' + html_table

        """
            DETERMINE PAYMENT METHODS
            =========================
        """
        ccl = DL.CiviCRMLoader()
        pm_data_counts, pm_data_conversions = ccl.get_payment_methods(utm_campaign, start_time, end_time, country=country)

        html_table_pm_counts = DR.IntervalReporting().write_html_table_from_rowlists(
            pm_data_counts, ['Payment Method', 'Portion of Donations (%)'], 'Landing Page')
        html_table_pm_conversions = DR.IntervalReporting().write_html_table_from_rowlists(
            pm_data_conversions,
            ['Payment Method', 'Visits', 'Conversions', 'Conversion Rate (%)', 'Amount', 'Amount 25'],
            'Landing Page')

        html_table = html_table + '<div class="spacer"></div><h4><u>Payment Methods Breakdown:</u></h4><div class="spacer"></div>' + \
            html_table_pm_counts + '<div class="spacer"></div><div class="spacer"></div>' + \
            html_table_pm_conversions + '<div class="spacer"></div><div class="spacer"></div>'

        return render_to_response('tests/table_summary.html',
                                  {'html_table': html_table, 'utm_campaign': utm_campaign},
                                  context_instance=RequestContext(request))

    except Exception as inst:
        if cmp(err_msg, '') == 0:
            err_msg = 'Could not generate campaign tabular results.'
        return index(request, err_msg=err_msg)
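
# The cell-colouring loop in generate_summary() above relies on the first element returned
# by get_confidence_on_time_range() being a nested mapping of metric column name ->
# artifact key ('<utm_campaign>-<banner>-<landing page>') -> CSS colour. The sketch below
# only illustrates that assumed shape and how one row's cells get wrapped; the column
# names, row values, and colours are hypothetical, not project data or API output.
def _example_confidence_cell_markup():
    # assumed shape: {metric column: {artifact key: css colour}}
    conf_colour_code = {'don_per_view': {'C_2011_test-B11-L11': 'green'}}

    column_names = ['utm_campaign', 'banner', 'landing_page', 'don_per_view']
    row = ['C_2011_test', 'B11', 'L11', 0.0123]

    # artifact key built from the first three columns, as in the loop above
    artifact_index = row[0] + '-' + row[1] + '-' + row[2]

    cells = []
    for col_index in range(len(column_names)):
        colour = conf_colour_code.get(column_names[col_index], {}).get(artifact_index)
        if colour:
            cells.append('<td style="background-color:%s;">%s</td>' % (colour, row[col_index]))
        else:
            cells.append('<td>%s</td>' % row[col_index])
    return ''.join(cells)
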
def show_campaigns(request, utm_campaign, **kwargs):

    """
        PROCESS POST KWARGS
        ===================
    """
    err_msg = ''
    try:
        err_msg = str(kwargs['kwargs']['err_msg'])
    except:
        pass

    test_type_override = ''
    try:
        test_type_override = MySQLdb._mysql.escape_string(request.POST['test_type_override'])

        if test_type_override == 'Banner':
            test_type_var = FDH._TESTTYPE_BANNER_
        elif test_type_override == 'Landing Page':
            test_type_var = FDH._TESTTYPE_LP_
        elif test_type_override == 'Banner and LP':
            test_type_var = FDH._TESTTYPE_BANNER_LP_
        else:
            test_type_var = ''    # default when the override value is unrecognized
    except:
        test_type_var = ''
        pass

    try:
        """ Find the earliest and latest page views for a given campaign """
        lptl = DL.LandingPageTableLoader()
        ccrml = DL.CiviCRMLoader()

        start_time = ccrml.get_earliest_donation(utm_campaign)
        end_time = ccrml.get_latest_donation(utm_campaign)

        one_step = lptl.is_one_step(start_time, end_time, utm_campaign)
        if not one_step:
            start_time = lptl.get_earliest_campaign_view(utm_campaign)
            end_time = lptl.get_latest_campaign_view(utm_campaign)

        interval = 1

        """ Create reporting object to retrieve campaign data and write plots to image repo on disk """
        ir = DR.IntervalReporting(was_run=False, use_labels=False, font_size=20, plot_type='line',
                                  query_type='campaign',
                                  file_path=projSet.__web_home__ + 'campaigns/static/images/')

        """ Produce analysis on the campaign view data """
        ir.run(start_time, end_time, interval, 'views', utm_campaign, {}, one_step=one_step)

        """
            ESTIMATE THE START AND END TIME OF THE CAMPAIGN
            ===============================================

            Search for the first instance when more than 100 views are observed over a sampling period
        """
        col_names = ir._data_loader_.get_column_names()
        views_index = col_names.index('views')
        ts_index = col_names.index('ts')

        row_list = list(ir._data_loader_._results_)    # copy the query results

        for row in row_list:
            if row[views_index] > 100:
                start_time_est = row[ts_index]
                break

        row_list.reverse()
        for row in row_list:
            if row[views_index] > 100:
                end_time_est = row[ts_index]
                break

        """
            BUILD THE VISUALIZATION FOR THE TEST VIEWS OF THIS CAMPAIGN
            ===========================================================
        """
        """ Read the test name """
        ttl = DL.TestTableLoader()
        row = ttl.get_test_row(utm_campaign)
        test_name = ttl.get_test_field(row, 'test_name')

        """ Regenerate the data using the estimated start and end times """
        ir = DR.IntervalReporting(was_run=False, use_labels=False, font_size=20, plot_type='line',
                                  query_type='campaign',
                                  file_path=projSet.__web_home__ + 'campaigns/static/images/')
        ir.run(start_time_est, end_time_est, interval, 'views', utm_campaign, {}, one_step=one_step)

        """ Determine the type of test (if not overridden) and retrieve the artifacts """
        test_type, artifact_name_list = FDH.get_test_type(utm_campaign, start_time, end_time,
                                                          DL.CampaignReportingLoader(query_type=''), test_type_var)

        return render_to_response('campaigns/show_campaigns.html',
                                  {'utm_campaign': utm_campaign, 'test_name': test_name,
                                   'start_time': start_time_est, 'end_time': end_time_est,
                                   'one_step': one_step, 'artifacts': artifact_name_list,
                                   'test_type': test_type, 'err_msg': err_msg},
                                  context_instance=RequestContext(request))

    except Exception as inst:
        logging.error('Failed to correctly produce campaign diagnostics.')
        logging.error(type(inst))
        logging.error(inst.args)
        logging.error(inst)

        """ Return to the index page with an error """
        err_msg = 'There is insufficient data to analyze this campaign: %s. Check to see if the ' \
                  '<a href="/LML/">impressions have been loaded</a>. <br><br>ERROR:<br><br>%s' % \
                  (utm_campaign, inst.__str__())
        return index(request, kwargs={'err_msg': err_msg})
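
# show_campaigns() above is a Django view (it returns render_to_response() with a
# RequestContext), so it is reached through a URLconf entry. A minimal sketch in the
# old-style Django URLconf syntax this code targets; the module path and URL pattern
# below are assumptions for illustration, not taken from the project:
#
#   # urls.py (hypothetical)
#   from django.conf.urls.defaults import patterns, url
#
#   urlpatterns = patterns('',
#       # e.g. /campaigns/C_2011_test/ -> show_campaigns(request, utm_campaign='C_2011_test')
#       url(r'^campaigns/(?P<utm_campaign>[\w_]+)/$', 'campaigns.views.show_campaigns',
#           name='show_campaigns'),
#   )
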
def generate_reporting_objects(test_name, start_time, end_time, campaign, label_dict, label_dict_full,
                               sample_interval, test_interval, test_type, metric_types, one_step_var, country):

    """ Labels will always be metric names in this case """
    # e.g. labels = {'Static banner':'20101227_JA061_US','Fading banner':'20101228_JAFader_US'}
    use_labels_var = True

    """ Build reporting objects """
    ir_cmpgn = DR.IntervalReporting(use_labels=False, font_size=20, plot_type='line', query_type='campaign',
                                    file_path=projSet.__web_home__ + 'campaigns/static/images/')

    """
        DETERMINE DONOR DOLLAR BREAKDOWN
        ================================
    """
    try:
        logging.info('')
        logging.info('Determining Donations Distribution:')
        logging.info('===================================\n')
        DR.DonorBracketReporting(query_type=FDH._QTYPE_LP_,
                                 file_path=projSet.__web_home__ + 'tests/static/images/').run(start_time, end_time, campaign)
    except:
        pass

    """
        DETERMINE CATEGORY DISTRIBUTION
        ===============================
    """
    if (0):    # disabled
        DR.CategoryReporting(file_path=projSet.__web_home__ + 'tests/static/images/').run(start_time, end_time, campaign)

    """
        DETERMINE LANGUAGE BREAKDOWN
        ============================
    """
    html_language = ''
    if (1):
        logging.info('')
        logging.info('Determining Languages Distribution:')
        logging.info('===================================\n')
        columns, data = DL.CiviCRMLoader().get_donor_by_language(campaign, start_time, end_time)
        html_language = DR.DataReporting()._write_html_table(data, columns)

    """
        DETERMINE PAYMENT METHODS
        =========================
    """
    logging.info('')
    logging.info('Determining Payment Methods:')
    logging.info('============================\n')

    ccl = DL.CiviCRMLoader()
    pm_data_counts, pm_data_conversions = ccl.get_payment_methods(campaign, start_time, end_time, country=country)

    html_table_pm_counts = DR.IntervalReporting().write_html_table_from_rowlists(
        pm_data_counts, ['Payment Method', 'Portion of Donations (%)'], 'Landing Page')
    html_table_pm_conversions = DR.IntervalReporting().write_html_table_from_rowlists(
        pm_data_conversions,
        ['Payment Method', 'Visits', 'Conversions', 'Conversion Rate (%)', 'Amount', 'Amount 25'],
        'Landing Page')

    """
        BUILD REPORTING OBJECTS
        =======================
    """
    if test_type == FDH._TESTTYPE_BANNER_:
        ir = DR.IntervalReporting(use_labels=use_labels_var, font_size=20, plot_type='step',
                                  query_type=FDH._QTYPE_BANNER_,
                                  file_path=projSet.__web_home__ + 'tests/static/images/')
        link_item = '<a href="http://meta.wikimedia.org/w/index.php?title=Special:NoticeTemplate/view&template=%s">%s</a>'
        measured_metric = ['don_per_imp', 'amt_norm_per_imp', 'click_rate']

    elif test_type == FDH._TESTTYPE_LP_:
        ir = DR.IntervalReporting(use_labels=use_labels_var, font_size=20, plot_type='step',
                                  query_type=FDH._QTYPE_LP_,
                                  file_path=projSet.__web_home__ + 'tests/static/images/')
        link_item = '<a href="http://meta.wikimedia.org/w/index.php?title=Special:NoticeTemplate/view&template=%s">%s</a>'
        measured_metric = ['don_per_view', 'amt_norm_per_view']

    elif test_type == FDH._TESTTYPE_BANNER_LP_:
        ir = DR.IntervalReporting(use_labels=use_labels_var, font_size=20, plot_type='step',
                                  query_type=FDH._QTYPE_BANNER_LP_,
                                  file_path=projSet.__web_home__ + 'tests/static/images/')
        link_item = '<a href="http://meta.wikimedia.org/w/index.php?title=Special:NoticeTemplate/view&template=%s">%s</a>'
        measured_metric = ['don_per_imp', 'amt_norm_per_imp', 'don_per_view', 'amt_norm_per_view', 'click_rate']

    """
        GENERATE PLOTS FOR EACH METRIC OF INTEREST
        ==========================================
    """
    logging.info('')
    logging.info('Determining Metric Minutely Counts:')
    logging.info('==================================\n')

    for metric in metric_types:
        ir.run(start_time, end_time, sample_interval, metric, campaign, label_dict,
               one_step=one_step_var, country=country)

    """
        CHECK THE CAMPAIGN VIEWS AND DONATIONS
        ======================================
    """
    ir_cmpgn.run(start_time, end_time, sample_interval, 'views', campaign, {},
                 one_step=one_step_var, country=country)
    ir_cmpgn.run(start_time, end_time, sample_interval, 'donations', campaign, {},
                 one_step=one_step_var, country=country)

    """
        PERFORM HYPOTHESIS TESTING
        ==========================
    """
    logging.info('')
    logging.info('Executing Confidence Queries:')
    logging.info('============================\n')

    column_colours = dict()
    confidence = list()

    cr = DR.ConfidenceReporting(use_labels=use_labels_var, font_size=20, plot_type='line', hyp_test='t_test',
                                query_type=test_type,
                                file_path=projSet.__web_home__ + 'tests/static/images/')

    for metric in measured_metric:
        ret = cr.run(test_name, campaign, metric, label_dict, start_time, end_time, sample_interval,
                     one_step=one_step_var, country=country)
        confidence.append(ret[0])
        column_colours[metric] = ret[1]

    """
        GENERATE A REPORT SUMMARY TABLE
        ===============================
    """
    logging.info('')
    logging.info('Generating Summary Report:')
    logging.info('=========================\n')

    """
    if one_step_var == True:
        summary_start_time = DL.CiviCRMLoader().get_earliest_donation(campaign)
    else:
        summary_start_time = DL.LandingPageTableLoader().get_earliest_campaign_view(campaign)
    summary_end_time = DL.CiviCRMLoader().get_latest_donation(campaign)
    """

    srl = DL.SummaryReportingLoader(query_type=test_type)
    srl.run_query(start_time, end_time, campaign, one_step=one_step_var, country=country)

    columns = srl.get_column_names()
    summary_results = srl.get_results()

    """ REMOVED - links to pipeline artifacts, this was broken and should be implemented properly later """

    """ Get winners, losers, and percent increase """
    winner = list()
    loser = list()
    percent_increase = list()

    labels = list()
    for item_long_name in label_dict:
        labels.append(label_dict[item_long_name])

    for metric in measured_metric:
        ret = srl.compare_artifacts(label_dict.keys(), metric, labels=labels)
        winner.append(ret[0])
        loser.append(ret[1])
        percent_increase.append(ret[2])

    """ Compose table for showing artifact """
    html_table = DR.DataReporting()._write_html_table(summary_results, columns,
                                                      coloured_columns=column_colours,
                                                      use_standard_metric_names=True)

    metric_legend_table = DR.DataReporting().get_standard_metrics_legend()
    conf_legend_table = DR.ConfidenceReporting(query_type='bannerlp', hyp_test='TTest').get_confidence_legend_table()

    html_table = '<h4><u>Metrics Legend:</u></h4><div class="spacer"></div>' + metric_legend_table + \
        '<div class="spacer"></div><h4><u>Confidence Legend for Hypothesis Testing:</u></h4><div class="spacer"></div>' + \
        conf_legend_table + '<div class="spacer"></div><div class="spacer"></div>' + html_table

    """ Generate totals for the test summary """
    srl = DL.SummaryReportingLoader(query_type=FDH._QTYPE_TOTAL_)
    srl.run_query(start_time, end_time, campaign, one_step=one_step_var, country=country)

    html_table = html_table + '<br><br>' + DR.DataReporting()._write_html_table(srl.get_results(),
                                                                                srl.get_column_names(),
                                                                                use_standard_metric_names=True)

    return [measured_metric, winner, loser, percent_increase, confidence,
            html_table_pm_counts, html_table_pm_conversions, html_language, html_table]