def impression_list(request):

    err_msg = ''
    where_clause = ''

    """
        Process times and POST
        ======================
    """
    duration_hrs = 2
    end_time, start_time = TP.timestamps_for_interval(
        datetime.datetime.utcnow(), 1, hours=-duration_hrs)

    if 'earliest_utc_ts' in request.POST:
        if cmp(request.POST['earliest_utc_ts'], '') != 0:
            earliest_utc_ts = MySQLdb._mysql.escape_string(
                request.POST['earliest_utc_ts'].strip())
            format = TP.getTimestampFormat(earliest_utc_ts)

            if format == 1:
                start_time = earliest_utc_ts
            if format == 2:
                start_time = TP.timestamp_convert_format(earliest_utc_ts, 2, 1)
            elif format == -1:
                err_msg = err_msg + 'Start timestamp is formatted incorrectly\n'

    if 'latest_utc_ts' in request.POST:
        if cmp(request.POST['latest_utc_ts'], '') != 0:
            latest_utc_ts = MySQLdb._mysql.escape_string(
                request.POST['latest_utc_ts'].strip())
            format = TP.getTimestampFormat(latest_utc_ts)

            if format == 1:
                end_time = latest_utc_ts
            if format == 2:
                end_time = TP.timestamp_convert_format(latest_utc_ts, 2, 1)
            elif format == -1:
                err_msg = err_msg + 'End timestamp is formatted incorrectly\n'

    if 'iso_code' in request.POST:
        if cmp(request.POST['iso_code'], '') != 0:
            iso_code = MySQLdb._mysql.escape_string(
                request.POST['iso_code'].strip())
            where_clause = "where bi.country regexp '%s' " % iso_code

    """
        Format and execute query
        ========================
    """
    query_name = 'report_country_impressions.sql'

    sql_stmnt = Hlp.file_to_string(projSet.__sql_home__ + query_name)
    sql_stmnt = sql_stmnt % (start_time, end_time, start_time, end_time,
                             start_time, end_time, where_clause)

    dl = DL.DataLoader()
    results = dl.execute_SQL(sql_stmnt)
    column_names = dl.get_column_names()

    imp_table = DR.DataReporting()._write_html_table(results, column_names)

    return render_to_response(
        'live_results/impression_list.html',
        {'imp_table': imp_table.decode("utf-8"),
         'err_msg': err_msg,
         'start': TP.timestamp_convert_format(start_time, 1, 2),
         'end': TP.timestamp_convert_format(end_time, 1, 2)},
        context_instance=RequestContext(request))
def timestamps_to_dict(time_lists):
    """ If the argument is a list, wrap it in a dictionary under the single
        key 'key' so that downstream processing can treat all inputs
        uniformly.  Returns the (possibly wrapped) collection and a flag
        recording whether the input was a list. """

    isList = 0

    if type(time_lists) is list:
        isList = 1

        old_list = time_lists
        time_lists = mh.AutoVivification()

        key = 'key'
        time_lists[key] = list()

        for i in range(len(old_list)):
            time_lists[key].append(old_list[i])

    return [time_lists, isList]
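# --- Usage sketch for timestamps_to_dict (illustrative; not from the source).
# A plain list is wrapped under the single key 'key' so that callers such as
# normalize_timestamps can treat list and dict inputs uniformly; dictionary
# inputs pass through unchanged.
def _demo_timestamps_to_dict():
    wrapped, isList = timestamps_to_dict(['20111116000000', '20111116010000'])
    assert isList == 1
    assert wrapped['key'] == ['20111116000000', '20111116010000']

    keyed, isList = timestamps_to_dict({'C11_banner': ['20111116000000']})
    assert isList == 0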
def execute_process(self, key, **kwargs):

    logging.info('Commencing caching of live results data at: %s' % self.CACHING_HOME)
    shelve_key = key

    """ Find the earliest and latest page views for a given campaign """
    lptl = DL.LandingPageTableLoader(db='db1025')

    query_name = 'report_summary_results_country.sql'
    query_name_1S = 'report_summary_results_country_1S.sql'
    campaign_regexp_filter = '^C_|^C11_'

    dl = DL.DataLoader(db='db1025')
    end_time, start_time = TP.timestamps_for_interval(
        datetime.datetime.utcnow(), 1, hours=-self.DURATION_HRS)

    """ Should a one-step query be used? """
    # Assume it is a one-step test if there are no impressions for this
    # campaign in the landing page table
    use_one_step = lptl.is_one_step(start_time, end_time, 'C11')

    """
        Retrieve the latest time for which impressions have been loaded
        ===============================================================
    """
    sql_stmnt = 'select max(end_time) as latest_ts from squid_log_record ' \
                'where log_completion_pct = 100.00'

    results = dl.execute_SQL(sql_stmnt)
    latest_timestamp = results[0][0]
    latest_timestamp = TP.timestamp_from_obj(latest_timestamp, 2, 3)
    latest_timestamp_flat = TP.timestamp_convert_format(latest_timestamp, 2, 1)

    ret = DR.ConfidenceReporting(query_type='', hyp_test='', db='db1025'). \
        get_confidence_on_time_range(start_time, end_time,
                                     campaign_regexp_filter,
                                     one_step=use_one_step)
    measured_metrics_counts = ret[1]

    """ Prepare Summary results """
    sql_stmnt = Hlp.file_to_string(projSet.__sql_home__ + query_name)
    sql_stmnt = sql_stmnt % (
        start_time, latest_timestamp_flat, start_time, latest_timestamp_flat,
        campaign_regexp_filter, start_time, latest_timestamp_flat,
        start_time, end_time, campaign_regexp_filter,
        start_time, end_time, campaign_regexp_filter,
        start_time, end_time, campaign_regexp_filter,
        start_time, latest_timestamp_flat, campaign_regexp_filter,
        start_time, latest_timestamp_flat, campaign_regexp_filter)

    logging.info('Executing report_summary_results ...')

    results = dl.execute_SQL(sql_stmnt)
    column_names = dl.get_column_names()

    if use_one_step:

        logging.info('... including one step artifacts ...')

        sql_stmnt_1S = Hlp.file_to_string(projSet.__sql_home__ + query_name_1S)
        sql_stmnt_1S = sql_stmnt_1S % (
            start_time, latest_timestamp_flat, start_time, latest_timestamp_flat,
            campaign_regexp_filter, start_time, latest_timestamp_flat,
            start_time, end_time, campaign_regexp_filter,
            start_time, end_time, campaign_regexp_filter,
            start_time, end_time, campaign_regexp_filter,
            start_time, latest_timestamp_flat, campaign_regexp_filter,
            start_time, latest_timestamp_flat, campaign_regexp_filter)

        results = list(results)
        results_1S = dl.execute_SQL(sql_stmnt_1S)

        """ Ensure that the results are unique """
        one_step_keys = list()
        for row in results_1S:
            one_step_keys.append(str(row[0]) + str(row[1]) + str(row[2]))

        new_results = list()
        for row in results:
            key = str(row[0]) + str(row[1]) + str(row[2])
            if not (key in one_step_keys):
                new_results.append(row)

        results = new_results
        results.extend(list(results_1S))

    metric_legend_table = DR.DataReporting().get_standard_metrics_legend()
    conf_legend_table = DR.ConfidenceReporting(
        query_type='bannerlp', hyp_test='TTest').get_confidence_legend_table()

    """ Create interval loader objects """
    sampling_interval = 5  # 5 minute sampling interval for donation plots

    ir_cmpgn = DR.IntervalReporting(query_type=FDH._QTYPE_CAMPAIGN_ + FDH._QTYPE_TIME_,
                                    generate_plot=False, db='db1025')
    ir_banner = DR.IntervalReporting(query_type=FDH._QTYPE_BANNER_ + FDH._QTYPE_TIME_,
                                     generate_plot=False, db='db1025')
    ir_lp = DR.IntervalReporting(query_type=FDH._QTYPE_LP_ + FDH._QTYPE_TIME_,
                                 generate_plot=False, db='db1025')

    """ Execute queries """
    ir_cmpgn.run(start_time, end_time, sampling_interval, 'donations', '', {})
    ir_banner.run(start_time, end_time, sampling_interval, 'donations', '', {})
    ir_lp.run(start_time, end_time, sampling_interval, 'donations', '', {})

    """ Prepare serialized objects """
    dict_param = dict()

    dict_param['metric_legend_table'] = metric_legend_table
    dict_param['conf_legend_table'] = conf_legend_table
    dict_param['measured_metrics_counts'] = measured_metrics_counts
    dict_param['results'] = results
    dict_param['column_names'] = column_names
    dict_param['interval'] = sampling_interval
    dict_param['duration'] = self.DURATION_HRS
    dict_param['start_time'] = TP.timestamp_convert_format(start_time, 1, 2)
    dict_param['end_time'] = TP.timestamp_convert_format(end_time, 1, 2)
    dict_param['ir_cmpgn_counts'] = ir_cmpgn._counts_
    dict_param['ir_banner_counts'] = ir_banner._counts_
    dict_param['ir_lp_counts'] = ir_lp._counts_
    dict_param['ir_cmpgn_times'] = ir_cmpgn._times_
    dict_param['ir_banner_times'] = ir_banner._times_
    dict_param['ir_lp_times'] = ir_lp._times_

    self.clear_cached_data(shelve_key)
    self.cache_data(dict_param, shelve_key)

    logging.info('Caching complete.')
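# --- Consumer-side sketch (mirrors the index view below): the shelved
# dictionary written by execute_process is read back with the same key. The
# view_keys.LIVE_RESULTS_DICT_KEY name comes from the index view; everything
# else here is illustrative.
def _demo_read_live_results_cache():
    cache = DC.LiveResults_DataCaching()
    dict_param = cache.retrieve_cached_data(view_keys.LIVE_RESULTS_DICT_KEY)

    results = dict_param['results']              # cached summary rows
    sampling_interval = dict_param['interval']   # 5-minute donation sampling
    return results, sampling_interval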
def execute_process(self, key, **kwargs):

    logging.info('Commencing caching of long term trends data at: %s' % self.CACHING_HOME)

    end_time, start_time = TP.timestamps_for_interval(
        datetime.datetime.utcnow(), 1, hours=-self.VIEW_DURATION_HRS, resolution=1)

    """ DATA CONFIG """

    countries = DL.CiviCRMLoader().get_ranked_donor_countries(start_time)
    countries = countries[1:6]

    """ set the metrics to plot """
    lttdl = DL.LongTermTrendsLoader(db='storage3')

    """ Dictionary object storing lists of regexes - each expression must
        pass for a label to persist """
    # country_groups = {'US': ['(US)'], 'CA': ['(CA)'], 'JP': ['(JP)'], 'IN': ['(IN)'], 'NL': ['(NL)']}
    payment_groups = {'Credit Card': ['^cc$'], 'Paypal': ['^pp$']}
    currency_groups = {'USD': ['(USD)'], 'CAD': ['(CAD)'], 'JPY': ['(JPY)'], 'EUR': ['(EUR)']}
    lang_cntry_groups = {'US': ['US..', '.{4}'], 'EN': ['[^U^S]en', '.{4}']}

    top_cntry_groups = dict()
    for country in countries:
        top_cntry_groups[country] = [country, '.{2}']

    # To include click rate (disabled -- these assignments are superseded by
    # the full metric configuration below)
    # groups = [lang_cntry_groups]
    # metrics = ['click_rate']
    # metrics_index = [3]
    # group_metrics = [DL.LongTermTrendsLoader._MT_RATE_]
    # metric_types = ['country', 'language']
    # include_totals = [True]
    # include_others = [True]

    metrics = ['impressions', 'views', 'donations', 'donations', 'amount',
               'amount', 'diff_don', 'diff_don', 'donations', 'conversion_rate']
    weights = ['', '', '', '', '', '', 'donations', 'donations', '', '']
    metrics_index = [0, 1, 2, 2, 2, 4, 5, 5, 6, 6]
    groups = [lang_cntry_groups, lang_cntry_groups, lang_cntry_groups,
              top_cntry_groups, lang_cntry_groups, currency_groups,
              lang_cntry_groups, lang_cntry_groups, payment_groups,
              payment_groups]

    """ The metrics that are used to build a group string to be qualified via
        regex - the values of the list metrics are concatenated """
    group_metrics = [['country', 'language'], ['country', 'language'],
                     ['country', 'language'], ['country', 'language'],
                     ['country', 'language'], ['currency'],
                     ['country', 'language'], ['country', 'language'],
                     ['payment_method'], ['payment_method']]

    metric_types = [DL.LongTermTrendsLoader._MT_AMOUNT_,
                    DL.LongTermTrendsLoader._MT_AMOUNT_,
                    DL.LongTermTrendsLoader._MT_AMOUNT_,
                    DL.LongTermTrendsLoader._MT_AMOUNT_,
                    DL.LongTermTrendsLoader._MT_AMOUNT_,
                    DL.LongTermTrendsLoader._MT_AMOUNT_,
                    DL.LongTermTrendsLoader._MT_RATE_WEIGHTED_,
                    DL.LongTermTrendsLoader._MT_RATE_WEIGHTED_,
                    DL.LongTermTrendsLoader._MT_AMOUNT_,
                    DL.LongTermTrendsLoader._MT_RATE_]

    include_totals = [True, True, True, False, True, True, False, False, False, True]
    include_others = [True, True, True, False, True, True, True, True, True, False]
    hours_back = [0, 0, 0, 0, 0, 0, 24, 168, 0, 0]
    time_unit = [TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR,
                 TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR]

    data = list()

    """ END CONFIG """

    """ For each metric use the LongTermTrendsLoader to generate the data to plot """
    for index in range(len(metrics)):

        dr = DR.DataReporting()

        times, counts = lttdl.run_query(
            start_time, end_time, metrics_index[index],
            metric_name=metrics[index], metric_type=metric_types[index],
            groups=groups[index], group_metric=group_metrics[index],
            include_other=include_others[index],
            include_total=include_totals[index],
            hours_back=hours_back[index], weight_name=weights[index],
            time_unit=time_unit[index])

        times = TP.normalize_timestamps(times, False, time_unit[index])

        dr._counts_ = counts
        dr._times_ = times

        empty_data = [0] * len(times[times.keys()[0]])
        data.append(dr.get_data_lists([''], empty_data))

    dict_param = Hlp.combine_data_lists(data)
    dict_param['interval'] = self.VIEW_DURATION_HRS
    dict_param['end_time'] = TP.timestamp_convert_format(end_time, 1, 2)

    self.clear_cached_data(key)
    self.cache_data(dict_param, key)

    logging.info('Caching complete.')
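# --- Illustrative only: how a group label is qualified. The group string is
# the concatenation of the group_metrics values (e.g. country + language), and
# every regex in a label's list must match for the label to persist. The
# sample values below are assumptions, not taken from the source.
def _demo_group_match():
    import re
    group_string = 'USen'          # hypothetical country='US', language='en'
    patterns = ['US..', '.{4}']    # the 'US' entry of lang_cntry_groups
    return all(re.search(p, group_string) for p in patterns)  # True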
def evaluate(filename):
    """
    Evaluate the models based on the timestamp provided.

    :param filename: Timestamp of the latest run set.
    :return:
    """
    device = torch.device("cuda:0" if cuda.is_available() else "cpu")
    helper = Helper()
    testing_set, testing_loader = helper.get_data(mode="test", testing_batch_size=1)
    print("Starting evaluation")
    to_tensor = transforms.ToTensor()

    true_val = {}
    score = {}
    tpr = {}
    fpr = {}
    thresh = {}
    area = {}

    for set_n in range(1, 11):
        true_val[set_n] = []
        score[set_n] = []
        model = load_model(filename + str(set_n) + ".pt", device)
        model.eval()
        total_len = len(testing_loader[set_n])

        with torch.no_grad():
            for i, (list_1, list_2, labels) in enumerate(testing_loader[set_n]):
                if type(list_1).__name__ != 'list' or type(list_2).__name__ != 'list':
                    print("Issues with testing file at location {0}".format(i))
                    print(list_1)
                    print(list_2)
                    continue

                # Average the embeddings of every image in each list
                l1_avg = np.zeros([1, model.features])
                l1 = 0
                l2_avg = np.zeros([1, model.features])
                l2 = 0
                for im in list_1:
                    try:
                        image = Image.open(im[0])
                        tensor_img = to_tensor(image).to(device)
                        output = model(tensor_img.unsqueeze(0))
                        l1_avg += output.cpu().numpy()
                        l1 += 1
                    except FileNotFoundError:
                        print("File {0} not found. Skipping.".format(im))
                # NOTE: l1/l2 are assumed nonzero here; if every file in a
                # list were missing, these divisions would fail.
                l1_avg /= l1
                for im in list_2:
                    try:
                        image = Image.open(im[0])
                        tensor_img = to_tensor(image).to(device)
                        output = model(tensor_img.unsqueeze(0))
                        l2_avg += output.cpu().numpy()
                        l2 += 1
                    except FileNotFoundError:
                        print("File {0} not found. Skipping.".format(im))
                l2_avg /= l2

                # Cosine similarity between the averaged embeddings is the match score
                s = cosine_similarity(l1_avg.reshape(1, -1), l2_avg.reshape(1, -1))[0, 0]
                score[set_n].append(s)
                true_val[set_n].append(labels.item())

                if (i + 1) % 500 == 0:
                    print("Step: {0}/{1}".format(i + 1, total_len))
                    # print(score[1][i], true_val[1][i])

        # Code to evaluate ROC graph is taken from the official documentation:
        # https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html#sphx-glr-auto-examples-model-selection-plot-roc-py
        fpr[set_n], tpr[set_n], thresh[set_n] = roc_curve(
            np.asarray(true_val[set_n]), np.asarray(score[set_n]))
        area[set_n] = auc(fpr[set_n], tpr[set_n])

        plt.figure()
        plt.plot(fpr[set_n], tpr[set_n], color='darkorange', lw=2,
                 label="ROC curve (area = {0:.2f})".format(area[set_n]))
        plt.xlim([0.0, 1.05])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic for Split {0}'.format(set_n))
        plt.legend(loc="lower right")
        plt.savefig(fname="images/{1}ROC{0}.jpg".format(set_n, filename[:-1]))

    color_list = ["aqua", "chocolate", "brown", "navy", "lime",
                  "olive", "silver", "gold", "pink", "magenta"]
    plt.figure()
    print("Thresholds acquired:")
    for set_n in range(1, 11):
        print("Split {0}".format(set_n), thresh[set_n])
        plt.plot(fpr[set_n], tpr[set_n], color=color_list[set_n - 1], lw=1,
                 label="Split {0}".format(set_n))
    plt.xlim([0.0, 1.05])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (Consolidated)')
    plt.legend(loc="lower right")
    plt.savefig(fname="images/{0}ROC_ALL.jpg".format(filename[:-1]))

    # NOTE: the ROC arrays can differ in length across splits, and `area`
    # holds scalars, so each dict is wrapped in pd.Series (shorter columns are
    # padded with NaN) rather than being passed to pd.DataFrame directly,
    # which would raise a ValueError.
    def _to_frame(d):
        return pd.DataFrame(dict((k, pd.Series(v)) for k, v in d.items()))

    _to_frame(true_val).to_csv(path_or_buf="results/{0}GT.csv".format(filename[:-1]))
    _to_frame(score).to_csv(path_or_buf="results/{0}Score.csv".format(filename[:-1]))
    _to_frame(tpr).to_csv(path_or_buf="results/{0}TPR.csv".format(filename[:-1]))
    _to_frame(fpr).to_csv(path_or_buf="results/{0}FPR.csv".format(filename[:-1]))
    _to_frame(thresh).to_csv(path_or_buf="results/{0}TH.csv".format(filename[:-1]))
    _to_frame(area).to_csv(path_or_buf="results/{0}Area.csv".format(filename[:-1]))
    print("\n\nDone")
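# --- Hedged usage sketch: evaluate() expects the "A2_T<timestamp>_S" prefix
# that main() writes to latest_t_stmp.txt; the split index and ".pt" suffix
# are appended internally. How load_model resolves the path is assumed to
# match training's output directory.
def _demo_evaluate_latest():
    with open("latest_t_stmp.txt") as fh:
        prefix = fh.read().strip()   # e.g. "A2_T20200101_120000_S"
    evaluate(prefix)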
def normalize_timestamps(time_lists, count_back, time_unit):

    """ Convert timestamps if they are strings """
    if isinstance(time_lists, list):
        time_lists = timestamp_list_to_obj(time_lists)
    elif isinstance(time_lists, dict):
        time_lists = timestamp_dict_to_obj(time_lists)
    else:
        logging.error('TimestampProcessor::normalize_timestamps -- '
                      'Timestamps must be contained in a list or dictionary.')
        return dict()

    time_lists, isList = timestamps_to_dict(time_lists)

    """ Depending on args set the start date """
    if count_back:
        start_date_obj = find_latest_date_in_list(time_lists)
    else:
        start_date_obj = find_earliest_date_in_list(time_lists)

    start_month = start_date_obj.month
    start_day = start_date_obj.day
    start_hr = start_date_obj.hour
    start_mte = start_date_obj.minute

    length_of_month = cal.mdays[start_month]

    # Normalize dates
    time_norm = mh.AutoVivification()
    for key in time_lists.keys():
        for date_obj in time_lists[key]:

            month = date_obj.month
            day = date_obj.day
            hr = date_obj.hour
            mte = date_obj.minute

            if time_unit == 0:
                # Time difference in days
                elem = (date_obj - start_date_obj).days + \
                       (date_obj - start_date_obj).seconds / (60 * 60 * 24)
            elif time_unit == 1:
                # Time difference in hours
                elem = (date_obj - start_date_obj).days * 24 + \
                       (date_obj - start_date_obj).seconds / (60 * 60)
            elif time_unit == 2:
                # Time difference in minutes within a day
                elem = (day - start_day) * 24 * 60 + \
                       (hr - start_hr) * 60 + (mte - start_mte)
            elif time_unit == 3:
                # Time difference in minutes across months
                elem = (month - start_month) * 24 * 60 * length_of_month + \
                       (day - start_day) * 24 * 60 + \
                       (hr - start_hr) * 60 + (mte - start_mte)

            try:
                time_norm[key].append(elem)
            except:
                time_norm[key] = list()
                time_norm[key].append(elem)

    """ If the original argument was a list put it back in that form """
    if isList:
        time_norm = time_norm[key]

    return time_norm
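# --- Illustrative only: normalizing format-1 timestamp strings against the
# earliest entry (count_back=False) with time_unit=1 (hours). The expected
# offsets assume timestamp_dict_to_obj parses 'YYYYMMDDHHMMSS' strings.
def _demo_normalize_timestamps():
    times = {'total': ['20111116000000', '20111116050000']}
    norm = normalize_timestamps(times, False, 1)
    # norm['total'] -> [0, 5]: offsets in hours from the earliest timestamp
    return norm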
def index(request):

    """
        PROCESS POST DATA
        =================

        Escape all user input that can be entered in text fields
    """
    try:
        campaign_regexp_filter = MySQLdb._mysql.escape_string(
            request.POST['campaign_regexp_filter'])
        if cmp(campaign_regexp_filter, '') == 0:
            campaign_regexp_filter = '^C_|^C11_'
    except:
        campaign_regexp_filter = '^C_|^C11_'

    try:
        min_donation = MySQLdb._mysql.escape_string(
            request.POST['min_donation'].strip())
        min_donation = int(min_donation)
    except:
        min_donation = 0

    # Filter on ISO codes to include matched countries
    try:
        iso_filter = MySQLdb._mysql.escape_string(
            request.POST['iso_filter'].strip())
    except:
        iso_filter = '.{2}'

    """ Call up cached results """
    cache = DC.LiveResults_DataCaching()
    dict_param = cache.retrieve_cached_data(view_keys.LIVE_RESULTS_DICT_KEY)

    measured_metrics_counts = dict_param['measured_metrics_counts']
    results = dict_param['results']
    column_names = dict_param['column_names']
    sampling_interval = dict_param['interval']
    duration_hrs = dict_param['duration']
    start_time = dict_param['start_time']
    end_time = dict_param['end_time']

    ir_cmpgn = DR.IntervalReporting(query_type=FDH._QTYPE_CAMPAIGN_ + FDH._QTYPE_TIME_,
                                    generate_plot=False)
    ir_banner = DR.IntervalReporting(query_type=FDH._QTYPE_BANNER_ + FDH._QTYPE_TIME_,
                                     generate_plot=False)
    ir_lp = DR.IntervalReporting(query_type=FDH._QTYPE_LP_ + FDH._QTYPE_TIME_,
                                 generate_plot=False)

    ir_cmpgn._counts_ = dict_param['ir_cmpgn_counts']
    ir_banner._counts_ = dict_param['ir_banner_counts']
    ir_lp._counts_ = dict_param['ir_lp_counts']

    ir_cmpgn._times_ = dict_param['ir_cmpgn_times']
    ir_banner._times_ = dict_param['ir_banner_times']
    ir_lp._times_ = dict_param['ir_lp_times']

    metric_legend_table = dict_param['metric_legend_table']
    conf_legend_table = dict_param['conf_legend_table']

    """ Filtering -- donations and artifacts """
    country_index = column_names.index('country')
    donations_index = column_names.index('donations')
    campaign_index = column_names.index('utm_campaign')

    new_results = list()

    # Keep rows above the minimum donation count that match the campaign
    # regexp and the ISO country filter
    for row in results:
        try:
            if row[donations_index] > min_donation and \
               re.search(campaign_regexp_filter, row[campaign_index]) and \
               re.search(iso_filter, row[country_index]):
                new_results.append(list(row))
        except:
            logging.error('live_results/views.py -- Could not process row: %s' % str(row))

    results = new_results

    new_measured_metrics_counts = dict()
    for metric in measured_metrics_counts:
        new_measured_metrics_counts[metric] = dict()

        for artifact_key in measured_metrics_counts[metric]:
            if re.search(campaign_regexp_filter, artifact_key):
                new_measured_metrics_counts[metric][artifact_key] = \
                    measured_metrics_counts[metric][artifact_key]

    """ Format results to encode html table cell markup in results """
    # First get colour codes on confidence
    ret = DR.ConfidenceReporting(query_type='', hyp_test='').get_confidence_on_time_range(
        None, None, None, measured_metrics_counts=new_measured_metrics_counts)
    conf_colour_code = ret[0]

    for row_index in range(len(results)):

        artifact_index = results[row_index][0] + '-' + \
                         results[row_index][1] + '-' + results[row_index][2]

        for col_index in range(len(column_names)):

            is_coloured_cell = False
            if column_names[col_index] in conf_colour_code.keys():
                if artifact_index in conf_colour_code[column_names[col_index]].keys():
                    results[row_index][col_index] = \
                        '<td style="background-color:' + \
                        conf_colour_code[column_names[col_index]][artifact_index] + \
                        ';">' + str(results[row_index][col_index]) + '</td>'
                    is_coloured_cell = True

            if not is_coloured_cell:
                results[row_index][col_index] = \
                    '<td>' + str(results[row_index][col_index]) + '</td>'

    if results:
        summary_table = DR.DataReporting()._write_html_table(
            results, column_names, use_standard_metric_names=True,
            omit_cell_markup=True)
    else:
        summary_table = '<p><font size="4">No data available.</font></p>'

    summary_table = '<h4><u>Metrics Legend:</u></h4><div class="spacer"></div>' + \
                    metric_legend_table + \
                    '<div class="spacer"></div><h4><u>Confidence Legend for Hypothesis Testing:</u></h4><div class="spacer"></div>' + \
                    conf_legend_table + \
                    '<div class="spacer"></div><div class="spacer"></div>' + summary_table

    """ Prepare Live Plots """

    """ Compose a list of zero data -- build a fresh inner list per element
        (the original `[[1.0, 0.0]] * n` aliased one shared list, so every
        element ended up with the same timestamp) """
    num_points = duration_hrs * 60 / sampling_interval + 1
    empty_data = [[1.0 * i * sampling_interval, 0.0] for i in range(num_points)]

    """ Extract data from interval reporting objects """
    cmpgn_data_dict = ir_cmpgn.get_data_lists(['C_', 'C11_', campaign_regexp_filter], empty_data)
    cmpgn_banner_dict = ir_banner.get_data_lists(['B_', 'B11_'], empty_data)
    cmpgn_lp_dict = ir_lp.get_data_lists(['L11_', '^cc'], empty_data)

    """ Build template parameters """
    # Combine the separate data sets
    template_dict = Hlp.combine_data_lists([cmpgn_data_dict, cmpgn_banner_dict, cmpgn_lp_dict])
    template_dict['summary_table'] = summary_table
    template_dict['latest_log_end_time'] = end_time
    template_dict['start_time'] = start_time

    return render_to_response('live_results/index.html', template_dict,
                              context_instance=RequestContext(request))
def execute_process(self, key, **kwargs):

    logging.info('Commencing caching of fundraiser totals data at: %s' % self.CACHING_HOME)

    end_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)

    """ DATA CONFIG """

    """ set the metrics to plot """
    lttdl = DL.LongTermTrendsLoader(db='db1025')

    start_of_2011_fundraiser = '20111116000000'
    countries = DL.CiviCRMLoader().get_ranked_donor_countries(start_of_2011_fundraiser)
    countries.append('Total')

    """ Dictionary object storing lists of regexes - each expression must
        pass for a label to persist """
    year_groups = dict()
    for country in countries:
        if cmp(country, 'Total') == 0:
            year_groups['2011 Total'] = ['2011.*']
            year_groups['2010 Total'] = ['2010.*']
        else:
            year_groups['2011 ' + country] = ['2011' + country]
            year_groups['2010 ' + country] = ['2010' + country]

    metrics = 'amount'
    weights = ''
    groups = year_groups
    group_metrics = ['year', 'country']
    metric_types = DL.LongTermTrendsLoader._MT_AMOUNT_

    include_totals = False
    include_others = False
    hours_back = 0
    time_unit = TP.DAY

    """ END CONFIG """

    """ For each metric use the LongTermTrendsLoader to generate the data to plot """
    dr = DR.DataReporting()

    times, counts = lttdl.run_fundrasing_totals(
        end_time, metric_name=metrics, metric_type=metric_types,
        groups=groups, group_metric=group_metrics,
        include_other=include_others, include_total=include_totals,
        hours_back=hours_back, weight_name=weights, time_unit=time_unit)

    dict_param = dict()

    for country in countries:

        key_2011 = '2011 ' + country
        key_2010 = '2010 ' + country

        new_counts = dict()
        new_counts[key_2010] = counts[key_2010]
        new_counts[key_2011] = counts[key_2011]

        new_times = dict()
        new_times[key_2010] = times[key_2010]
        new_times[key_2011] = times[key_2011]

        dr._counts_ = new_counts
        dr._times_ = new_times

        empty_data = [0] * len(new_times[new_times.keys()[0]])
        data = list()
        data.append(dr.get_data_lists([''], empty_data))

        dict_param[country] = Hlp.combine_data_lists(data)

    self.clear_cached_data(key)
    self.cache_data(dict_param, key)

    logging.info('Caching complete.')
def daily_totals(request):

    err_msg = ''

    start_day_ts = TP.timestamp_from_obj(
        datetime.datetime.utcnow() + datetime.timedelta(days=-1), 1, 0)
    end_day_ts = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 0)
    country = '.{2}'
    min_donation = 0
    order_str = 'order by 1 desc,3 desc'

    """ PROCESS POST """

    if 'start_day_ts' in request.POST:
        if cmp(request.POST['start_day_ts'], '') != 0:
            start_day_ts = MySQLdb._mysql.escape_string(
                request.POST['start_day_ts'].strip())
            format = TP.getTimestampFormat(start_day_ts)

            if format == 2:
                start_day_ts = TP.timestamp_convert_format(start_day_ts, 2, 1)
                # start_day_ts = start_day_ts[:8] + '000000'
            elif format == -1:
                err_msg = err_msg + 'Start timestamp is formatted incorrectly\n'

    if 'end_day_ts' in request.POST:
        if cmp(request.POST['end_day_ts'], '') != 0:
            end_day_ts = MySQLdb._mysql.escape_string(
                request.POST['end_day_ts'].strip())
            # Validate the end timestamp (the original checked start_day_ts
            # here, a copy-paste bug)
            format = TP.getTimestampFormat(end_day_ts)

            if format == 2:
                end_day_ts = TP.timestamp_convert_format(end_day_ts, 2, 1)
                # end_day_ts = end_day_ts[:8] + '000000'
            elif format == -1:
                err_msg = err_msg + 'End timestamp is formatted incorrectly\n'

    if 'country' in request.POST:
        if cmp(request.POST['country'], '') != 0:
            country = MySQLdb._mysql.escape_string(request.POST['country'])

    if 'min_donation' in request.POST:
        if cmp(request.POST['min_donation'], '') != 0:
            try:
                min_donation = int(MySQLdb._mysql.escape_string(
                    request.POST['min_donation'].strip()))
            except:
                logging.error('live_results/daily_totals -- Could not process '
                              'minimum donation for "%s" '
                              % request.POST['min_donation'].strip())
                min_donation = 0

    if 'order_metric' in request.POST:
        if cmp(request.POST['order_metric'], 'Date') == 0:
            order_str = 'order by 1 desc,3 desc'
        elif cmp(request.POST['order_metric'], 'Country') == 0:
            order_str = 'order by 2 asc,1 desc'

    """ === END POST === """

    query_name = 'report_daily_totals_by_country'
    filename = projSet.__sql_home__ + query_name + '.sql'

    sql_stmnt = Hlp.file_to_string(filename)
    sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_day_ts, end_day_ts],
                                country=country, min_donation=min_donation,
                                order_str=order_str)

    dl = DL.DataLoader()
    results = dl.execute_SQL(sql_stmnt)
    html_table = DR.DataReporting()._write_html_table(
        results, dl.get_column_names(), use_standard_metric_names=True)

    return render_to_response('live_results/daily_totals.html',
                              {'html_table': html_table,
                               'start_time': TP.timestamp_convert_format(start_day_ts, 1, 2),
                               'end_time': TP.timestamp_convert_format(end_day_ts, 1, 2)},
                              context_instance=RequestContext(request))
def format_query(query_name, sql_stmnt, args, **kwargs):

    country, min_donation, order_str = process_kwargs(kwargs)

    if cmp(query_name, 'report_campaign_ecomm') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % (start_time))

    elif cmp(query_name, 'report_campaign_logs') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % (start_time, start_time, start_time))

    elif cmp(query_name, 'report_campaign_ecomm_by_hr') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start_time))

    elif cmp(query_name, 'report_campaign_logs_by_hr') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start_time,
                                     '%', '%', '%', '%', start_time,
                                     '%', '%', '%', '%', start_time, '%'))

    elif cmp(query_name, 'report_impressions_country') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', start_time))

    elif cmp(query_name, 'report_campaign_logs_by_min') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start_time,
                                     '%', '%', '%', '%', start_time,
                                     '%', '%', '%', '%', start_time))

    elif cmp(query_name, 'report_non_US_clicks') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', start_time,
                                     '%', '%', '%', start_time))

    elif cmp(query_name, 'report_contribution_tracking') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', '%', start_time))

    elif cmp(query_name, 'report_total_amounts_by_hr') == 0:
        start_time = args[0]
        end_time = args[1]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', ' %H', start_time, end_time))

    elif cmp(query_name, 'report_total_amounts_by_day') == 0:
        start_time = args[0]
        end_time = args[1]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '', start_time, end_time))

    elif cmp(query_name, 'report_LP_metrics') == 0 or \
         cmp(query_name, 'report_LP_metrics_1S') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        min_views = args[3]

        """ Format the condition for minimum views """
        if cmp(str(min_views), '-1') == 0:
            min_views = ' '
        else:
            min_views = 'where lp.views > ' + str(min_views) + ' '

        sql_stmnt = str(sql_stmnt % (start_time, end_time, campaign, country,
                                     start_time, end_time, campaign, country,
                                     start_time, end_time, campaign, country,
                                     min_views))

    elif cmp(query_name, 'report_banner_metrics') == 0 or \
         cmp(query_name, 'report_bannerLP_metrics') == 0 or \
         cmp(query_name, 'report_total_metrics') == 0 or \
         cmp(query_name, 'report_banner_metrics_1S') == 0 or \
         cmp(query_name, 'report_bannerLP_metrics_1S') == 0 or \
         cmp(query_name, 'report_total_metrics_1S') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        min_views = args[3]

        """ Format the condition for minimum views """
        if cmp(str(min_views), '-1') == 0:
            min_views = ' '
        else:
            min_views = 'where lp.views > ' + str(min_views) + ' '

        sql_stmnt = str(sql_stmnt % (start_time, end_time, country,
                                     start_time, end_time, campaign, country,
                                     start_time, end_time, country,
                                     start_time, end_time, campaign, country,
                                     start_time, end_time, campaign, country,
                                     min_views))

    elif cmp(query_name, 'report_latest_campaign') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % (start_time))

    elif cmp(query_name, 'report_banner_impressions_by_hour') == 0:
        start = args[0]
        end = args[1]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start, end))

    elif cmp(query_name, 'report_ecomm_by_amount') == 0:
        start_time = args[0]
        end_time = args[1]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start_time,
                                     end_time, end_time))

    elif cmp(query_name, 'report_ecomm_by_contact') == 0:
        where_str = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', where_str))

    elif cmp(query_name, 'report_LP_metrics_minutely') == 0 or \
         cmp(query_name, 'report_LP_metrics_minutely_1S') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        interval = args[3]

        """ The start time for the impression portion of the query should be one second less """
        start_time_obj = TP.timestamp_to_obj(start_time, 1)
        imp_start_time_obj = start_time_obj + datetime.timedelta(seconds=-1)
        imp_start_time_obj_str = TP.timestamp_from_obj(imp_start_time_obj, 1, 3)

        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', interval, interval,
                                     start_time, end_time, campaign, country,
                                     '%', '%', '%', '%', interval, interval,
                                     start_time, end_time, campaign, country,
                                     start_time, end_time, campaign, country,
                                     campaign))

    elif cmp(query_name, 'report_banner_metrics_minutely') == 0 or \
         cmp(query_name, 'report_bannerLP_metrics_minutely') == 0 or \
         cmp(query_name, 'report_banner_metrics_minutely_1S') == 0 or \
         cmp(query_name, 'report_bannerLP_metrics_minutely_1S') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        interval = args[3]

        """ The start time for the impression portion of the query should be one second less """
        start_time_obj = TP.timestamp_to_obj(start_time, 1)
        imp_start_time_obj = start_time_obj + datetime.timedelta(seconds=-1)
        imp_start_time_obj_str = TP.timestamp_from_obj(imp_start_time_obj, 1, 3)

        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', interval, interval,
                                     imp_start_time_obj_str, end_time, country,
                                     '%', '%', '%', '%', interval, interval,
                                     start_time, end_time, campaign, country,
                                     '%', '%', '%', '%', interval, interval,
                                     start_time, end_time, country,
                                     '%', '%', '%', '%', interval, interval,
                                     start_time, end_time, campaign, country,
                                     start_time, end_time, campaign, country,
                                     campaign, ))

    elif cmp(query_name, 'report_campaign_metrics_minutely') == 0 or \
         cmp(query_name, 'report_campaign_metrics_minutely_1S') == 0 or \
         cmp(query_name, 'report_campaign_metrics_minutely_total') == 0 or \
         cmp(query_name, 'report_campaign_metrics_minutely_total_1S') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        interval = args[3]

        sql_stmnt = str(sql_stmnt % (campaign, '%', '%', '%', '%', interval, interval,
                                     start_time, end_time, campaign, country,
                                     '%', '%', '%', '%', interval, interval,
                                     start_time, end_time, campaign, country))

    elif cmp(query_name, 'report_campaign_totals') == 0:
        start_time = args[0]
        end_time = args[1]
        sql_stmnt = str(sql_stmnt % (start_time, end_time))

    elif cmp(query_name, 'report_campaign_banners') == 0:
        start_time = args[0]
        end_time = args[1]
        utm_campaign = args[2]
        sql_stmnt = str(sql_stmnt % (start_time, end_time, utm_campaign))

    elif cmp(query_name, 'report_campaign_lps') == 0:
        start_time = args[0]
        end_time = args[1]
        utm_campaign = args[2]
        sql_stmnt = str(sql_stmnt % (start_time, end_time, utm_campaign))

    elif cmp(query_name, 'report_campaign_bannerlps') == 0:
        start_time = args[0]
        end_time = args[1]
        utm_campaign = args[2]
        sql_stmnt = str(sql_stmnt % (start_time, end_time, utm_campaign))

    elif cmp(query_name, 'report_campaign_metrics_minutely_all') == 0 or \
         cmp(query_name, 'report_banner_metrics_minutely_all') == 0 or \
         cmp(query_name, 'report_lp_metrics_minutely_all') == 0:
        start_time = args[0]
        end_time = args[1]
        interval = args[3]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', interval, interval,
                                     start_time, end_time))

    elif cmp(query_name, 'report_donation_metrics') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        sql_stmnt = str(sql_stmnt % (start_time, end_time, campaign, country,
                                     start_time, end_time, campaign, country))

    elif cmp(query_name, 'report_total_donations') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]

        """ Recursively construct the sub-query """
        sub_query_name = 'report_donation_metrics'
        sub_query_sql = Hlp.file_to_string(projSet.__sql_home__ + sub_query_name + '.sql')
        sub_query_sql = format_query(sub_query_name, sub_query_sql,
                                     [start_time, end_time, campaign],
                                     country=country)

        sql_stmnt = str(sql_stmnt % sub_query_sql)

    elif cmp(query_name, 'report_daily_totals_by_country') == 0:
        start_time = args[0]
        end_time = args[1]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', start_time, end_time,
                                     country, min_donation, order_str))

    else:
        return 'no such table\n'

    return sql_stmnt
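# --- Usage sketch, mirroring the daily_totals view above: the positional args
# carry the time range and the kwargs are unpacked by process_kwargs. The
# concrete values are illustrative.
def _demo_format_query():
    query_name = 'report_daily_totals_by_country'
    sql_stmnt = Hlp.file_to_string(projSet.__sql_home__ + query_name + '.sql')

    return format_query(query_name, sql_stmnt,
                        ['20111116000000', '20111117000000'],
                        country='.{2}', min_donation=0,
                        order_str='order by 1 desc,3 desc')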
def main(batch_size, num_epochs, lr, file_write, flag_dummy, temperature, lr_decay, features):
    """
    Main function for training.

    :param batch_size:  Batch size to use.
    :param num_epochs:  Number of epochs for each split.
    :param lr:          Learning rate to be set at the start of each split.
    :param file_write:  Write output to stdout (default) or a file.
    :param flag_dummy:  Create a dummy file for evaluation.
    :param temperature: Default temperature for the softmax/log_softmax layer while training.
    :param lr_decay:    Learning rate decay for every drop in min loss observed.
    :param features:    Number of nodes for the penultimate feature layer.
    :return:
    """
    # ---------------------Parameters---------------------
    device = torch.device("cuda:0" if cuda.is_available() else "cpu")
    # optim_name = 'SGD'
    # optim_name = 'RMS'
    optim_name = 'Adam'
    batch_print = 50
    op_dir = "pickles/"
    t_stmp = time.strftime("%Y%m%d_%H%M%S", time.gmtime())

    # Record the name of the latest models so evaluate() can find them
    ff = open("latest_t_stmp.txt", 'w')
    ff.write("A2_T{0}_S".format(t_stmp))
    ff.close()

    helper = Helper("log/log_" + t_stmp + ".txt")
    helper.write_file(file_write)

    helper.log(msg="Starting data loading.")
    training_set, training_loader = helper.get_data(mode="train",
                                                    training_batch_size=batch_size)
    helper.log(msg="Finished data loading. Starting main training.")

    for set_n in range(1, 11):
        init_lr = lr
        model, criterion, optimizer = mod.get_model(device, optim_name, lamb=0,
                                                    learning_rate=init_lr,
                                                    final_features=features)
        model.train(True)
        model.set_temperature(temperature)

        if flag_dummy:
            helper.log(msg="\nCreating dummy file.\n")
            dummy_file = {
                "model": model.state_dict(),
                "criterion": criterion.state_dict(),
                "optimizer": optimizer.state_dict(),
                "optim_name": optim_name,
                "features": features
            }
            torch.save(dummy_file, op_dir + "dummy.pt")
            flag_dummy = False

        helper.log(msg="\nStart of split {0}\n".format(set_n))
        total_len = len(training_loader[set_n])
        running_loss = 0.0
        cor = 0
        tot = 0
        cor_b = 0
        tot_b = 0
        past_loss = 6.0 * batch_print

        for epoch in range(num_epochs):
            for i, (images, labels) in enumerate(training_loader[set_n]):
                # Change variable type to match GPU requirements
                inp = images.to(device)
                lab = labels.to(device)

                # Reset gradients before processing
                optimizer.zero_grad()

                # Get model output
                out = model(inp)

                # Calculate loss
                loss = criterion(out, lab)

                # Accuracy calc
                _, predicted = torch.max(out.data, 1)
                tot_b += batch_size
                cor_b += (predicted == lab).sum().item()
                tot += batch_size
                cor += (predicted == lab).sum().item()

                # Update weights
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                # logger.log(msg="\rLoss = {0} ".format(l), end="")

                if (i + 1) % batch_print == 0:
                    helper.log(msg="Split: {3}, Epoch: {0}, step: {1}/{2} ".format(
                        epoch + 1, i + 1, total_len, set_n), end="\t")
                    helper.log(msg="Running Loss (avg): {0:.06f}, Past: {1:.06f}".format(
                        (running_loss / batch_print), (past_loss / batch_print)), end="\t")
                    helper.log(msg="Accuracy: (Per {2})|(Total): {0:.03f}|{1:.03f} %".format(
                        (cor_b * 100) / tot_b, (cor * 100) / tot,
                        batch_size * batch_print), end="\t")

                    # Decay the learning rate (floored at 0.001) whenever the
                    # running loss improves on the best seen so far
                    if running_loss < past_loss:
                        past_loss = running_loss
                        init_lr *= lr_decay
                        for params in optimizer.param_groups:
                            params['lr'] = max(init_lr, 0.001)

                    helper.log(msg="LR: {0:.06f}".format(init_lr))
                    running_loss = 0.0
                    cor_b = 0
                    tot_b = 0

        filename = op_dir + "A2_T{1}_S{0}.pt".format(set_n, t_stmp)
        # Idea for named saved file was picked up from here:
        # https://github.com/quiltdata/pytorch-examples/blob/master/imagenet/main.py
        save_file = {
            "model": model.state_dict(),
            "criterion": criterion.state_dict(),
            "optimizer": optimizer.state_dict(),
            "optim_name": optim_name,
            "features": features
        }
        torch.save(save_file, filename)
        helper.log(msg="\nFile {0} saved for split {1}".format(filename, set_n))

    helper.close()
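# --- Illustrative invocation; the hyperparameter values below are
# assumptions for demonstration, not defaults from the source.
if __name__ == "__main__":
    main(batch_size=64, num_epochs=5, lr=0.01, file_write=False,
         flag_dummy=True, temperature=1.0, lr_decay=0.95, features=512)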