Example #1
def impression_list(request):
    
    err_msg = ''
    where_clause = ''
    
    """ 
        Process times and POST
        ======================
    """
    duration_hrs = 2
    end_time, start_time = TP.timestamps_for_interval(datetime.datetime.utcnow(), 1, hours=-duration_hrs)    
    
    if 'earliest_utc_ts' in request.POST:
        if cmp(request.POST['earliest_utc_ts'], '') != 0:
            earliest_utc_ts = MySQLdb._mysql.escape_string(request.POST['earliest_utc_ts'].strip())
            format = TP.getTimestampFormat(earliest_utc_ts)
            
            if format == 1:
                start_time = earliest_utc_ts
            elif format == 2:
                start_time = TP.timestamp_convert_format(earliest_utc_ts, 2, 1)
            elif format == -1:
                err_msg = err_msg + 'Start timestamp is formatted incorrectly\n'
    
    if 'latest_utc_ts' in request.POST:
        if cmp(request.POST['latest_utc_ts'], '') != 0:
            latest_utc_ts = MySQLdb._mysql.escape_string(request.POST['latest_utc_ts'].strip())
            format = TP.getTimestampFormat(latest_utc_ts)
            
            if format == 1:
                end_time = latest_utc_ts
            elif format == 2:
                end_time = TP.timestamp_convert_format(latest_utc_ts, 2, 1)
            elif format == -1:
                err_msg = err_msg + 'End timestamp is formatted incorrectly\n'
            
    if 'iso_code' in request.POST:
        if cmp(request.POST['iso_code'], '') != 0:
            iso_code = MySQLdb._mysql.escape_string(request.POST['iso_code'].strip())
            where_clause = "where bi.country regexp '%s' " % iso_code
                    
    """ 
        Format and execute query 
        ========================
    """
        
    query_name = 'report_country_impressions.sql'    
    
    sql_stmnt = Hlp.file_to_string(projSet.__sql_home__ + query_name)
    sql_stmnt = sql_stmnt % (start_time, end_time, start_time, end_time, start_time, end_time, where_clause)
    
    dl = DL.DataLoader()
    results = dl.execute_SQL(sql_stmnt)
    column_names = dl.get_column_names()

    imp_table = DR.DataReporting()._write_html_table(results, column_names)
    
    return render_to_response(
        'live_results/impression_list.html', {
            'imp_table': imp_table.decode("utf-8"),
            'err_msg': err_msg,
            'start': TP.timestamp_convert_format(start_time, 1, 2),
            'end': TP.timestamp_convert_format(end_time, 1, 2)
        },
        context_instance=RequestContext(request))
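The view above escapes user input by hand and interpolates it into the SQL text; with a DB-API cursor, %s placeholders let the driver do the quoting instead. A minimal sketch (the connection parameters and table name are illustrative, not from this project):

def impressions_for_country(iso_code):
    import MySQLdb

    conn = MySQLdb.connect(host='localhost', user='reader', passwd='', db='reporting')
    cursor = conn.cursor()
    # the driver escapes %s parameters itself; no manual escape_string() call
    cursor.execute("select * from banner_impressions bi where bi.country regexp %s",
                   (iso_code,))
    return cursor.fetchall()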
Example #2
def timestamps_to_dict(time_lists):
    """ Wrap a list of timestamps in an AutoVivification dict under a single
        key, and flag whether the input was a list so callers can unwrap it. """

    isList = 0
    if type(time_lists) is list:
        isList = 1

        old_list = time_lists
        time_lists = mh.AutoVivification()

        key = 'key'
        time_lists[key] = list(old_list)

    return [time_lists, isList]
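A usage sketch of the helper above: a plain list comes back wrapped under the single key 'key', with a flag recording that the input was a list.

time_lists, isList = timestamps_to_dict(['20111116000000', '20111116010000'])
print(isList)             # 1
print(time_lists['key'])  # ['20111116000000', '20111116010000']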
Example #3
    def execute_process(self, key, **kwargs):

        logging.info('Commencing caching of live results data at:  %s' %
                     self.CACHING_HOME)
        shelve_key = key
        """ Find the earliest and latest page views for a given campaign  """
        lptl = DL.LandingPageTableLoader(db='db1025')

        query_name = 'report_summary_results_country.sql'
        query_name_1S = 'report_summary_results_country_1S.sql'
        campaign_regexp_filter = '^C_|^C11_'

        dl = DL.DataLoader(db='db1025')
        end_time, start_time = TP.timestamps_for_interval(
            datetime.datetime.utcnow(), 1, hours=-self.DURATION_HRS)
        """ Should a one-step query be used? """
        use_one_step = lptl.is_one_step(
            start_time, end_time, 'C11'
        )  # Assume it is a one step test if there are no impressions for this campaign in the landing page table
        """ 
            Retrieve the latest time for which impressions have been loaded
            ===============================================================
        """

        sql_stmnt = 'select max(end_time) as latest_ts from squid_log_record where log_completion_pct = 100.00'

        results = dl.execute_SQL(sql_stmnt)
        latest_timestamp = results[0][0]
        latest_timestamp = TP.timestamp_from_obj(latest_timestamp, 2, 3)
        latest_timestamp_flat = TP.timestamp_convert_format(
            latest_timestamp, 2, 1)

        ret = DR.ConfidenceReporting(query_type='', hyp_test='',
                                     db='db1025').get_confidence_on_time_range(
                                         start_time,
                                         end_time,
                                         campaign_regexp_filter,
                                         one_step=use_one_step)
        measured_metrics_counts = ret[1]
        """ Prepare Summary results """

        sql_stmnt = Hlp.file_to_string(projSet.__sql_home__ + query_name)
        sql_stmnt = sql_stmnt % (start_time, latest_timestamp_flat, start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, \
                                 start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, \
                                 start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, campaign_regexp_filter)

        logging.info('Executing report_summary_results ...')

        results = dl.execute_SQL(sql_stmnt)
        column_names = dl.get_column_names()

        if use_one_step:

            logging.info('... including one step artifacts ...')

            sql_stmnt_1S = Hlp.file_to_string(projSet.__sql_home__ +
                                              query_name_1S)
            sql_stmnt_1S = sql_stmnt_1S % (start_time, latest_timestamp_flat, start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, \
                                     start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, \
                                     start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, campaign_regexp_filter)

            results = list(results)
            results_1S = dl.execute_SQL(sql_stmnt_1S)
            """ Ensure that the results are unique """
            one_step_keys = list()
            for row in results_1S:
                one_step_keys.append(str(row[0]) + str(row[1]) + str(row[2]))

            new_results = list()
            for row in results:
                key = str(row[0]) + str(row[1]) + str(row[2])
                if not (key in one_step_keys):
                    new_results.append(row)
            results = new_results

            results.extend(list(results_1S))

        metric_legend_table = DR.DataReporting().get_standard_metrics_legend()
        conf_legend_table = DR.ConfidenceReporting(
            query_type='bannerlp',
            hyp_test='TTest').get_confidence_legend_table()
        """ Create interval reporting objects """

        sampling_interval = 5  # 5 minute sampling interval for donation plots

        ir_cmpgn = DR.IntervalReporting(query_type=FDH._QTYPE_CAMPAIGN_ +
                                        FDH._QTYPE_TIME_,
                                        generate_plot=False,
                                        db='db1025')
        ir_banner = DR.IntervalReporting(query_type=FDH._QTYPE_BANNER_ +
                                         FDH._QTYPE_TIME_,
                                         generate_plot=False,
                                         db='db1025')
        ir_lp = DR.IntervalReporting(query_type=FDH._QTYPE_LP_ +
                                     FDH._QTYPE_TIME_,
                                     generate_plot=False,
                                     db='db1025')
        """ Execute queries """
        ir_cmpgn.run(start_time, end_time, sampling_interval, 'donations', '',
                     {})
        ir_banner.run(start_time, end_time, sampling_interval, 'donations', '',
                      {})
        ir_lp.run(start_time, end_time, sampling_interval, 'donations', '', {})
        """ Prepare serialized objects """

        dict_param = dict()

        dict_param['metric_legend_table'] = metric_legend_table
        dict_param['conf_legend_table'] = conf_legend_table

        dict_param['measured_metrics_counts'] = measured_metrics_counts
        dict_param['results'] = results
        dict_param['column_names'] = column_names

        dict_param['interval'] = sampling_interval
        dict_param['duration'] = self.DURATION_HRS

        dict_param['start_time'] = TP.timestamp_convert_format(
            start_time, 1, 2)
        dict_param['end_time'] = TP.timestamp_convert_format(end_time, 1, 2)

        dict_param['ir_cmpgn_counts'] = ir_cmpgn._counts_
        dict_param['ir_banner_counts'] = ir_banner._counts_
        dict_param['ir_lp_counts'] = ir_lp._counts_

        dict_param['ir_cmpgn_times'] = ir_cmpgn._times_
        dict_param['ir_banner_times'] = ir_banner._times_
        dict_param['ir_lp_times'] = ir_lp._times_

        self.clear_cached_data(shelve_key)
        self.cache_data(dict_param, shelve_key)

        logging.info('Caching complete.')
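The one-step de-duplication above scans the one_step_keys list once per row; a set keeps the same row-key convention with constant-time membership tests. A sketch using the names from the snippet:

one_step_keys = set(str(row[0]) + str(row[1]) + str(row[2]) for row in results_1S)
results = [row for row in results
           if str(row[0]) + str(row[1]) + str(row[2]) not in one_step_keys]
results.extend(list(results_1S))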
Example #4
    def execute_process(self, key, **kwargs):

        logging.info('Commencing caching of long term trends data at:  %s' %
                     self.CACHING_HOME)

        end_time, start_time = TP.timestamps_for_interval(datetime.datetime.utcnow(), 1, \
                                                          hours=-self.VIEW_DURATION_HRS, resolution=1)
        """ DATA CONFIG """

        countries = DL.CiviCRMLoader().get_ranked_donor_countries(start_time)
        countries = countries[1:6]
        """ set the metrics to plot """
        lttdl = DL.LongTermTrendsLoader(db='storage3')
        """ Dictionary object storing lists of regexes - each expression must pass for a label to persist """
        # country_groups = {'US': ['(US)'], 'CA': ['(CA)'], 'JP': ['(JP)'], 'IN': ['(IN)'], 'NL': ['(NL)']}
        payment_groups = {'Credit Card': ['^cc$'], 'Paypal': ['^pp$']}
        currency_groups = {
            'USD': ['(USD)'],
            'CAD': ['(CAD)'],
            'JPY': ['(JPY)'],
            'EUR': ['(EUR)']
        }
        lang_cntry_groups = {
            'US': ['US..', '.{4}'],
            'EN': ['[^U^S]en', '.{4}']
        }

        top_cntry_groups = dict()
        for country in countries:
            top_cntry_groups[country] = [country, '.{2}']

        # To include click rate
        # groups = [ lang_cntry_groups] metrics = ['click_rate'] metrics_index = [3]
        # group_metrics = [DL.LongTermTrendsLoader._MT_RATE_] metric_types = ['country', 'language'] include_totals = [True] include_others = [True]

        metrics = [
            'impressions', 'views', 'donations', 'donations', 'amount',
            'amount', 'diff_don', 'diff_don', 'donations', 'conversion_rate'
        ]
        weights = ['', '', '', '', '', '', 'donations', 'donations', '', '']
        metrics_index = [0, 1, 2, 2, 2, 4, 5, 5, 6, 6]
        groups = [lang_cntry_groups, lang_cntry_groups, lang_cntry_groups, top_cntry_groups, lang_cntry_groups, currency_groups, \
                  lang_cntry_groups, lang_cntry_groups, payment_groups, payment_groups]
        """  The metrics that are used to build a group string to be qualified via regex - the values of the list metrics are concatenated """
        group_metrics = [['country', 'language'], ['country', 'language'], ['country', 'language'], \
                         ['country', 'language'], ['country', 'language'], ['currency'], ['country', 'language'], \
                         ['country', 'language'], ['payment_method'], ['payment_method']]

        metric_types = [DL.LongTermTrendsLoader._MT_AMOUNT_, DL.LongTermTrendsLoader._MT_AMOUNT_, DL.LongTermTrendsLoader._MT_AMOUNT_, \
                        DL.LongTermTrendsLoader._MT_AMOUNT_, DL.LongTermTrendsLoader._MT_AMOUNT_, DL.LongTermTrendsLoader._MT_AMOUNT_, \
                        DL.LongTermTrendsLoader._MT_RATE_WEIGHTED_, DL.LongTermTrendsLoader._MT_RATE_WEIGHTED_, DL.LongTermTrendsLoader._MT_AMOUNT_, \
                        DL.LongTermTrendsLoader._MT_RATE_]

        include_totals = [
            True, True, True, False, True, True, False, False, False, True
        ]
        include_others = [
            True, True, True, False, True, True, True, True, True, False
        ]
        hours_back = [0, 0, 0, 0, 0, 0, 24, 168, 0, 0]
        time_unit = [
            TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR,
            TP.HOUR, TP.HOUR, TP.HOUR
        ]

        data = list()
        """ END CONFIG """
        """ For each metric use the LongTermTrendsLoader to generate the data to plot """
        for index in range(len(metrics)):

            dr = DR.DataReporting()

            times, counts = lttdl.run_query(start_time, end_time, metrics_index[index], metric_name=metrics[index], metric_type=metric_types[index], \
                                            groups=groups[index], group_metric=group_metrics[index], include_other=include_others[index], \
                                            include_total=include_totals[index], hours_back=hours_back[index], weight_name=weights[index], \
                                            time_unit=time_unit[index])

            times = TP.normalize_timestamps(times, False, time_unit[index])

            dr._counts_ = counts
            dr._times_ = times

            empty_data = [0] * len(times[times.keys()[0]])
            data.append(dr.get_data_lists([''], empty_data))

        dict_param = Hlp.combine_data_lists(data)
        dict_param['interval'] = self.VIEW_DURATION_HRS
        dict_param['end_time'] = TP.timestamp_convert_format(end_time, 1, 2)

        self.clear_cached_data(key)
        self.cache_data(dict_param, key)

        logging.info('Caching complete.')
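The loop above walks ten parallel configuration lists by index; zip() expresses the same pass without the index bookkeeping. A sketch over a few of the lists (the remaining keyword arguments from the original run_query call are elided, and the other parallel lists would be zipped in the same way):

for metric, m_index, group, m_type in zip(metrics, metrics_index, groups, metric_types):
    times, counts = lttdl.run_query(start_time, end_time, m_index,
                                    metric_name=metric, metric_type=m_type,
                                    groups=group)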
Example #5
def evaluate(filename):
    """
    Evaluate the models based on the timestamp provided.
    :param filename: Timestamp prefix of the latest run's saved model files.
    :return:
    """
    device = torch.device("cuda:0" if cuda.is_available() else "cpu")

    helper = Helper()

    testing_set, testing_loader = helper.get_data(mode="test",
                                                  testing_batch_size=1)

    print("Starting evaluation")

    to_tensor = transforms.ToTensor()

    true_val = {}
    score = {}
    tpr = {}
    fpr = {}
    thresh = {}
    area = {}

    for set_n in range(1, 11):

        true_val[set_n] = []
        score[set_n] = []

        model = load_model(filename + str(set_n) + ".pt", device)

        model.eval()

        total_len = len(testing_loader[set_n])

        with torch.no_grad():
            for i, (list_1, list_2,
                    labels) in enumerate(testing_loader[set_n]):

                if not isinstance(list_1, list) or not isinstance(list_2, list):
                    print("Issues with testing file at location {0}".format(i))
                    print(list_1)
                    print(list_2)
                    continue

                l1_avg = np.zeros([1, model.features])
                l1 = 0

                l2_avg = np.zeros([1, model.features])
                l2 = 0

                for im in list_1:
                    try:
                        image = Image.open(im[0])
                        tensor_img = to_tensor(image).to(device)
                        output = model(tensor_img.unsqueeze(0))
                        l1_avg += output.cpu().numpy()
                        l1 += 1
                    except FileNotFoundError:
                        print("File {0} not found. Skipping.".format(im))
                l1_avg /= l1  # mean feature vector; assumes at least one image in list_1 loaded

                for im in list_2:
                    try:
                        image = Image.open(im[0])
                        tensor_img = to_tensor(image).to(device)
                        output = model(tensor_img.unsqueeze(0))
                        l2_avg += output.cpu().numpy()
                        l2 += 1
                    except FileNotFoundError:
                        print("File {0} not found. Skipping.".format(im))
                l2_avg /= l2

                s = cosine_similarity(l1_avg.reshape(1, -1),
                                      l2_avg.reshape(1, -1))[0, 0]
                score[set_n].append(s)
                true_val[set_n].append(labels.item())
                if (i + 1) % 500 == 0:
                    print("Step: {0}/{1}".format(i + 1, total_len))
                #     print(score[1][i], true_val[1][i])

            # Code to evaluate ROC graph is taken from the official documentation.
            # https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html#sphx-glr-auto-examples-model-selection-plot-roc-py

            fpr[set_n], tpr[set_n], thresh[set_n] = roc_curve(
                np.asarray(true_val[set_n]), np.asarray(score[set_n]))
            area[set_n] = auc(fpr[set_n], tpr[set_n])

            plt.figure()
            plt.plot(fpr[set_n],
                     tpr[set_n],
                     color='darkorange',
                     lw=2,
                     label="ROC curve (area = {0:.2f})".format(area[set_n]))

            plt.xlim([0.0, 1.05])
            plt.ylim([0.0, 1.05])
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title('Receiver Operating Characteristic for Split {0}'.format(
                set_n))
            plt.legend(loc="lower right")
            plt.savefig(
                fname="images/{1}ROC{0}.jpg".format(set_n, filename[:-1]))

    color_list = [
        "aqua", "chocolate", "brown", "navy", "lime", "olive", "silver",
        "gold", "pink", "magenta"
    ]

    plt.figure()
    print("Thresholds acquired:")
    for set_n in range(1, 11):
        print("Split {0}".format(set_n), thresh[set_n])
        plt.plot(fpr[set_n],
                 tpr[set_n],
                 color=color_list[set_n - 1],
                 lw=1,
                 label="Split {0}".format(set_n))

    plt.xlim([0.0, 1.05])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (Consolidated)')
    plt.legend(loc="lower right")
    plt.savefig(fname="images/{0}ROC_ALL.jpg".format(filename[:-1]))

    pd.DataFrame(true_val).to_csv(
        path_or_buf="results/{0}GT.csv".format(filename[:-1]))
    pd.DataFrame(score).to_csv(
        path_or_buf="results/{0}Score.csv".format(filename[:-1]))
    pd.DataFrame(tpr).to_csv(
        path_or_buf="results/{0}TPR.csv".format(filename[:-1]))
    pd.DataFrame(fpr).to_csv(
        path_or_buf="results/{0}FPR.csv".format(filename[:-1]))
    pd.DataFrame(thresh).to_csv(
        path_or_buf="results/{0}TH.csv".format(filename[:-1]))
    pd.DataFrame(area).to_csv(
        path_or_buf="results/{0}Area.csv".format(filename[:-1]))

    print("\n\nDone")
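A minimal, self-contained sketch of the pair-scoring and ROC computation used in evaluate(), on toy vectors and made-up labels/scores (for illustration only):

import numpy as np
from sklearn.metrics import roc_curve, auc
from sklearn.metrics.pairwise import cosine_similarity

# two averaged feature vectors, as produced per pair in evaluate()
l1_avg = np.array([[0.2, 0.8, 0.1]])
l2_avg = np.array([[0.25, 0.75, 0.05]])
s = cosine_similarity(l1_avg.reshape(1, -1), l2_avg.reshape(1, -1))[0, 0]

true_val = np.array([1, 1, 0, 1, 0, 0, 1, 0])
score = np.array([0.9, 0.8, 0.7, 0.6, 0.55, 0.4, 0.35, 0.1])
fpr, tpr, thresh = roc_curve(true_val, score)
print(s, auc(fpr, tpr))  # similarity score and area under the ROC curve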
Example #6
def normalize_timestamps(time_lists, count_back, time_unit):
    """ Convert timestamps if they are strings """
    if isinstance(time_lists, list):
        time_lists = timestamp_list_to_obj(time_lists)
    elif isinstance(time_lists, dict):
        time_lists = timestamp_dict_to_obj(time_lists)
    else:
        logging.error(
            'TimestampProcessor::normalize_timestamps -- Timestamps must be contained in a list or dictionary.'
        )
        return dict()

    time_lists, isList = timestamps_to_dict(time_lists)
    """ Depending on args set the start date """
    if count_back:
        start_date_obj = find_latest_date_in_list(time_lists)
    else:
        start_date_obj = find_earliest_date_in_list(time_lists)

    start_month = start_date_obj.month
    start_day = start_date_obj.day
    start_hr = start_date_obj.hour
    start_mte = start_date_obj.minute

    length_of_month = cal.mdays[start_month]

    # Normalize dates
    time_norm = mh.AutoVivification()
    for key in time_lists.keys():
        for date_obj in time_lists[key]:

            month = date_obj.month
            day = date_obj.day
            hr = date_obj.hour
            mte = date_obj.minute

            if time_unit == 0:
                elem = (date_obj - start_date_obj
                        ).days + (date_obj - start_date_obj).seconds / (
                            60 * 60 * 24)  # Time difference in days
            elif time_unit == 1:
                elem = (date_obj - start_date_obj).days * 24 + (
                    date_obj - start_date_obj).seconds / (
                        60 * 60)  # Time difference in hours
            elif time_unit == 2:
                elem = (day - start_day) * 24 * 60 + (hr - start_hr) * 60 + (
                    mte - start_mte)  # Time difference in minutes
            elif time_unit == 3:
                elem = (month - start_month) * 24 * 60 * length_of_month + (
                    day - start_day) * 24 * 60 + (hr - start_hr) * 60 + (
                        mte - start_mte)  # Time difference in minutes spanning months
            try:
                time_norm[key].append(elem)
            except (AttributeError, KeyError):
                time_norm[key] = list()
                time_norm[key].append(elem)
    """ If the original argument was a list put it back in that form """
    if isList:
        time_norm = time_norm[key]

    return time_norm
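The time_unit == 1 branch above derives hour offsets from timedelta fields; a standalone illustration of that arithmetic:

import datetime

start = datetime.datetime(2011, 11, 16, 0, 0)
ts = datetime.datetime(2011, 11, 17, 6, 0)
delta = ts - start
hours = delta.days * 24 + delta.seconds // (60 * 60)
print(hours)  # 30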
Example #7
def index(request):
    """ 
        PROCESS POST DATA
        ================= 
        
        Escape all user input that can be entered in text fields 
        
    """
    try:
        campaign_regexp_filter = MySQLdb._mysql.escape_string(
            request.POST['campaign_regexp_filter'])

        if cmp(campaign_regexp_filter, '') == 0:
            campaign_regexp_filter = '^C_|^C11_'
    except:
        campaign_regexp_filter = '^C_|^C11_'

    try:
        min_donation = MySQLdb._mysql.escape_string(
            request.POST['min_donation'].strip())
        min_donation = int(min_donation)

    except:
        min_donation = 0

    # Filter on ISO codes to include matched countries
    try:
        iso_filter = MySQLdb._mysql.escape_string(
            request.POST['iso_filter'].strip())

    except:
        iso_filter = '.{2}'
    """
        Call up cached results
    """

    cache = DC.LiveResults_DataCaching()
    dict_param = cache.retrieve_cached_data(view_keys.LIVE_RESULTS_DICT_KEY)

    measured_metrics_counts = dict_param['measured_metrics_counts']
    results = dict_param['results']
    column_names = dict_param['column_names']
    sampling_interval = dict_param['interval']
    duration_hrs = dict_param['duration']

    start_time = dict_param['start_time']
    end_time = dict_param['end_time']

    ir_cmpgn = DR.IntervalReporting(query_type=FDH._QTYPE_CAMPAIGN_ +
                                    FDH._QTYPE_TIME_,
                                    generate_plot=False)
    ir_banner = DR.IntervalReporting(query_type=FDH._QTYPE_BANNER_ +
                                     FDH._QTYPE_TIME_,
                                     generate_plot=False)
    ir_lp = DR.IntervalReporting(query_type=FDH._QTYPE_LP_ + FDH._QTYPE_TIME_,
                                 generate_plot=False)

    ir_cmpgn._counts_ = dict_param['ir_cmpgn_counts']
    ir_banner._counts_ = dict_param['ir_banner_counts']
    ir_lp._counts_ = dict_param['ir_lp_counts']

    ir_cmpgn._times_ = dict_param['ir_cmpgn_times']
    ir_banner._times_ = dict_param['ir_banner_times']
    ir_lp._times_ = dict_param['ir_lp_times']

    metric_legend_table = dict_param['metric_legend_table']
    conf_legend_table = dict_param['conf_legend_table']
    """ Filtering -- donations and artifacts """

    country_index = column_names.index('country')
    donations_index = column_names.index('donations')
    campaign_index = column_names.index('utm_campaign')
    new_results = list()

    # Keep rows meeting the minimum donation count, campaign regexp, and ISO country filter
    for row in results:
        try:
            if row[donations_index] > min_donation and re.search(
                    campaign_regexp_filter, row[campaign_index]) and re.search(
                        iso_filter, row[country_index]):
                new_results.append(list(row))
        except:
            logging.error(
                'live_results/views.py -- Could not process row: %s' %
                str(row))

    results = new_results

    new_measured_metrics_counts = dict()
    for metric in measured_metrics_counts:
        new_measured_metrics_counts[metric] = dict()

        for artifact_key in measured_metrics_counts[metric]:
            if re.search(campaign_regexp_filter, artifact_key):
                new_measured_metrics_counts[metric][
                    artifact_key] = measured_metrics_counts[metric][
                        artifact_key]
    """ 
        Format results to encode html table cell markup in results        
    """

    ret = DR.ConfidenceReporting(
        query_type='', hyp_test='').get_confidence_on_time_range(
            None,
            None,
            None,
            measured_metrics_counts=new_measured_metrics_counts
        )  # first get color codes on confidence
    conf_colour_code = ret[0]

    for row_index in range(len(results)):
        artifact_index = results[row_index][0] + '-' + results[row_index][
            1] + '-' + results[row_index][2]

        for col_index in range(len(column_names)):

            is_coloured_cell = False
            if column_names[col_index] in conf_colour_code.keys():
                if artifact_index in conf_colour_code[
                        column_names[col_index]].keys():
                    results[row_index][
                        col_index] = '<td style="background-color:' + conf_colour_code[
                            column_names[col_index]][
                                artifact_index] + ';">' + str(
                                    results[row_index][col_index]) + '</td>'
                    is_coloured_cell = True

            if not (is_coloured_cell):
                results[row_index][col_index] = '<td>' + str(
                    results[row_index][col_index]) + '</td>'

    if results:
        summary_table = DR.DataReporting()._write_html_table(
            results,
            column_names,
            use_standard_metric_names=True,
            omit_cell_markup=True)
    else:
        summary_table = '<p><font size="4">No data available.</font></p>'

    summary_table = '<h4><u>Metrics Legend:</u></h4><div class="spacer"></div>' + metric_legend_table + \
    '<div class="spacer"></div><h4><u>Confidence Legend for Hypothesis Testing:</u></h4><div class="spacer"></div>' + conf_legend_table + '<div class="spacer"></div><div class="spacer"></div>' + summary_table
    """ 
        Prepare Live Plots
    """
    """ compose a list of zero data """
    # Note: [[1.0, 0.0]] * n aliases a single inner list n times, so mutating it
    # in a loop zeroes every entry; build independent [time, 0.0] pairs instead
    empty_data = [[float(i * sampling_interval), 0.0]
                  for i in range(duration_hrs * 60 / sampling_interval + 1)]
    """ Extract data from interval reporting objects """
    cmpgn_data_dict = ir_cmpgn.get_data_lists(
        ['C_', 'C11_', campaign_regexp_filter], empty_data)
    cmpgn_banner_dict = ir_banner.get_data_lists(['B_', 'B11_'], empty_data)
    cmpgn_lp_dict = ir_lp.get_data_lists(['L11_', '^cc'], empty_data)
    """  
        Build template parameters
    """

    template_dict = Hlp.combine_data_lists(
        [cmpgn_data_dict, cmpgn_banner_dict,
         cmpgn_lp_dict])  # combine the separate data sets
    template_dict['summary_table'] = summary_table
    template_dict['latest_log_end_time'] = end_time
    template_dict['start_time'] = start_time

    return render_to_response('live_results/index.html',
                              template_dict,
                              context_instance=RequestContext(request))
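The cell-markup loop above inlines the <td> strings; a hypothetical helper equivalent to one cell's formatting (format_cell is not a name from this project):

def format_cell(value, colour=None):
    # wrap a value in a table cell, optionally coloured by hypothesis-test confidence
    if colour:
        return '<td style="background-color:' + colour + ';">' + str(value) + '</td>'
    return '<td>' + str(value) + '</td>'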
Example #8
    def execute_process(self, key, **kwargs):
        
        logging.info('Commencing caching of fundraiser totals data at:  %s' % self.CACHING_HOME)        
                
        end_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)
        
        """ DATA CONFIG """
        
        """ set the metrics to plot """
        lttdl = DL.LongTermTrendsLoader(db='db1025')
        
        start_of_2011_fundraiser = '20111116000000'
        countries = DL.CiviCRMLoader().get_ranked_donor_countries(start_of_2011_fundraiser)
        countries.append('Total')
        
        """ Dictionary object storing lists of regexes - each expression must pass for a label to persist """
        year_groups = dict()
        for country in countries:
            if cmp(country, 'Total') == 0:
                year_groups['2011 Total'] = ['2011.*']
                year_groups['2010 Total'] = ['2010.*']
            else:                
                year_groups['2011 ' + country] = ['2011' + country]
                year_groups['2010 ' + country] = ['2010' + country]

        metrics = 'amount'
        weights = ''
        groups = year_groups
        group_metrics = ['year', 'country']
        
        metric_types = DL.LongTermTrendsLoader._MT_AMOUNT_
        
        include_totals = False
        include_others = False
        hours_back = 0
        time_unit = TP.DAY
                        
        """ END CONFIG """
        
        
        """ For each metric use the LongTermTrendsLoader to generate the data to plot """
            
        dr = DR.DataReporting()
        
        times, counts = lttdl.run_fundrasing_totals(end_time, metric_name=metrics, metric_type=metric_types, groups=groups, group_metric=group_metrics, include_other=include_others, \
                                        include_total=include_totals, hours_back=hours_back, weight_name=weights, time_unit=time_unit)
        dict_param = dict()
        
        for country in countries:
            
            key_2011 = '2011 ' + country
            key_2010 = '2010 ' + country
            
            new_counts = dict()
            new_counts[key_2010] = counts[key_2010]
            new_counts[key_2011] = counts[key_2011]
            
            new_times = dict()
            new_times[key_2010] = times[key_2010]
            new_times[key_2011] = times[key_2011]
            
            dr._counts_ = new_counts
            dr._times_ = new_times

            empty_data = [0] * len(new_times[new_times.keys()[0]])
            data = list()
            data.append(dr.get_data_lists([''], empty_data))
            
            dict_param[country] = Hlp.combine_data_lists(data)
        
        self.clear_cached_data(key)
        self.cache_data(dict_param, key)
        
        logging.info('Caching complete.')
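Per the docstring above, every regex in a group's list must pass for a label to qualify; a sketch of that rule (the matching logic is an assumption based on the docstring, not the loader's actual code):

import re

year_groups = {'2011 Total': ['2011.*'], '2011 US': ['2011US']}
label = '2011US'
for group, patterns in year_groups.items():
    if all(re.search(p, label) for p in patterns):
        print(group)  # this label qualifies for both groups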
Example #9
def daily_totals(request):    

    err_msg = ''
    
    start_day_ts = TP.timestamp_from_obj(datetime.datetime.utcnow() + datetime.timedelta(days=-1), 1, 0)
    end_day_ts = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 0)
    country = '.{2}'
    min_donation = 0
    order_str = 'order by 1 desc,3 desc'
    
    """
        PROCESS POST
    """
    
    if 'start_day_ts' in request.POST:
        if cmp(request.POST['start_day_ts'], '') != 0:
            start_day_ts = MySQLdb._mysql.escape_string(request.POST['start_day_ts'].strip())
            format = TP.getTimestampFormat(start_day_ts)
            
            if format == 2:
                start_day_ts = TP.timestamp_convert_format(start_day_ts, 2, 1)
                # start_day_ts = start_day_ts[:8] + '000000'
            elif format == -1:
                err_msg = err_msg + 'Start timestamp is formatted incorrectly\n'

    if 'end_day_ts' in request.POST:
        if cmp(request.POST['end_day_ts'], '') != 0:
            end_day_ts = MySQLdb._mysql.escape_string(request.POST['end_day_ts'].strip())
            format = TP.getTimestampFormat(end_day_ts)
            
            if format == 2:
                end_day_ts = TP.timestamp_convert_format(end_day_ts, 2, 1)
                # end_day_ts = end_day_ts[:8] + '000000'
            elif format == -1:
                err_msg = err_msg + 'End timestamp is formatted incorrectly\n'
            
    if 'country' in request.POST:
        if cmp(request.POST['country'], '') != 0:
            country = MySQLdb._mysql.escape_string(request.POST['country'])

    if 'min_donation' in request.POST:
        if cmp(request.POST['min_donation'], '') != 0:
            try:                
                min_donation = int(MySQLdb._mysql.escape_string(request.POST['min_donation'].strip()))
            except:
                logging.error('live_results/daily_totals -- Could not process minimum donation for "%s" ' % request.POST['min_donation'].strip())
                min_donation = 0
    
    if 'order_metric' in request.POST:
        if cmp(request.POST['order_metric'], 'Date') == 0:
            order_str = 'order by 1 desc,3 desc'
        elif cmp(request.POST['order_metric'], 'Country') == 0:
            order_str = 'order by 2 asc,1 desc'
            
    """
        === END POST ===
    """
    
    query_name = 'report_daily_totals_by_country'
    filename = projSet.__sql_home__+ query_name + '.sql'
    sql_stmnt = Hlp.file_to_string(filename)
    sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_day_ts, end_day_ts], country=country, min_donation=min_donation, order_str=order_str)
    
    dl = DL.DataLoader()    
    results = dl.execute_SQL(sql_stmnt)
    html_table = DR.DataReporting()._write_html_table(results, dl.get_column_names(), use_standard_metric_names=True)
    
    return render_to_response('live_results/daily_totals.html', \
                              {'html_table' : html_table, 'start_time' : TP.timestamp_convert_format(start_day_ts, 1, 2), 'end_time' : TP.timestamp_convert_format(end_day_ts, 1, 2)}, \
                              context_instance=RequestContext(request))
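The escape-strip-default pattern above recurs across these views; a hypothetical helper capturing it (post_param is not a name from this project, and cmp keeps the Python 2 idiom used throughout):

def post_param(request, name, default):
    # return the escaped, stripped POST value, or the default when absent/empty
    value = request.POST.get(name, '')
    if cmp(value, '') == 0:
        return default
    return MySQLdb._mysql.escape_string(value.strip())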
Example #14
0
    def execute_process(self, key, **kwargs):

        logging.info('Commencing caching of fundraiser totals data at:  %s' %
                     self.CACHING_HOME)

        end_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)
        """ DATA CONFIG """
        """ set the metrics to plot """
        lttdl = DL.LongTermTrendsLoader(db='db1025')

        start_of_2011_fundraiser = '20111116000000'
        countries = DL.CiviCRMLoader().get_ranked_donor_countries(
            start_of_2011_fundraiser)
        countries.append('Total')
        """ Dictionary object storing lists of regexes - each expression must pass for a label to persist """
        year_groups = dict()
        for country in countries:
            if cmp(country, 'Total') == 0:
                year_groups['2011 Total'] = ['2011.*']
                year_groups['2010 Total'] = ['2010.*']
            else:
                year_groups['2011 ' + country] = ['2011' + country]
                year_groups['2010 ' + country] = ['2010' + country]

        metrics = 'amount'
        weights = ''
        groups = year_groups
        group_metrics = ['year', 'country']

        metric_types = DL.LongTermTrendsLoader._MT_AMOUNT_

        include_totals = False
        include_others = False
        hours_back = 0
        time_unit = TP.DAY
        """ END CONFIG """
        """ For each metric use the LongTermTrendsLoader to generate the data to plot """

        dr = DR.DataReporting()

        times, counts = lttdl.run_fundrasing_totals(end_time, metric_name=metrics, metric_type=metric_types, groups=groups, group_metric=group_metrics, include_other=include_others, \
                                        include_total=include_totals, hours_back=hours_back, weight_name=weights, time_unit=time_unit)
        dict_param = dict()

        for country in countries:

            key_2011 = '2011 ' + country
            key_2010 = '2010 ' + country

            new_counts = dict()
            new_counts[key_2010] = counts[key_2010]
            new_counts[key_2011] = counts[key_2011]

            new_times = dict()
            new_times[key_2010] = times[key_2010]
            new_times[key_2011] = times[key_2011]

            dr._counts_ = new_counts
            dr._times_ = new_times

            empty_data = [0] * len(new_times[new_times.keys()[0]])
            data = list()
            data.append(dr.get_data_lists([''], empty_data))

            dict_param[country] = Hlp.combine_data_lists(data)

        self.clear_cached_data(key)
        self.cache_data(dict_param, key)

        logging.info('Caching complete.')
Example #15
0
def daily_totals(request):

    err_msg = ''

    start_day_ts = TP.timestamp_from_obj(
        datetime.datetime.utcnow() + datetime.timedelta(days=-1), 1, 0)
    end_day_ts = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 0)
    country = '.{2}'
    min_donation = 0
    order_str = 'order by 1 desc,3 desc'
    """
        PROCESS POST
    """

    if 'start_day_ts' in request.POST:
        if cmp(request.POST['start_day_ts'], '') != 0:
            start_day_ts = MySQLdb._mysql.escape_string(
                request.POST['start_day_ts'].strip())
            format = TP.getTimestampFormat(start_day_ts)

            if format == 2:
                start_day_ts = TP.timestamp_convert_format(start_day_ts, 2, 1)
                # start_day_ts = start_day_ts[:8] + '000000'
            elif format == -1:
                err_msg = err_msg + 'Start timestamp is formatted incorrectly\n'

    if 'end_day_ts' in request.POST:
        if cmp(request.POST['end_day_ts'], '') != 0:
            end_day_ts = MySQLdb._mysql.escape_string(
                request.POST['end_day_ts'].strip())
            format = TP.getTimestampFormat(end_day_ts)

            if format == 2:
                end_day_ts = TP.timestamp_convert_format(end_day_ts, 2, 1)
                # end_day_ts = end_day_ts[:8] + '000000'
            elif format == -1:
                err_msg = err_msg + 'End timestamp is formatted incorrectly\n'

    if 'country' in request.POST:
        if cmp(request.POST['country'], '') != 0:
            country = MySQLdb._mysql.escape_string(request.POST['country'])

    if 'min_donation' in request.POST:
        if cmp(request.POST['min_donation'], '') != 0:
            try:
                min_donation = int(
                    MySQLdb._mysql.escape_string(
                        request.POST['min_donation'].strip()))
            except ValueError:
                logging.error(
                    'live_results/daily_totals -- Could not process minimum donation for "%s"'
                    % request.POST['min_donation'].strip())
                min_donation = 0

    if 'order_metric' in request.POST:
        if cmp(request.POST['order_metric'], 'Date') == 0:
            order_str = 'order by 1 desc,3 desc'
        elif cmp(request.POST['order_metric'], 'Country') == 0:
            order_str = 'order by 2 asc,1 desc'
    """
        === END POST ===
    """

    query_name = 'report_daily_totals_by_country'
    filename = projSet.__sql_home__ + query_name + '.sql'
    sql_stmnt = Hlp.file_to_string(filename)
    sql_stmnt = QD.format_query(query_name,
                                sql_stmnt, [start_day_ts, end_day_ts],
                                country=country,
                                min_donation=min_donation,
                                order_str=order_str)

    dl = DL.DataLoader()
    results = dl.execute_SQL(sql_stmnt)
    html_table = DR.DataReporting()._write_html_table(
        results, dl.get_column_names(), use_standard_metric_names=True)

    return render_to_response('live_results/daily_totals.html', \
                              {'html_table' : html_table, 'start_time' : TP.timestamp_convert_format(start_day_ts, 1, 2), 'end_time' : TP.timestamp_convert_format(end_day_ts, 1, 2)}, \
                              context_instance=RequestContext(request))
Example #16
0
def format_query(query_name, sql_stmnt, args, **kwargs):
    
    country, min_donation, order_str = process_kwargs(kwargs)
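    # The bare '%' arguments used throughout this function appear to
    # restore MySQL DATE_FORMAT tokens (%Y, %m, %d, %H, ...) that the .sql
    # templates expose as substitution slots, so that Python %-formatting
    # does not consume them -- inferred from calls such as
    # ('%', '%', '%', ' %H', start_time, end_time) in the hourly report.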
    
    if cmp(query_name, 'report_campaign_ecomm') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % (start_time))

    elif cmp(query_name, 'report_campaign_logs') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % (start_time, start_time, start_time))

    elif cmp(query_name, 'report_campaign_ecomm_by_hr') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start_time))

    elif cmp(query_name, 'report_campaign_logs_by_hr') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%', \
        start_time, '%', '%', '%', '%', start_time, '%'))

    elif cmp(query_name, 'report_impressions_country') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', start_time))

    elif cmp(query_name, 'report_campaign_logs_by_min') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%', \
        start_time, '%', '%', '%', '%', start_time))
    
    elif cmp(query_name, 'report_non_US_clicks') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', start_time, '%', '%', '%', start_time))
    
    elif cmp(query_name, 'report_contribution_tracking') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', '%',start_time))
    
    elif cmp(query_name, 'report_total_amounts_by_hr') == 0:
        start_time = args[0]
        end_time = args[1]            
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', ' %H', start_time, end_time))
    
    elif cmp(query_name, 'report_total_amounts_by_day') == 0:
        start_time = args[0]
        end_time = args[1]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '', start_time, end_time))
    
    elif cmp(query_name, 'report_LP_metrics') == 0 or cmp(query_name, 'report_LP_metrics_1S') == 0:
        
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        min_views = args[3]
        
        """ Format the condition for minimum views """
        if cmp(str(min_views), '-1') == 0:
            min_views = ' '
        else:
            min_views = 'where lp.views > ' + str(min_views) + ' '
            
        sql_stmnt = str(sql_stmnt % (start_time, end_time, campaign, country, start_time, end_time, campaign, country, start_time, end_time, campaign, country, min_views))
        
    elif cmp(query_name, 'report_banner_metrics') == 0 or cmp(query_name, 'report_bannerLP_metrics') == 0 or cmp(query_name, 'report_total_metrics') == 0 or \
    cmp(query_name, 'report_banner_metrics_1S') == 0 or cmp(query_name, 'report_bannerLP_metrics_1S') == 0 or cmp(query_name, 'report_total_metrics_1S') == 0:
        
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        min_views = args[3]
            
        """ Format the condition for minimum views """
        if cmp(str(min_views), '-1') == 0:
            min_views = ' '
        else:
            min_views = 'where lp.views > ' + str(min_views) + ' '
            
        sql_stmnt = str(sql_stmnt % (start_time, end_time, country, start_time, end_time, campaign, country, start_time, end_time, country, \
                                     start_time, end_time, campaign, country, start_time, end_time, campaign, country, min_views))

    elif cmp(query_name, 'report_latest_campaign') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % (start_time))
            
    elif cmp(query_name, 'report_banner_impressions_by_hour') == 0:
        start = args[0]
        end = args[1]
        sql_stmnt = str(sql_stmnt % ('%','%','%','%', start, end))
                
    elif cmp(query_name, 'report_ecomm_by_amount') == 0:
        start_time = args[0]
        end_time = args[1]
        sql_stmnt = str(sql_stmnt % ('%', '%',  '%',  '%', start_time, end_time, end_time))
    
    elif cmp(query_name, 'report_ecomm_by_contact') == 0:
        where_str = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', where_str))
    
    elif cmp(query_name, 'report_LP_metrics_minutely') == 0 or cmp(query_name, 'report_LP_metrics_minutely_1S') == 0:
        
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        interval = args[3]
        
        """ The start time for the impression portion of the query should be one second less """
        start_time_obj = TP.timestamp_to_obj(start_time,1)
        imp_start_time_obj = start_time_obj + datetime.timedelta(seconds=-1)
        imp_start_time_obj_str = TP.timestamp_from_obj(imp_start_time_obj, 1, 3)
        
        sql_stmnt = str(sql_stmnt % ('%', '%',  '%',  '%', interval, interval, start_time, end_time, campaign, country, '%', '%',  '%',  '%', interval, interval, start_time, end_time, campaign, country, \
                                 start_time, end_time, campaign, country, campaign))
    
    elif cmp(query_name, 'report_banner_metrics_minutely') == 0 or cmp(query_name, 'report_bannerLP_metrics_minutely') == 0 or cmp(query_name, 'report_banner_metrics_minutely_1S') == 0 or cmp(query_name, 'report_bannerLP_metrics_minutely_1S') == 0:
    
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        interval = args[3]
        
        """ The start time for the impression portion of the query should be one second less """
        start_time_obj = TP.timestamp_to_obj(start_time,1)
        imp_start_time_obj = start_time_obj + datetime.timedelta(seconds=-1)
        imp_start_time_obj_str = TP.timestamp_from_obj(imp_start_time_obj, 1, 3)
        
        sql_stmnt = str(sql_stmnt % ('%', '%', '%',  '%', interval, interval, imp_start_time_obj_str, end_time, \
                                     country, '%', '%',  '%',  '%', interval, interval, start_time, end_time, campaign, country, \
                                '%', '%',  '%',  '%', interval, interval, start_time, end_time, country, \
                                '%', '%',  '%',  '%', interval, interval, start_time, end_time, campaign, \
                                country, start_time, end_time, campaign, country, campaign, ))
    
    elif cmp(query_name, 'report_campaign_metrics_minutely') == 0 or cmp(query_name, 'report_campaign_metrics_minutely_1S') == 0 or cmp(query_name, 'report_campaign_metrics_minutely_total') == 0 \
    or cmp(query_name, 'report_campaign_metrics_minutely_total_1S') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        interval = args[3]
        
        sql_stmnt = str(sql_stmnt % (campaign, '%', '%', '%',  '%', interval, interval, start_time, end_time, campaign, country, '%', '%',  '%',  '%', interval, interval, start_time, end_time, campaign, country))
        
    elif cmp(query_name, 'report_campaign_totals') == 0:
        start_time = args[0]
        end_time = args[1]
        
        sql_stmnt = str(sql_stmnt % (start_time, end_time))
    
    elif cmp(query_name, 'report_campaign_banners') == 0:
        start_time = args[0]
        end_time = args[1]
        utm_campaign = args[2]
        
        sql_stmnt = str(sql_stmnt % (start_time, end_time, utm_campaign))
        
    elif cmp(query_name, 'report_campaign_lps') == 0:
        start_time = args[0]
        end_time = args[1]
        utm_campaign = args[2]
        
        sql_stmnt = str(sql_stmnt % (start_time, end_time, utm_campaign))
    
    elif cmp(query_name, 'report_campaign_bannerlps') == 0:
        start_time = args[0]
        end_time = args[1]
        utm_campaign = args[2]
        
        sql_stmnt = str(sql_stmnt % (start_time, end_time, utm_campaign))
    
    elif cmp(query_name, 'report_campaign_metrics_minutely_all') == 0 or cmp(query_name, 'report_banner_metrics_minutely_all') == 0 or cmp(query_name, 'report_lp_metrics_minutely_all') == 0:
        start_time = args[0]
        end_time = args[1]
        interval = args[3]
        
        sql_stmnt = str(sql_stmnt % ('%', '%', '%',  '%', interval, interval, start_time, end_time))

    elif cmp(query_name, 'report_donation_metrics') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        
        sql_stmnt = str(sql_stmnt % (start_time, end_time, campaign, country, start_time, end_time, campaign, country))

    elif cmp(query_name, 'report_total_donations') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        
        """ Recursively construct the sub-query """
        sub_query_name = 'report_donation_metrics'
        sub_query_sql = Hlp.file_to_string(projSet.__sql_home__ + sub_query_name + '.sql')
        sub_query_sql = format_query(sub_query_name, sub_query_sql, [start_time, end_time, campaign], country=country)        

        sql_stmnt = str(sql_stmnt % sub_query_sql)

    elif cmp(query_name, 'report_daily_totals_by_country') == 0:
        start_time = args[0]
        end_time = args[1]
        
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', start_time, end_time, country, min_donation, order_str))

    else:
        return 'no such table\n'

    return sql_stmnt
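
For reference, a minimal sketch of driving format_query directly, mirroring the call made from daily_totals above; it assumes process_kwargs supplies defaults for country, min_donation, and order_str when they are omitted:

# Hypothetical direct invocation; Hlp and projSet come from the
# surrounding examples, and the format-1 timestamps are placeholders.
query_name = 'report_campaign_totals'
sql_stmnt = Hlp.file_to_string(projSet.__sql_home__ + query_name + '.sql')
sql_stmnt = QD.format_query(query_name, sql_stmnt,
                            ['20111116000000', '20111117000000'])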
Example #17
0
def main(batch_size, num_epochs, lr, file_write, flag_dummy, temperature,
         lr_decay, features):
    """
    Main function for training.
    :param batch_size  : Batch size to use.
    :param num_epochs  : Number of epochs for each split.
    :param lr          : Learning rate to be set at the start of each split.
    :param file_write  : Write output to stdout (default) or to a file.
    :param flag_dummy  : Create a dummy file for evaluation.
    :param temperature : Set default temperature for softmax/log_softmax layer while training.
    :param lr_decay    : Learning rate decay for every drop in min loss observed.
    :param features    : Number of nodes for penultimate feature layer.
    :return:
    """

    # --------------------- Parameters ---------------------
    device = torch.device("cuda:0" if cuda.is_available() else "cpu")

    # optim_name = 'SGD'
    # optim_name = 'RMS'
    optim_name = 'Adam'

    batch_print = 50
    op_dir = "pickles/"

    t_stmp = time.strftime("%Y%m%d_%H%M%S", time.gmtime())

    # Record the timestamp prefix of the latest saved models
    with open("latest_t_stmp.txt", 'w') as ff:
        ff.write("A2_T{0}_S".format(t_stmp))

    helper = Helper("log/log_" + t_stmp + ".txt")
    helper.write_file(file_write)

    helper.log(msg="Starting data loading.")

    training_set, training_loader = helper.get_data(
        mode="train", training_batch_size=batch_size)
    helper.log(msg="Finished data loading. Starting main training.")

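    # Train a fresh model for each of the ten data splits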
    for set_n in range(1, 11):

        init_lr = lr

        model, criterion, optimizer = mod.get_model(device,
                                                    optim_name,
                                                    lamb=0,
                                                    learning_rate=init_lr,
                                                    final_features=features)
        model.train(True)
        model.set_temperature(temperature)

        if flag_dummy:
            helper.log(msg="\nCreating dummy file.\n")
            dummy_file = {
                "model": model.state_dict(),
                "criterion": criterion.state_dict(),
                "optimizer": optimizer.state_dict(),
                "optim_name": optim_name,
                "features": features
            }
            torch.save(dummy_file, op_dir + "dummy.pt")
            flag_dummy = False

        helper.log(msg="\nStart of split {0}\n".format(set_n))

        total_len = len(training_loader[set_n])

        running_loss = 0.0
        cor = 0
        tot = 0
        cor_b = 0
        tot_b = 0
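        # Baseline for the best running loss seen so far, seeded high
        # (6.0 per step over a batch_print window) so early logging
        # intervals register an improvement.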
        past_loss = 6.0 * batch_print

        for epoch in range(num_epochs):
            for i, (images, labels) in enumerate(training_loader[set_n]):

                # Change variable type to match GPU requirements
                inp = images.to(device)
                lab = labels.to(device)

                # Reset gradients before processing
                optimizer.zero_grad()

                # Get model output
                out = model(inp)

                # Calculate loss
                loss = criterion(out, lab)

                # Accuracy calc
                _, predicted = torch.max(out.data, 1)

                tot_b += batch_size
                cor_b += (predicted == lab).sum().item()

                tot += batch_size
                cor += (predicted == lab).sum().item()

                # Update weights
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                # logger.log(msg="\rLoss = {0}        ".format(l), end="")
                if (i + 1) % batch_print == 0:

                    helper.log(
                        msg="Split: {3}, Epoch: {0}, step: {1}/{2} ".format(
                            epoch + 1, i + 1, total_len, set_n),
                        end="\t")

                    helper.log(
                        msg="Running Loss (avg): {0:.06f}, Past: {1:.06f}".
                        format((running_loss / batch_print),
                               (past_loss / batch_print)),
                        end="\t")
                    helper.log(
                        msg="Accuracy: (Per {2})|(Total): {0:.03f}|{1:.03f} %".
                        format((cor_b * 100) / tot_b, (cor * 100) / tot,
                               batch_size * batch_print),
                        end="\t")

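                    # Decay the learning rate whenever the running loss
                    # improves on the best seen so far, flooring it at 1e-3.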
                    if running_loss < past_loss:
                        past_loss = running_loss
                        init_lr *= lr_decay
                        for params in optimizer.param_groups:
                            params['lr'] = max(init_lr, 0.001)

                    helper.log(msg="LR: {0:.06f}".format(init_lr))

                    running_loss = 0.0
                    cor_b = 0
                    tot_b = 0

        filename = op_dir + "A2_T{1}_S{0}.pt".format(set_n, t_stmp)

        # Idea for named saved file was picked up from here:
        # https://github.com/quiltdata/pytorch-examples/blob/master/imagenet/main.py
        save_file = {
            "model": model.state_dict(),
            "criterion": criterion.state_dict(),
            "optimizer": optimizer.state_dict(),
            "optim_name": optim_name,
            "features": features
        }

        torch.save(save_file, filename)
        helper.log(
            msg="\nFile {0} saved for split {1}".format(filename, set_n))

    helper.close()
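
A minimal sketch of a command-line entry point that could drive main(); the flag names and defaults are assumptions, since the original driver is not shown:

if __name__ == '__main__':
    import argparse

    # Hypothetical CLI wrapper; every flag and default here is assumed.
    parser = argparse.ArgumentParser(description='Train one model per split.')
    parser.add_argument('--batch-size', type=int, default=32)
    parser.add_argument('--num-epochs', type=int, default=10)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--file-write', action='store_true',
                        help='Log to a file instead of stdout.')
    parser.add_argument('--flag-dummy', action='store_true',
                        help='Save a dummy checkpoint for evaluation.')
    parser.add_argument('--temperature', type=float, default=1.0)
    parser.add_argument('--lr-decay', type=float, default=0.9)
    parser.add_argument('--features', type=int, default=512)
    a = parser.parse_args()

    main(a.batch_size, a.num_epochs, a.lr, a.file_write, a.flag_dummy,
         a.temperature, a.lr_decay, a.features)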