Example #1
0
def _extract_ts_field(request, post_key, default_ts, label):
    """Extract and validate one timestamp field from request.POST.

    @param request:    Django HTTP request object
    @param post_key:   POST field name to read
    @param default_ts: timestamp to fall back on (field absent, blank, or bad)
    @param label:      'start' or 'end' -- used to build the error message
    @return: (err_msg, timestamp) where err_msg is '' unless the field was
             present but not a recognizable timestamp
    """
    try:
        ts_var = MySQLdb._mysql.escape_string(request.POST[post_key].strip())

        # A blank field means "use the default window".  This must be
        # checked BEFORE validation: an empty string fails is_timestamp()
        # and would otherwise be (wrongly) reported to the user as an
        # invalid timestamp.
        if ts_var == "":
            return "", default_ts

        if not (TP.is_timestamp(ts_var, 1)) and not (TP.is_timestamp(ts_var, 2)):
            raise TypeError

        # Normalize FORMAT1 timestamps to the flat format used downstream.
        if TP.getTimestampFormat(ts_var) == TP.TS_FORMAT_FORMAT1:
            ts_var = TP.timestamp_convert_format(ts_var, TP.TS_FORMAT_FORMAT1, TP.TS_FORMAT_FLAT)

        return "", ts_var

    except KeyError:
        # Field not submitted at all -- fall back silently.
        return "", default_ts

    except TypeError:
        # Field submitted but malformed -- report and fall back.
        return "Please enter a valid %s-timestamp." % label, default_ts


def process_filter_data(request):
    """Process the time-range filter fields posted with a request.

    Reads 'earliest_utc_ts' / 'latest_utc_ts' from request.POST, validating
    and normalizing each; missing, blank, or malformed fields fall back to a
    default four-hour window ending now (UTC).

    @param request: Django HTTP request object
    @return: (err_msg, earliest_utc_ts_var, latest_utc_ts_var)
    """

    time_curr = datetime.datetime.utcnow()
    time_dayback = time_curr + datetime.timedelta(hours=-4)

    _beginning_time_ = TP.timestamp_from_obj(time_dayback, 1, 3)
    _end_time_ = TP.timestamp_from_obj(time_curr, 1, 3)

    """ 
        PROCESS POST VARS 
        =================
    """

    err_msg_end, latest_utc_ts_var = _extract_ts_field(
        request, "latest_utc_ts", _end_time_, "end")
    err_msg_start, earliest_utc_ts_var = _extract_ts_field(
        request, "earliest_utc_ts", _beginning_time_, "start")

    # Preserve the original precedence: when both fields are bad, the
    # start-timestamp message wins (it was assigned last in the original).
    err_msg = err_msg_start or err_msg_end or ""

    return err_msg, earliest_utc_ts_var, latest_utc_ts_var
Example #2
0
    def get_timestamps_with_interval(self, logFileName, interval):
        """Return [start, end] timestamps for a log file, where start lies
        ``interval`` minutes before the log's own end timestamp.

        @param logFileName: name of the log file to inspect
        @param interval:    window length in minutes
        @return: two-element list [start_timestamp, end_timestamp]
        """
        # The log's end time anchors the window.
        window_end = TP.timestamp_to_obj(self.get_timestamps(logFileName)[1], 1)
        # Walk back `interval` minutes to obtain the window start.
        window_start = window_end - datetime.timedelta(minutes=interval)

        return [
            TP.timestamp_from_obj(window_start, 1, 2),
            TP.timestamp_from_obj(window_end, 1, 2),
        ]
Example #3
0
def format_query(query_name, sql_stmnt, args, **kwargs):
    """Substitute positional arguments into a named SQL template.

    Each branch below knows the exact number and order of '%s' slots in the
    corresponding .sql template, so the argument tuples must not be
    reordered or resized.  The literal '%' entries re-escape percent signs
    the template must keep (e.g. inside MySQL date_format patterns).

    NOTE(review): comparisons use Python 2's cmp(a, b) == 0, which for
    strings is equivalent to a == b.

    @param query_name: name of the SQL template; selects the branch
    @param sql_stmnt:  raw template text containing %s placeholders
    @param args:       positional values; by convention args[0] is the start
                       time, args[1] the end time, args[2] the campaign, and
                       args[3] a minimum-views threshold or sampling
                       interval, depending on the query
    @param kwargs:     turned into (country, min_donation, order_str) by
                       process_kwargs()
    @return: the formatted SQL string, or the literal 'no such table\\n'
             when query_name is not recognized
    """
    
    country, min_donation, order_str = process_kwargs(kwargs)
    
    if cmp(query_name, 'report_campaign_ecomm') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % (start_time))

    elif cmp(query_name, 'report_campaign_logs') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % (start_time, start_time, start_time))

    elif cmp(query_name, 'report_campaign_ecomm_by_hr') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start_time))

    elif cmp(query_name, 'report_campaign_logs_by_hr') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%', \
        start_time, '%', '%', '%', '%', start_time, '%'))

    elif cmp(query_name, 'report_impressions_country') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', start_time))

    elif cmp(query_name, 'report_campaign_logs_by_min') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%', \
        start_time, '%', '%', '%', '%', start_time))
    
    elif cmp(query_name, 'report_non_US_clicks') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', start_time, '%', '%', '%', start_time))
    
    elif cmp(query_name, 'report_contribution_tracking') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', '%',start_time))
    
    elif cmp(query_name, 'report_total_amounts_by_hr') == 0:
        start_time = args[0]
        end_time = args[1]            
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', ' %H', start_time, end_time))
    
    elif cmp(query_name, 'report_total_amounts_by_day') == 0:
        start_time = args[0]
        end_time = args[1]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '', start_time, end_time))
    
    elif cmp(query_name, 'report_LP_metrics') == 0 or cmp(query_name, 'report_LP_metrics_1S') == 0:
        
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        min_views = args[3]
        
        """ Format the condition for minimum views """
        # min_views == -1 is the sentinel for "no minimum" -> empty WHERE clause.
        if cmp(str(min_views), '-1') == 0:
            min_views = ' '
        else:
            min_views = 'where lp.views > ' + str(min_views) + ' '
            
        sql_stmnt = str(sql_stmnt % (start_time, end_time, campaign, country, start_time, end_time, campaign, country, start_time, end_time, campaign, country, min_views))
        
    elif cmp(query_name, 'report_banner_metrics') == 0 or cmp(query_name, 'report_bannerLP_metrics') == 0 or cmp(query_name, 'report_total_metrics') == 0 or \
    cmp(query_name, 'report_banner_metrics_1S') == 0 or cmp(query_name, 'report_bannerLP_metrics_1S') == 0 or cmp(query_name, 'report_total_metrics_1S') == 0:
        
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        min_views = args[3]
            
        """ Format the condition for minimum views """
        if cmp(str(min_views), '-1') == 0:
            min_views = ' '
        else:
            min_views = 'where lp.views > ' + str(min_views) + ' '
            
        sql_stmnt = str(sql_stmnt % (start_time, end_time, country, start_time, end_time, campaign, country, start_time, end_time, country, \
                                     start_time, end_time, campaign, country, start_time, end_time, campaign, country, min_views))
        
    
    elif cmp(query_name, 'report_latest_campaign') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % (start_time))
            
    elif cmp(query_name, 'report_banner_impressions_by_hour') == 0:
        start = args[0]
        end = args[1]
        sql_stmnt = str(sql_stmnt % ('%','%','%','%', start, end))
                
    elif cmp(query_name, 'report_ecomm_by_amount') == 0:
        start_time = args[0]
        end_time = args[1]
        sql_stmnt = str(sql_stmnt % ('%', '%',  '%',  '%', start_time, end_time, end_time))
    
    elif cmp(query_name, 'report_ecomm_by_contact') == 0:
        where_str = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', where_str))
    
    elif cmp(query_name, 'report_LP_metrics_minutely') == 0 or cmp(query_name, 'report_LP_metrics_minutely_1S') == 0:
        
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        interval = args[3]
        
        """ The start time for the impression portion of the query should be one second less"""
        start_time_obj = TP.timestamp_to_obj(start_time,1)
        imp_start_time_obj = start_time_obj + datetime.timedelta(seconds=-1)
        imp_start_time_obj_str = TP.timestamp_from_obj(imp_start_time_obj, 1, 3)
        
        # NOTE(review): imp_start_time_obj_str is computed here but not used
        # in this branch's tuple (unlike the banner branch below) -- confirm
        # against the template whether that is intentional.
        sql_stmnt = str(sql_stmnt % ('%', '%',  '%',  '%', interval, interval, start_time, end_time, campaign, country, '%', '%',  '%',  '%', interval, interval, start_time, end_time, campaign, country, \
                                 start_time, end_time, campaign, country, campaign))
    
    elif cmp(query_name, 'report_banner_metrics_minutely') == 0 or cmp(query_name, 'report_bannerLP_metrics_minutely') == 0 or cmp(query_name, 'report_banner_metrics_minutely_1S') == 0 or cmp(query_name, 'report_bannerLP_metrics_minutely_1S') == 0:
    
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        interval = args[3]
        
        """ The start time for the impression portion of the query should be one second less"""
        start_time_obj = TP.timestamp_to_obj(start_time,1)
        imp_start_time_obj = start_time_obj + datetime.timedelta(seconds=-1)
        imp_start_time_obj_str = TP.timestamp_from_obj(imp_start_time_obj, 1, 3)
        
        sql_stmnt = str(sql_stmnt % ('%', '%', '%',  '%', interval, interval, imp_start_time_obj_str, end_time, \
                                     country, '%', '%',  '%',  '%', interval, interval, start_time, end_time, campaign, country, \
                                '%', '%',  '%',  '%', interval, interval, start_time, end_time, country, \
                                '%', '%',  '%',  '%', interval, interval, start_time, end_time, campaign, \
                                country, start_time, end_time, campaign, country, campaign, ))
    
    elif cmp(query_name, 'report_campaign_metrics_minutely') == 0 or cmp(query_name, 'report_campaign_metrics_minutely_1S') == 0 or cmp(query_name, 'report_campaign_metrics_minutely_total') == 0 \
    or cmp(query_name, 'report_campaign_metrics_minutely_total_1S') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        interval = args[3]
        
        sql_stmnt = str(sql_stmnt % (campaign, '%', '%', '%',  '%', interval, interval, start_time, end_time, campaign, country, '%', '%',  '%',  '%', interval, interval, start_time, end_time, campaign, country))
        
    elif cmp(query_name, 'report_campaign_totals') == 0:
        start_time = args[0]
        end_time = args[1]
        
        sql_stmnt = str(sql_stmnt % (start_time, end_time))
    
    elif cmp(query_name, 'report_campaign_banners') == 0:
        start_time = args[0]
        end_time = args[1]
        utm_campaign = args[2]
        
        sql_stmnt = str(sql_stmnt % (start_time, end_time, utm_campaign))
        
    elif cmp(query_name, 'report_campaign_lps') == 0:
        start_time = args[0]
        end_time = args[1]
        utm_campaign = args[2]
        
        sql_stmnt = str(sql_stmnt % (start_time, end_time, utm_campaign))
    
    elif cmp(query_name, 'report_campaign_bannerlps') == 0:
        start_time = args[0]
        end_time = args[1]
        utm_campaign = args[2]
        
        sql_stmnt = str(sql_stmnt % (start_time, end_time, utm_campaign))
    
    elif cmp(query_name, 'report_campaign_metrics_minutely_all') == 0 or cmp(query_name, 'report_banner_metrics_minutely_all') == 0 or cmp(query_name, 'report_lp_metrics_minutely_all') == 0:
        start_time = args[0]
        end_time = args[1]
        # NOTE(review): args[2] (campaign) is deliberately skipped here --
        # these are the "_all" variants.
        interval = args[3]
        
        sql_stmnt = str(sql_stmnt % ('%', '%', '%',  '%', interval, interval, start_time, end_time))

    elif cmp(query_name, 'report_donation_metrics') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        
        sql_stmnt = str(sql_stmnt % (start_time, end_time, campaign, country, start_time, end_time, campaign, country))

    elif cmp(query_name, 'report_total_donations') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        
        """ Recursively construct the sub-query """
        # report_total_donations wraps report_donation_metrics as a subquery.
        sub_query_name = 'report_donation_metrics'
        sub_query_sql = Hlp.file_to_string(projSet.__sql_home__ + sub_query_name + '.sql')
        sub_query_sql = format_query(sub_query_name, sub_query_sql, [start_time, end_time, campaign], country=country)        

        sql_stmnt = str(sql_stmnt % sub_query_sql)

    elif cmp(query_name, 'report_daily_totals_by_country') == 0:
        start_time = args[0]
        end_time = args[1]
        
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', start_time, end_time, country, min_donation, order_str))


    else:
        return 'no such table\n'

    return sql_stmnt
Example #4
0
    def execute_process(self, key, **kwargs):
        """Build and cache year-over-year fundraiser donation totals.

        Loads daily donation amounts for the 2010 and 2011 fundraisers for
        each ranked donor country (plus an aggregate 'Total'), packages each
        country's pair of time series via DataReporting, and shelves the
        combined result under ``key``.

        NOTE(review): this code assumes Python 2 -- see the
        ``new_times.keys()[0]`` indexing below, which requires keys() to
        return a list.

        @param key: cache key passed to clear_cached_data / cache_data
        """

        logging.info('Commencing caching of fundraiser totals data at:  %s' %
                     self.CACHING_HOME)

        end_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)
        """ DATA CONFIG """
        """ set the metrics to plot """
        lttdl = DL.LongTermTrendsLoader(db='db1025')

        start_of_2011_fundraiser = '20111116000000'
        countries = DL.CiviCRMLoader().get_ranked_donor_countries(
            start_of_2011_fundraiser)
        countries.append('Total')
        """ Dictionary object storing lists of regexes - each expression must pass for a label to persist """
        # Map each display label ('2011 US', '2010 Total', ...) to the regex
        # list that selects its rows from the loader output.
        year_groups = dict()
        for country in countries:
            if cmp(country, 'Total') == 0:
                year_groups['2011 Total'] = ['2011.*']
                year_groups['2010 Total'] = ['2010.*']
            else:
                year_groups['2011 ' + country] = ['2011' + country]
                year_groups['2010 ' + country] = ['2010' + country]

        metrics = 'amount'
        weights = ''
        groups = year_groups
        group_metrics = ['year', 'country']

        metric_types = DL.LongTermTrendsLoader._MT_AMOUNT_

        include_totals = False
        include_others = False
        hours_back = 0
        time_unit = TP.DAY
        """ END CONFIG """
        """ For each metric use the LongTermTrendsLoader to generate the data to plot """

        dr = DR.DataReporting()

        times, counts = lttdl.run_fundrasing_totals(end_time, metric_name=metrics, metric_type=metric_types, groups=groups, group_metric=group_metrics, include_other=include_others, \
                                        include_total=include_totals, hours_back=hours_back, weight_name=weights, time_unit=time_unit)
        dict_param = dict()

        # Re-slice the loader output into one {2010 series, 2011 series}
        # pair per country and let DataReporting format each pair.
        for country in countries:

            key_2011 = '2011 ' + country
            key_2010 = '2010 ' + country

            # NOTE(review): assumes the loader returned both years for every
            # ranked country -- a missing key would raise KeyError here.
            new_counts = dict()
            new_counts[key_2010] = counts[key_2010]
            new_counts[key_2011] = counts[key_2011]

            new_times = dict()
            new_times[key_2010] = times[key_2010]
            new_times[key_2011] = times[key_2011]

            dr._counts_ = new_counts
            dr._times_ = new_times

            # Placeholder series of zeros, same length as the time axis.
            empty_data = [0] * len(new_times[new_times.keys()[0]])
            data = list()
            data.append(dr.get_data_lists([''], empty_data))

            dict_param[country] = Hlp.combine_data_lists(data)

        # Replace any previously cached payload for this key.
        self.clear_cached_data(key)
        self.cache_data(dict_param, key)

        logging.info('Caching complete.')
Example #5
0
    def execute_process(self, key, **kwargs):
        """Build and cache the live-results summary for current campaigns.

        Computes summary metrics and confidence results over the last
        DURATION_HRS for campaigns matching the regexp filter, optionally
        merging in one-step-test artifacts, runs minutely donation interval
        reports for campaign/banner/landing-page breakdowns, and shelves the
        combined payload under ``key``.

        @param key: cache key passed to clear_cached_data / cache_data
        """

        logging.info('Commencing caching of live results data at:  %s' %
                     self.CACHING_HOME)
        shelve_key = key
        """ Find the earliest and latest page views for a given campaign  """
        lptl = DL.LandingPageTableLoader(db='db1025')

        query_name = 'report_summary_results_country.sql'
        query_name_1S = 'report_summary_results_country_1S.sql'
        # Matches campaign names like 'C_...' or 'C11_...'.
        campaign_regexp_filter = '^C_|^C11_'

        dl = DL.DataLoader(db='db1025')
        end_time, start_time = TP.timestamps_for_interval(
            datetime.datetime.utcnow(), 1, hours=-self.DURATION_HRS)
        """ Should a one-step query be used? """
        use_one_step = lptl.is_one_step(
            start_time, end_time, 'C11'
        )  # Assume it is a one step test if there are no impressions for this campaign in the landing page table
        """ 
            Retrieve the latest time for which impressions have been loaded
            ===============================================================
        """

        sql_stmnt = 'select max(end_time) as latest_ts from squid_log_record where log_completion_pct = 100.00'

        results = dl.execute_SQL(sql_stmnt)
        latest_timestamp = results[0][0]
        latest_timestamp = TP.timestamp_from_obj(latest_timestamp, 2, 3)
        latest_timestamp_flat = TP.timestamp_convert_format(
            latest_timestamp, 2, 1)

        ret = DR.ConfidenceReporting(query_type='', hyp_test='',
                                     db='db1025').get_confidence_on_time_range(
                                         start_time,
                                         end_time,
                                         campaign_regexp_filter,
                                         one_step=use_one_step)
        measured_metrics_counts = ret[1]
        """ Prepare Summary results """

        # The template's %s slots consume this exact tuple -- do not reorder.
        sql_stmnt = Hlp.file_to_string(projSet.__sql_home__ + query_name)
        sql_stmnt = sql_stmnt % (start_time, latest_timestamp_flat, start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, \
                                 start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, \
                                 start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, campaign_regexp_filter)

        logging.info('Executing report_summary_results ...')

        results = dl.execute_SQL(sql_stmnt)
        column_names = dl.get_column_names()

        if use_one_step:

            logging.info('... including one step artifacts ...')

            # Same slot layout as the two-step template above.
            sql_stmnt_1S = Hlp.file_to_string(projSet.__sql_home__ +
                                              query_name_1S)
            sql_stmnt_1S = sql_stmnt_1S % (start_time, latest_timestamp_flat, start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, \
                                     start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, \
                                     start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, campaign_regexp_filter)

            results = list(results)
            results_1S = dl.execute_SQL(sql_stmnt_1S)
            """ Ensure that the results are unique """
            # A row's identity is the concatenation of its first three
            # columns; one-step rows replace matching two-step rows.
            one_step_keys = list()
            for row in results_1S:
                one_step_keys.append(str(row[0]) + str(row[1]) + str(row[2]))

            new_results = list()
            for row in results:
                key = str(row[0]) + str(row[1]) + str(row[2])
                if not (key in one_step_keys):
                    new_results.append(row)
            results = new_results

            results.extend(list(results_1S))

        metric_legend_table = DR.DataReporting().get_standard_metrics_legend()
        conf_legend_table = DR.ConfidenceReporting(
            query_type='bannerlp',
            hyp_test='TTest').get_confidence_legend_table()
        """ Create a interval loader objects """

        sampling_interval = 5  # 5 minute sampling interval for donation plots

        ir_cmpgn = DR.IntervalReporting(query_type=FDH._QTYPE_CAMPAIGN_ +
                                        FDH._QTYPE_TIME_,
                                        generate_plot=False,
                                        db='db1025')
        ir_banner = DR.IntervalReporting(query_type=FDH._QTYPE_BANNER_ +
                                         FDH._QTYPE_TIME_,
                                         generate_plot=False,
                                         db='db1025')
        ir_lp = DR.IntervalReporting(query_type=FDH._QTYPE_LP_ +
                                     FDH._QTYPE_TIME_,
                                     generate_plot=False,
                                     db='db1025')
        """ Execute queries """
        ir_cmpgn.run(start_time, end_time, sampling_interval, 'donations', '',
                     {})
        ir_banner.run(start_time, end_time, sampling_interval, 'donations', '',
                      {})
        ir_lp.run(start_time, end_time, sampling_interval, 'donations', '', {})
        """ Prepare serialized objects """

        dict_param = dict()

        dict_param['metric_legend_table'] = metric_legend_table
        dict_param['conf_legend_table'] = conf_legend_table

        dict_param['measured_metrics_counts'] = measured_metrics_counts
        dict_param['results'] = results
        dict_param['column_names'] = column_names

        dict_param['interval'] = sampling_interval
        dict_param['duration'] = self.DURATION_HRS

        dict_param['start_time'] = TP.timestamp_convert_format(
            start_time, 1, 2)
        dict_param['end_time'] = TP.timestamp_convert_format(end_time, 1, 2)

        dict_param['ir_cmpgn_counts'] = ir_cmpgn._counts_
        dict_param['ir_banner_counts'] = ir_banner._counts_
        dict_param['ir_lp_counts'] = ir_lp._counts_

        dict_param['ir_cmpgn_times'] = ir_cmpgn._times_
        dict_param['ir_banner_times'] = ir_banner._times_
        dict_param['ir_lp_times'] = ir_lp._times_

        # Replace any previously cached payload for this key.
        self.clear_cached_data(shelve_key)
        self.cache_data(dict_param, shelve_key)

        logging.info('Caching complete.')
Example #6
0
def index(request, **kwargs):
    """Render the campaign totals index page.

    Reads optional POST filter fields (``min_donations``, ``utc_ts``) and an
    optional error message (from POST or from ``kwargs['kwargs']``), queries
    campaign totals over the chosen time window, then filters and sorts the
    campaigns for display.

    @param request: Django HTTP request object
    @return: rendered 'campaigns/index.html' response
    """

    crl = DL.CampaignReportingLoader(query_type='totals')
    filter_data = True
    """ Determine the start and end times for the query """
    start_time_obj = datetime.datetime.utcnow() + datetime.timedelta(days=-1)
    end_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)
    start_time = TP.timestamp_from_obj(start_time_obj, 1, 3)
    """ 
        PROCESS POST KWARGS 
        ===================
    """

    err_msg = ''
    try:
        err_msg = str(kwargs['kwargs']['err_msg'])
    except (KeyError, TypeError):
        # No forwarded error message (or kwargs['kwargs'] is not a mapping).
        # Narrowed from a bare `except:` which silently hid all errors.
        pass
    """ 
        PROCESS POST VARS 
        =================                
    """
    """ Process error message """
    try:
        err_msg = MySQLdb._mysql.escape_string(request.POST['err_msg'])
    except KeyError:
        pass
    """ If the filter form was submitted extract the POST vars  """
    try:
        min_donations_var = MySQLdb._mysql.escape_string(
            request.POST['min_donations'].strip())
        earliest_utc_ts_var = MySQLdb._mysql.escape_string(
            request.POST['utc_ts'].strip())
        """ If the user timestamp is earlier than the default start time run the query for the earlier start time  """
        ts_format = TP.getTimestampFormat(earliest_utc_ts_var)
        """ Ensure the validity of the timestamp input """
        if ts_format == TP.TS_FORMAT_FORMAT1:
            start_time = TP.timestamp_convert_format(earliest_utc_ts_var,
                                                     TP.TS_FORMAT_FORMAT1,
                                                     TP.TS_FORMAT_FLAT)
        elif ts_format == TP.TS_FORMAT_FLAT:
            start_time = earliest_utc_ts_var
        elif cmp(earliest_utc_ts_var, '') == 0:
            # Blank timestamp field -- keep the default one-day window.
            start_time = TP.timestamp_from_obj(start_time_obj, 1, 3)
        else:
            raise Exception()

        # -1 is the sentinel for "no minimum donations" filter.
        if cmp(min_donations_var, '') == 0:
            min_donations_var = -1
        else:
            min_donations_var = int(min_donations_var)

    except KeyError:  # In the case the form was not submitted set minimum donations and retain the default start time

        min_donations_var = -1
        pass

    except Exception:  # In the case the form was incorrectly formatted notify the user

        min_donations_var = -1
        start_time = TP.timestamp_from_obj(start_time_obj, 1, 3)
        err_msg = 'Filter fields are incorrect.'
    """ 
        GENERATE CAMPAIGN DATA 
        ======================
        
    """
    campaigns, all_data = crl.run_query({
        'metric_name': 'earliest_timestamp',
        'start_time': start_time,
        'end_time': end_time
    })
    """ Sort campaigns by earliest access """
    sorted_campaigns = sorted(campaigns.iteritems(),
                              key=operator.itemgetter(1))
    sorted_campaigns.reverse()
    """ 
        FILTER CAMPAIGN DATA
        ====================
        
    """

    new_sorted_campaigns = list()
    for campaign in sorted_campaigns:
        key = campaign[0]

        if campaign[1] > 0:
            name = all_data[key][0]
            if name is None:  # identity test -- `== None` is unidiomatic
                name = 'none'

            timestamp = TP.timestamp_convert_format(all_data[key][3], 1, 2)

            if filter_data:
                # Keep only campaigns exceeding the minimum-donations filter.
                if all_data[key][2] > min_donations_var:
                    new_sorted_campaigns.append([
                        campaign[0], campaign[1], name, timestamp,
                        all_data[key][2], all_data[key][4]
                    ])
            else:
                new_sorted_campaigns.append([
                    campaign[0], campaign[1], name, timestamp,
                    all_data[key][2], all_data[key][4]
                ])

    sorted_campaigns = new_sorted_campaigns

    return render_to_response('campaigns/index.html', {
        'campaigns': sorted_campaigns,
        'err_msg': err_msg
    },
                              context_instance=RequestContext(request))
Example #7
0
    def mine_squid_landing_page_requests(self, logFileName):

        logging.info("Begin mining of landing page requests in %s" % logFileName)

        """ Create the dataloaders and initialize """
        sltl = DL.SquidLogTableLoader()
        lptl = DL.LandingPageTableLoader()
        ipctl = DL.IPCountryTableLoader()

        """ Retrieve the log timestamp from the filename """
        time_stamps = self.get_timestamps_with_interval(logFileName, self._log_copy_interval_)

        end = time_stamps[1]
        curr_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)

        """ retrieve the start time of the log """
        start = self.get_first_timestamp_from_log(logFileName)

        """ Initialization - open the file """
        logFile, total_lines_in_file = self.open_logfile(logFileName)

        # Initialization
        hostIndex = 1
        queryIndex = 4
        pathIndex = 2

        """ Clear the old records """
        self._clear_squid_records(start, self._LP_REQUEST_)

        """ Add a row to the SquidLogTable """
        sltl.insert_row(
            type="lp_view",
            log_copy_time=curr_time,
            start_time=start,
            end_time=end,
            log_completion_pct="0.0",
            total_rows="0",
        )

        line_count = 0
        requests_loaded = 0

        """ Extract the mining patterns from the DB """
        mptl = DL.MiningPatternsTableLoader()
        lp_patterns = mptl.get_pattern_lists()[1]

        """
            PROCESS REQUESTS FROM FILE
            ==========================
            
            Sample request:
            
            line = 
                "sq63.wikimedia.org 757671483 2011-06-01T23:00:01.343 93 98.230.113.246 TCP_MISS/200 10201 GET \
                http://wikimediafoundation.org/w/index.php?title=WMFJA085/en/US&utm_source=donate&utm_medium=sidebar&utm_campaign=20101204SB002&country_code=US&referrer=http%3A%2F%2Fen.wikipedia.org%2Fwiki%2FFile%3AMurphy_High_School.jpg CARP/208.80.152.83 text/html http://en.wikipedia.org/wiki/File:Murphy_High_School.jpg \
                - Mozilla/4.0%20(compatible;%20MSIE%208.0;%20Windows%20NT%206.1;%20WOW64;%20Trident/4.0;%20FunWebProducts;%20GTB6.6;%20SLCC2;%20.NET%20CLR%202.0.50727;%20.NET%20CLR%203.5.30729;%20.NET%20CLR%203.0.30729;%20Media%20Center%20PC%206.0;%20HPDTDF;%20.NET4.0C)"
        """

        line = logFile.readline()
        while line != "":

            lineArgs = line.split()

            """ Get the IP Address of the donor """
            ip_add = lineArgs[4]

            #  SELECT CAST('20070529 00:00:00' AS datetime)

            """ 
                Parse the Timestamp:
                
                Sample timestamp:
                    timestamp = "2011-06-01T23:00:07.612"
            """

            date_and_time = lineArgs[2]

            date_string = date_and_time.split("-")
            time_string = date_and_time.split(":")

            # if the date is not logged ignoere the record
            try:
                year = date_string[0]
                month = date_string[1]
                day = date_string[2][:2]
                hour = time_string[0][-2:]
                min = time_string[1]
                sec = time_string[2][:2]
            except:
                line = logFile.readline()
                total_lines_in_file = total_lines_in_file - 1
                continue

            timestamp_string = year + "-" + month + "-" + day + " " + hour + ":" + min + ":" + sec

            """ 
                Process referrer URL
                =================== 
                
                Sample referrer:
                    referrer_url = http://en.wikipedia.org/wiki/File:Murphy_High_School.jpg
            """

            try:
                referrer_url = lineArgs[11]
            except IndexError:
                referrer_url = "Unavailable"

            parsed_referrer_url = up.urlparse(referrer_url)

            if parsed_referrer_url[hostIndex] == None:
                project = "NONE"
                source_lang = "NONE"
            else:
                hostname = parsed_referrer_url[hostIndex].split(".")

                """ If the hostname of the form '<lang>.<project>.org' """
                if len(hostname[0]) <= 2:
                    # referrer_path = parsed_referrer_url[pathIndex].split('/')
                    project = hostname[0]  # wikimediafoundation.org
                    source_lang = hostname[0]
                else:
                    try:
                        """ species.wikimedia vs en.wikinews """
                        project = hostname[0] if (hostname[1] == "wikimedia") else hostname[1]
                        """ pl.wikipedia vs commons.wikimedia """
                        source_lang = hostname[0] if (len(hostname[1]) < 5) else "en"
                    except:
                        project = "wikipedia" """ default project to 'wikipedia' """
                        source_lang = "en" """ default lang to english """

            """
                Process User agent string
                ========================
                
                sample user agent string:
                    user_agent_string = Mozilla/4.0%20(compatible;%20MSIE%208.0;%20Windows%20NT%206.1;%20WOW64;%20Trident/4.0;%20FunWebProducts;%20GTB6.6;%20SLCC2;%20.NET%20CLR%202.0.50727;%20.NET%20CLR%203.5.30729;%20.NET%20CLR%203.0.30729;%20Media%20Center%20PC%206.0;%20HPDTDF;%20.NET4.0C)
                
            """

            try:
                user_agent_string = lineArgs[13]
            except IndexError:
                user_agent_string = ""

            try:
                user_agent_fields = httpagentparser.detect(user_agent_string)
                browser = "NONE"

                # Check to make sure fields exist
                if len(user_agent_fields["browser"]) != 0:
                    if len(user_agent_fields["browser"]["name"]) != 0:
                        browser = user_agent_fields["browser"]["name"]
            except:

                logging.error("Could not process user agent string.")
                browser = "NONE"

            """
                 Process landing URL
                 ===================
                 
                 sample landing urls:
                 
                     landing_url = "http://wikimediafoundation.org/w/index.php?title=WMFJA085/en/US&utm_source=donate&utm_medium=sidebar&utm_campaign=20101204SB002&country_code=US&referrer=http%3A%2F%2Fen.wikipedia.org%2Fwiki%2FFile%3AMurphy_High_School.jpg"
                     landing_url = "http://wikimediafoundation.org/wiki/WMFJA1/ru"
                     landing_url = *donate.wikimedia.org/wiki/Special:FundraiserLandingPage?uselang=en&country=US&template=Lp-layout-default&appeal=Appeal-default&form-countryspecific=Form-countryspecific-control&utm_medium=sitenotice&utm_source=B11_Donate_Jimmy_Control&utm_campaign=C11_1107
            """

            try:
                landing_url = lineArgs[8]
            except IndexError:
                landing_url = "Unavailable"

            hostIndex = 1
            queryIndex = 4
            pathIndex = 2

            parsed_landing_url = up.urlparse(landing_url)
            query_fields = cgi.parse_qs(parsed_landing_url[queryIndex])  # Get the banner name and lang
            path_pieces = parsed_landing_url[pathIndex].split("/")

            include_request, url_match = self.evaluate_landing_url(
                landing_url, parsed_landing_url, query_fields, path_pieces, lp_patterns
            )

            if include_request:

                """ Extract the language from the query string 
                        
                    the language has already been read from the url path but if it
                    exists in  the query string this setting should take precedence
                """
                try:
                    source_lang = query_fields["language"][0]
                except:
                    pass

                """ Address cases where the query string contains the landing page - ...wikimediafoundation.org/w/index.php?... """
                # http://wikimediafoundation.org/wiki/
                if url_match == 1:

                    """ Address cases where the query string does not contain the landing page - ...wikimediafoundation.org/wiki/... """
                    parsed_landing_url = up.urlparse(landing_url)
                    query_fields = cgi.parse_qs(parsed_landing_url[queryIndex])  # Get the banner name and lang

                    landing_path = parsed_landing_url[pathIndex].split("/")
                    landing_page = landing_path[2]

                    # URLs of the form ...?county_code=<iso_code>
                    try:
                        country = query_fields["country"][0]

                    # URLs of the form ...<path>/ <lp_name>/<lang>/<iso_code>
                    except:
                        try:
                            if len(landing_path) == 5:
                                country = landing_path[4]
                                # source_lang = landing_path[3]
                            else:
                                country = landing_path[3]

                        except:

                            logging.info("Could not parse country from landing path: %s", landing_url)
                            line = logFile.readline()
                            total_lines_in_file = total_lines_in_file - 1
                            continue

                # http://wikimediafoundation.org/w/index.php?
                elif url_match == 2:

                    try:

                        """ URLs of the form ...?title=<lp_name> """
                        lp_country = query_fields["title"][0].split("/")
                        landing_page = lp_country[0]

                        """ URLs of the form ...?county_code=<iso_code> """
                        try:
                            country = query_fields["country"][0]
                        except:
                            """ URLs of the form ...?title=<lp_name>/<lang>/<iso_code> """
                            if len(lp_country) == 3:
                                country = lp_country[2]
                            else:
                                country = lp_country[1]

                    except:

                        logging.info("Could not parse landing page request from query string: %s", landing_url)
                        line = logFile.readline()
                        total_lines_in_file = total_lines_in_file - 1
                        continue

                # donate.wikimedia.org/wiki/Special:FundraiserLandingPage?
                elif url_match == 3:

                    try:
                        # e.g. uselang=en&country=US&template=Lp-layout-default&appeal=Appeal-default&form-countryspecific=Form-countryspecific-control&utm_medium=sitenotice&utm_source=B11_Donate_Jimmy_Control&utm_campaign=C11_1107

                        source_lang = query_fields["uselang"][0]
                        country = query_fields["country"][0]

                        landing_page = (
                            query_fields["template"][0].split("-")[2]
                            + "~"
                            + query_fields["appeal-template"][0].split("-")[2]
                            + "~"
                            + query_fields["appeal"][0].split("-")[1]
                            + "~"
                            + query_fields["form-template"][0].split("-")[2]
                            + "~"
                            + query_fields["form-countryspecific"][0].split("-")[2]
                        )

                        utm_source = query_fields["utm_source"][0]
                        utm_campaign = query_fields["utm_campaign"][0] + "_" + country
                        utm_medium = query_fields["utm_medium"][0]

                    except Exception as inst:

                        # logging.info(inst)     # __str__ allows args to printed directly
                        # logging.info('Could not parse landing page request from query string: %s', landing_url)
                        line = logFile.readline()
                        total_lines_in_file = total_lines_in_file - 1
                        continue

                """ If country is confused with the language use the ip """
                if country == country.lower():

                    # logging.info('Using geo-locator to set ip-address: %s', landing_url)
                    country = ipctl.localize_IP(ip_add)

                """ Ensure fields providing request ID exist """
                try:
                    utm_source = query_fields["utm_source"][0]
                    utm_campaign = query_fields["utm_campaign"][0]
                    utm_medium = query_fields["utm_medium"][0]

                except KeyError:

                    line = logFile.readline()
                    total_lines_in_file = total_lines_in_file - 1
                    continue

                """ Insert record into the landing_page_requests table """

                lptl.insert_row(
                    utm_source_arg=utm_source,
                    utm_campaign_arg=utm_campaign,
                    utm_medium_arg=utm_medium,
                    landing_page_arg=landing_page,
                    page_url_arg=landing_url,
                    referrer_url_arg=referrer_url,
                    browser_arg=browser,
                    lang_arg=source_lang,
                    country_arg=country,
                    project_arg=project,
                    ip_arg=ip_add,
                    start_timestamp_arg=start,
                    timestamp_arg=timestamp_string,
                )

                requests_loaded = requests_loaded + 1

            line = logFile.readline()
            line_count = line_count + 1

            """ Log Miner Logging - Update the squid_log_record table """
            if (line_count % 1000) == 0 or line_count == total_lines_in_file:
                completion = float(line_count / total_lines_in_file) * 100.0
                sltl.update_table_row(
                    type="lp_view",
                    log_copy_time=curr_time,
                    start_time=start,
                    end_time=end,
                    log_completion_pct=completion.__str__(),
                    total_rows=line_count.__str__(),
                )
Example #8
0
    def mine_squid_impression_requests(self, logFileName):
        """Mine banner-impression requests from a squid log file.

        Parses each request line for its timestamp and its banner / country /
        project / language query parameters, aggregates impression counts per
        (banner, country, project, lang, minute), and writes one row per
        aggregate into the impression table.  Mining progress is recorded in
        the squid_log_record table.

        @param logFileName: name of the squid log file to process
        """

        logging.info("Begin mining of banner impressions in %s" % logFileName)

        sltl = DL.SquidLogTableLoader()
        itl = DL.ImpressionTableLoader()

        """ Retrieve the log timestamp from the filename """
        time_stamps = self.get_timestamps_with_interval(logFileName, self._log_copy_interval_)

        """ retrieve the start time of the log """
        start = self.get_first_timestamp_from_log(logFileName)

        end = time_stamps[1]
        curr_time = TP.timestamp_from_obj(datetime.datetime.now(), 1, 3)

        """ Initialization - open the file """
        logFile, total_lines_in_file = self.open_logfile(logFileName)
        queryIndex = 4

        # Nested auto-vivifying dict: counts[banner][country][project][lang][minute]
        counts = Hlp.AutoVivification()

        """ Clear the old records """
        self._clear_squid_records(start, self._BANNER_REQUEST_)

        """ Add a row to the SquidLogTable """
        sltl.insert_row(
            type="banner_impression",
            log_copy_time=curr_time,
            start_time=start,
            end_time=end,
            log_completion_pct="0.0",
            total_rows="0",
        )

        """
            PROCESS LOG FILE
            ================

            Sample Request:

            line =
            "sq63.wikimedia.org 757675855 2011-06-01T23:00:07.612 0 187.57.227.121 TCP_MEM_HIT/200 1790 GET \
            http://meta.wikimedia.org/w/index.php?title=Special:BannerLoader&banner=B20110601_JWJN001_BR&userlang=pt&db=ptwiki&sitename=Wikip%C3%A9dia&country=BR NONE/- text/javascript http://pt.wikipedia.org/wiki/Modo_e_tempo_verbal \
            - Mozilla/5.0%20(Windows%20NT%206.1)%20AppleWebKit/534.24%20(KHTML,%20like%20Gecko)%20Chrome/11.0.696.71%20Safari/534.24"

        """

        line_count = 0

        line = logFile.readline()
        while line != "":

            lineArgs = line.split()

            """ 
                Parse the Timestamp:
                
                Sample timestamp:
                    timestamp = "2011-06-01T23:00:07.612"
            """

            try:
                time_stamp = lineArgs[2]
                time_bits = time_stamp.split("T")
                date_fields = time_bits[0].split("-")
                time_fields = time_bits[1].split(":")
                # Truncate to the minute: YYYYMMDDhhmm00
                time_stamp = date_fields[0] + date_fields[1] + date_fields[2] + time_fields[0] + time_fields[1] + "00"

            except (ValueError, IndexError):
                # Malformed line -- skip it and shrink the denominator used
                # for completion-percentage reporting.
                line = logFile.readline()
                total_lines_in_file = total_lines_in_file - 1
                continue

            """ 
                Parse the URL:
                
                Sample url:
                    url = "http://meta.wikimedia.org/w/index.php?title=Special:BannerLoader&banner=B20110601_JWJN001_BR&userlang=pt&db=ptwiki&sitename=Wikip%C3%A9dia&country=BR"
            """

            try:
                url = lineArgs[8]
            except IndexError:
                url = "Unavailable"

            parsedUrl = up.urlparse(url)
            query = parsedUrl[queryIndex]
            queryBits = cgi.parse_qs(query)

            """ Extract - project, banner, language, & country data from the url """
            project = ""
            if "db" in queryBits:
                project = queryBits["db"][0]

            # Fall back to the sitename parameter when no db is given.
            if project == "" and "sitename" in queryBits:
                sitename = queryBits["sitename"][0]
                if sitename:
                    project = sitename
                else:
                    project = "NONE"

            banner = queryBits["banner"][0] if "banner" in queryBits else "NONE"
            lang = queryBits["userlang"][0] if "userlang" in queryBits else "NONE"
            country = queryBits["country"][0] if "country" in queryBits else "NONE"

            """ Group banner impression counts based on (banner, country, project, language) """
            try:
                counts[banner][country][project][lang][time_stamp] = (
                    counts[banner][country][project][lang][time_stamp] + 1
                )
            except TypeError:
                # First impression for this key -- AutoVivification yields a
                # non-numeric leaf that cannot be incremented.
                counts[banner][country][project][lang][time_stamp] = 1

            line = logFile.readline()
            line_count = line_count + 1

            """ Log Miner Logging - Update the squid_log_record table """
            if line_count % 10000 == 0 or line_count == total_lines_in_file:
                # Bug fix: divide in floating point.  The original computed
                # float(line_count / total_lines_in_file), which under Python 2
                # integer division reported 0.0% until the file was complete.
                completion = float(line_count) / total_lines_in_file * 100.0
                sltl.update_table_row(
                    type="banner_impression",
                    log_copy_time=curr_time,
                    start_time=start,
                    end_time=end,
                    log_completion_pct=str(completion),
                    total_rows=str(line_count),
                )

        """ ====== FILE COMPLETE ====== """
        logFile.close()

        """ 
            Break out impression data by minute.
         
            Run through the counts dictionary and insert a row into the banner
            impressions table for each (banner, country, project, lang, minute)
            aggregate.
        """

        for banner, countryCounts in counts.items():
            for country, projectCounts in countryCounts.items():
                for project, langCounts in projectCounts.items():
                    for lang, timestampCounts in langCounts.items():
                        for timestamp, count in timestampCounts.items():
                            itl.insert_row(
                                utm_source_arg=banner,
                                referrer_arg=project,
                                country_arg=country,
                                lang_arg=lang,
                                counts_arg=str(count),
                                on_minute_arg=timestamp,
                                start_timestamp_arg=start,
                            )
Example #9
0
def daily_totals(request):
    """Render the daily donation totals report.

    Reads optional POST filters -- 'start_day_ts' / 'end_day_ts' (report
    interval), 'country' (ISO code), 'min_donation' (integer floor), and
    'order_metric' ('Date' or 'Country') -- builds the
    report_daily_totals_by_country query, and renders the resulting HTML
    table.

    Returns an HttpResponse for live_results/daily_totals.html.
    """

    err_msg = ''

    # Defaults: the 24 hours ending now, any two-letter country code,
    # no donation floor, ordered by date.
    start_day_ts = TP.timestamp_from_obj(
        datetime.datetime.utcnow() + datetime.timedelta(days=-1), 1, 0)
    end_day_ts = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 0)
    country = '.{2}'
    min_donation = 0
    order_str = 'order by 1 desc,3 desc'
    """
        PROCESS POST
    """

    if 'start_day_ts' in request.POST:
        if request.POST['start_day_ts'] != '':
            start_day_ts = MySQLdb._mysql.escape_string(
                request.POST['start_day_ts'].strip())
            ts_format = TP.getTimestampFormat(start_day_ts)

            if ts_format == 2:
                start_day_ts = TP.timestamp_convert_format(start_day_ts, 2, 1)
            elif ts_format == -1:
                err_msg = err_msg + 'Start timestamp is formatted incorrectly\n'

    if 'end_day_ts' in request.POST:
        if request.POST['end_day_ts'] != '':
            end_day_ts = MySQLdb._mysql.escape_string(
                request.POST['end_day_ts'].strip())
            # Bug fix: validate the end timestamp itself.  This previously
            # re-checked start_day_ts, so a malformed end timestamp passed
            # through unvalidated.
            ts_format = TP.getTimestampFormat(end_day_ts)

            if ts_format == 2:
                end_day_ts = TP.timestamp_convert_format(end_day_ts, 2, 1)
            elif ts_format == -1:
                err_msg = err_msg + 'End timestamp is formatted incorrectly\n'

    if 'country' in request.POST:
        if request.POST['country'] != '':
            country = MySQLdb._mysql.escape_string(request.POST['country'])

    if 'min_donation' in request.POST:
        if request.POST['min_donation'] != '':
            try:
                min_donation = int(
                    MySQLdb._mysql.escape_string(
                        request.POST['min_donation'].strip()))
            except (ValueError, TypeError):
                # Non-numeric input -- log and fall back to no floor.
                logging.error(
                    'live_results/daily_totals -- Could not process minimum donation for "%s" '
                    % request.POST['min_donation'].strip())
                min_donation = 0

    if 'order_metric' in request.POST:
        if request.POST['order_metric'] == 'Date':
            order_str = 'order by 1 desc,3 desc'
        elif request.POST['order_metric'] == 'Country':
            order_str = 'order by 2 asc,1 desc'
    """
        === END POST ===
    """

    query_name = 'report_daily_totals_by_country'
    filename = projSet.__sql_home__ + query_name + '.sql'
    sql_stmnt = Hlp.file_to_string(filename)
    sql_stmnt = QD.format_query(query_name,
                                sql_stmnt, [start_day_ts, end_day_ts],
                                country=country,
                                min_donation=min_donation,
                                order_str=order_str)

    dl = DL.DataLoader()
    results = dl.execute_SQL(sql_stmnt)
    html_table = DR.DataReporting()._write_html_table(
        results, dl.get_column_names(), use_standard_metric_names=True)

    return render_to_response('live_results/daily_totals.html', \
                              {'html_table' : html_table, 'start_time' : TP.timestamp_convert_format(start_day_ts, 1, 2), 'end_time' : TP.timestamp_convert_format(end_day_ts, 1, 2)}, \
                              context_instance=RequestContext(request))
Example #10
0
    def execute_process(self, key, **kwargs):
        """Cache 2010-vs-2011 fundraising totals per top donor country.

        For each ranked donor country (plus an overall 'Total'), pull the
        daily donation amounts for both fundraiser years via
        LongTermTrendsLoader, reshape the two series with DataReporting, and
        shelve the combined result under ``key``.
        """

        logging.info('Commencing caching of fundraiser totals data at:  %s' % self.CACHING_HOME)

        end_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)

        # ---- DATA CONFIG ----

        # Loader for the long-term donation trend data.
        trends_loader = DL.LongTermTrendsLoader(db='db1025')

        start_of_2011_fundraiser = '20111116000000'
        countries = DL.CiviCRMLoader().get_ranked_donor_countries(start_of_2011_fundraiser)
        countries.append('Total')

        # Map each plot label to the list of regexes a series label must
        # match for it to be kept in that group.
        year_groups = dict()
        for country in countries:
            if country == 'Total':
                year_groups['2011 Total'] = ['2011.*']
                year_groups['2010 Total'] = ['2010.*']
            else:
                for year in ('2011', '2010'):
                    year_groups[year + ' ' + country] = [year + country]

        metrics = 'amount'
        weights = ''
        groups = year_groups
        group_metrics = ['year', 'country']

        metric_types = DL.LongTermTrendsLoader._MT_AMOUNT_

        include_totals = False
        include_others = False
        hours_back = 0
        time_unit = TP.DAY

        # ---- END CONFIG ----

        # Generate the raw time/count series for every configured group.
        reporter = DR.DataReporting()

        times, counts = trends_loader.run_fundrasing_totals(
            end_time,
            metric_name=metrics,
            metric_type=metric_types,
            groups=groups,
            group_metric=group_metrics,
            include_other=include_others,
            include_total=include_totals,
            hours_back=hours_back,
            weight_name=weights,
            time_unit=time_unit)

        dict_param = dict()

        for country in countries:

            label_2011 = '2011 ' + country
            label_2010 = '2010 ' + country

            # Restrict the reporting object to this country's two series.
            country_counts = {label_2010: counts[label_2010], label_2011: counts[label_2011]}
            country_times = {label_2010: times[label_2010], label_2011: times[label_2011]}

            reporter._counts_ = country_counts
            reporter._times_ = country_times

            empty_data = [0] * len(country_times[list(country_times)[0]])
            data = [reporter.get_data_lists([''], empty_data)]

            dict_param[country] = Hlp.combine_data_lists(data)

        self.clear_cached_data(key)
        self.cache_data(dict_param, key)

        logging.info('Caching complete.')
Example #11
0
    def execute_process(self, key, **kwargs):
        """Cache the live fundraiser results snapshot under ``key``.

        Builds the country summary-results report over the trailing
        DURATION_HRS window (merging one-step-test artifacts when the
        campaign has no landing-page impressions), computes confidence
        metrics, runs campaign/banner/landing-page donation interval
        queries, and shelves every table and series the live-results view
        needs.

        NOTE(review): hard-codes the 'db1025' host and the C_/C11 campaign
        naming scheme -- confirm before reusing elsewhere.
        """
        
        logging.info('Commencing caching of live results data at:  %s' % self.CACHING_HOME)
        shelve_key = key
        
        """ Find the earliest and latest page views for a given campaign  """
        lptl = DL.LandingPageTableLoader(db='db1025')
            
        query_name = 'report_summary_results_country.sql'
        query_name_1S = 'report_summary_results_country_1S.sql'                    
        campaign_regexp_filter = '^C_|^C11_'
                
        dl = DL.DataLoader(db='db1025')
        end_time, start_time = TP.timestamps_for_interval(datetime.datetime.utcnow(), 1, hours=-self.DURATION_HRS)
        
        """ Should a one-step query be used? """        
        use_one_step = lptl.is_one_step(start_time, end_time, 'C11')  # Assume it is a one step test if there are no impressions for this campaign in the landing page table
        
        """ 
            Retrieve the latest time for which impressions have been loaded
            ===============================================================
        """
        
        sql_stmnt = 'select max(end_time) as latest_ts from squid_log_record where log_completion_pct = 100.00'
        
        results = dl.execute_SQL(sql_stmnt)
        latest_timestamp = results[0][0]
        # Convert the datetime result to string form, then to the flat
        # timestamp format the summary SQL templates expect.
        latest_timestamp = TP.timestamp_from_obj(latest_timestamp, 2, 3)
        latest_timestamp_flat = TP.timestamp_convert_format(latest_timestamp, 2, 1)
    
        ret = DR.ConfidenceReporting(query_type='', hyp_test='', db='db1025').get_confidence_on_time_range(start_time, end_time, campaign_regexp_filter, one_step=use_one_step)
        measured_metrics_counts = ret[1]
        
        """ Prepare Summary results """
        
        # The template takes a long positional tuple of interval bounds and
        # campaign filters; the argument order must match the SQL exactly.
        sql_stmnt = Hlp.file_to_string(projSet.__sql_home__ + query_name)
        sql_stmnt = sql_stmnt % (start_time, latest_timestamp_flat, start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, \
                                 start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, \
                                 start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, campaign_regexp_filter)        
        
        logging.info('Executing report_summary_results ...')
        
        results = dl.execute_SQL(sql_stmnt)
        column_names = dl.get_column_names()
        
        if use_one_step:
            
            logging.info('... including one step artifacts ...')
            
            # Same positional layout as the two-step template above.
            sql_stmnt_1S = Hlp.file_to_string(projSet.__sql_home__ + query_name_1S)
            sql_stmnt_1S = sql_stmnt_1S % (start_time, latest_timestamp_flat, start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, \
                                     start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, \
                                     start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, campaign_regexp_filter)
            
            results = list(results)        
            results_1S = dl.execute_SQL(sql_stmnt_1S)
            
            """ Ensure that the results are unique """
            # Key each row on its first three columns; one-step rows win over
            # duplicate two-step rows.
            one_step_keys = list()
            for row in results_1S:
                one_step_keys.append(str(row[0]) + str(row[1]) + str(row[2]))
            
            new_results = list()
            for row in results:
                key = str(row[0]) + str(row[1]) + str(row[2])
                if not(key in one_step_keys):
                    new_results.append(row)
            results = new_results
                
            results.extend(list(results_1S))
            
        metric_legend_table = DR.DataReporting().get_standard_metrics_legend()
        conf_legend_table = DR.ConfidenceReporting(query_type='bannerlp', hyp_test='TTest').get_confidence_legend_table()

        """ Create a interval loader objects """
        
        sampling_interval = 5 # 5 minute sampling interval for donation plots
        
        ir_cmpgn = DR.IntervalReporting(query_type=FDH._QTYPE_CAMPAIGN_ + FDH._QTYPE_TIME_, generate_plot=False, db='db1025')
        ir_banner = DR.IntervalReporting(query_type=FDH._QTYPE_BANNER_ + FDH._QTYPE_TIME_, generate_plot=False, db='db1025')
        ir_lp = DR.IntervalReporting(query_type=FDH._QTYPE_LP_ + FDH._QTYPE_TIME_, generate_plot=False, db='db1025')
            
        """ Execute queries """        
        ir_cmpgn.run(start_time, end_time, sampling_interval, 'donations', '',{})
        ir_banner.run(start_time, end_time, sampling_interval, 'donations', '',{})
        ir_lp.run(start_time, end_time, sampling_interval, 'donations', '',{})
        
        
        """ Prepare serialized objects """
        
        # Everything below is shelved for the live-results view to read back.
        dict_param = dict()

        dict_param['metric_legend_table'] = metric_legend_table
        dict_param['conf_legend_table'] = conf_legend_table
        
        dict_param['measured_metrics_counts'] = measured_metrics_counts
        dict_param['results'] = results
        dict_param['column_names'] = column_names

        dict_param['interval'] = sampling_interval
        dict_param['duration'] = self.DURATION_HRS    
        
        dict_param['start_time'] = TP.timestamp_convert_format(start_time,1,2)
        dict_param['end_time'] = TP.timestamp_convert_format(end_time,1,2)
        
        dict_param['ir_cmpgn_counts'] = ir_cmpgn._counts_
        dict_param['ir_banner_counts'] = ir_banner._counts_
        dict_param['ir_lp_counts'] = ir_lp._counts_
        
        dict_param['ir_cmpgn_times'] = ir_cmpgn._times_
        dict_param['ir_banner_times'] = ir_banner._times_
        dict_param['ir_lp_times'] = ir_lp._times_
        
        self.clear_cached_data(shelve_key)
        self.cache_data(dict_param, shelve_key)
        
        logging.info('Caching complete.')
        
Example #12
0
def process_filter_data(request):
    """Parse the earliest/latest UTC timestamp filters from the request POST.

    Missing or empty fields silently default to the four-hour window ending
    now; malformed values also fall back to the defaults but report an error
    message.

    Returns:
        (err_msg, earliest_utc_ts_var, latest_utc_ts_var)
    """

    err_msg = ''

    time_curr = datetime.datetime.utcnow()
    time_dayback = time_curr + datetime.timedelta(hours = -4)

    """ Default window: the four hours up to now """
    _beginning_time_ = TP.timestamp_from_obj(time_dayback, 1, 3)
    _end_time_ = TP.timestamp_from_obj(time_curr, 1, 3)


    """ 
        PROCESS POST VARS 
        =================
    """

    try:

        latest_utc_ts_var = MySQLdb._mysql.escape_string(request.POST['latest_utc_ts'].strip())

        # Bug fix: treat an empty field as "use the default" *before*
        # validating.  The empty string is not a valid timestamp, so the
        # original order raised TypeError first -- flagging blank input as
        # an error and leaving the empty-string default unreachable.
        if latest_utc_ts_var == '':
            latest_utc_ts_var = _end_time_
        else:
            if not TP.is_timestamp(latest_utc_ts_var, 1) and not TP.is_timestamp(latest_utc_ts_var, 2):
                raise TypeError

            # Normalize to the flat timestamp format if needed.
            ts_format = TP.getTimestampFormat(latest_utc_ts_var)
            if ts_format == TP.TS_FORMAT_FORMAT1:
                latest_utc_ts_var = TP.timestamp_convert_format(latest_utc_ts_var, TP.TS_FORMAT_FORMAT1, TP.TS_FORMAT_FLAT)

    except KeyError:
        # Field absent from the POST -- fall back silently.
        latest_utc_ts_var = _end_time_

    except TypeError:
        err_msg = 'Please enter a valid end-timestamp.'
        latest_utc_ts_var = _end_time_


    try:

        earliest_utc_ts_var = MySQLdb._mysql.escape_string(request.POST['earliest_utc_ts'].strip())

        # Same ordering fix as above: empty means "use the default".
        if earliest_utc_ts_var == '':
            earliest_utc_ts_var = _beginning_time_
        else:
            if not TP.is_timestamp(earliest_utc_ts_var, 1) and not TP.is_timestamp(earliest_utc_ts_var, 2):
                raise TypeError

            ts_format = TP.getTimestampFormat(earliest_utc_ts_var)
            if ts_format == TP.TS_FORMAT_FORMAT1:
                earliest_utc_ts_var = TP.timestamp_convert_format(earliest_utc_ts_var, TP.TS_FORMAT_FORMAT1, TP.TS_FORMAT_FLAT)

    except KeyError:
        earliest_utc_ts_var = _beginning_time_

    except TypeError:
        err_msg = 'Please enter a valid start-timestamp.'
        earliest_utc_ts_var = _beginning_time_


    return err_msg, earliest_utc_ts_var, latest_utc_ts_var
Example #13
0
def daily_totals(request):    
    """Render the daily donation totals report.

    Reads optional POST filters -- 'start_day_ts' / 'end_day_ts' (report
    interval), 'country' (ISO code), 'min_donation' (integer floor), and
    'order_metric' ('Date' or 'Country') -- builds the
    report_daily_totals_by_country query, and renders the resulting HTML
    table.

    Returns an HttpResponse for live_results/daily_totals.html.
    """

    err_msg = ''
    
    # Defaults: the 24 hours ending now, any two-letter country code,
    # no donation floor, ordered by date.
    start_day_ts = TP.timestamp_from_obj(datetime.datetime.utcnow() + datetime.timedelta(days=-1), 1, 0)
    end_day_ts = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 0)
    country = '.{2}'
    min_donation = 0
    order_str = 'order by 1 desc,3 desc'
    
    """
        PROCESS POST
    """
    
    if 'start_day_ts' in request.POST:
        if request.POST['start_day_ts'] != '':
            start_day_ts = MySQLdb._mysql.escape_string(request.POST['start_day_ts'].strip())
            ts_format = TP.getTimestampFormat(start_day_ts)
            
            if ts_format == 2:
                start_day_ts = TP.timestamp_convert_format(start_day_ts, 2, 1)
            elif ts_format == -1:
                err_msg = err_msg + 'Start timestamp is formatted incorrectly\n'

    if 'end_day_ts' in request.POST:
        if request.POST['end_day_ts'] != '':
            end_day_ts = MySQLdb._mysql.escape_string(request.POST['end_day_ts'].strip())
            # Bug fix: validate the end timestamp itself.  This previously
            # re-checked start_day_ts, so a malformed end timestamp passed
            # through unvalidated.
            ts_format = TP.getTimestampFormat(end_day_ts)
            
            if ts_format == 2:
                end_day_ts = TP.timestamp_convert_format(end_day_ts, 2, 1)
            elif ts_format == -1:
                err_msg = err_msg + 'End timestamp is formatted incorrectly\n'
            
    if 'country' in request.POST:
        if request.POST['country'] != '':
            country = MySQLdb._mysql.escape_string(request.POST['country'])

    if 'min_donation' in request.POST:
        if request.POST['min_donation'] != '':
            try:                
                min_donation = int(MySQLdb._mysql.escape_string(request.POST['min_donation'].strip()))
            except (ValueError, TypeError):
                # Non-numeric input -- log and fall back to no floor.
                logging.error('live_results/daily_totals -- Could not process minimum donation for "%s" ' % request.POST['min_donation'].strip())
                min_donation = 0
    
    if 'order_metric' in request.POST:
        if request.POST['order_metric'] == 'Date':
            order_str = 'order by 1 desc,3 desc'
        elif request.POST['order_metric'] == 'Country':
            order_str = 'order by 2 asc,1 desc'
            
    """
        === END POST ===
    """
    
    query_name = 'report_daily_totals_by_country'
    filename = projSet.__sql_home__+ query_name + '.sql'
    sql_stmnt = Hlp.file_to_string(filename)
    sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_day_ts, end_day_ts], country=country, min_donation=min_donation, order_str=order_str)
    
    dl = DL.DataLoader()    
    results = dl.execute_SQL(sql_stmnt)
    html_table = DR.DataReporting()._write_html_table(results, dl.get_column_names(), use_standard_metric_names=True)
    
    return render_to_response('live_results/daily_totals.html', \
                              {'html_table' : html_table, 'start_time' : TP.timestamp_convert_format(start_day_ts, 1, 2), 'end_time' : TP.timestamp_convert_format(end_day_ts, 1, 2)}, \
                              context_instance=RequestContext(request))
Example #14
0
def index(request, **kwargs):
    """ Render the campaign totals landing page.

        Reads optional POST filter fields ('min_donations', 'utc_ts',
        'err_msg') and an optional 'err_msg' forwarded via kwargs, runs the
        campaign totals query over the resulting time window, sorts the
        campaigns by earliest access (most recent first), drops campaigns at
        or below the donation minimum, and renders 'campaigns/index.html'.
    """
    
    crl = DL.CampaignReportingLoader(query_type='totals')
    filter_data = True
            
    """ Determine the start and end times for the query -- default window is the trailing 24 hours """ 
    start_time_obj =  datetime.datetime.utcnow() + datetime.timedelta(days=-1)
    end_time = TP.timestamp_from_obj(datetime.datetime.utcnow(),1,3)    
    start_time = TP.timestamp_from_obj(start_time_obj,1,3)
    
    """ 
        PROCESS POST KWARGS 
        ===================
    """
    
    err_msg = ''
    try:
        err_msg = str(kwargs['kwargs']['err_msg'])
    except (KeyError, TypeError):
        # No forwarded error message.  Narrowed from a bare `except:` which
        # also swallowed SystemExit/KeyboardInterrupt and hid real bugs.
        pass
    
    """ 
        PROCESS POST VARS 
        =================                
    """
    
    """ Process error message """
    try:
        err_msg = MySQLdb._mysql.escape_string(request.POST['err_msg'])
    except KeyError:
        pass

    """ If the filter form was submitted extract the POST vars  """
    try:
        min_donations_var = MySQLdb._mysql.escape_string(request.POST['min_donations'].strip())
        earliest_utc_ts_var = MySQLdb._mysql.escape_string(request.POST['utc_ts'].strip())
        
        """ If the user timestamp is earlier than the default start time run the query for the earlier start time  """
        ts_format = TP.getTimestampFormat(earliest_utc_ts_var)
    
        """ Ensure the validity of the timestamp input """
        if ts_format == TP.TS_FORMAT_FORMAT1:
            start_time = TP.timestamp_convert_format(earliest_utc_ts_var, TP.TS_FORMAT_FORMAT1, TP.TS_FORMAT_FLAT)
        elif ts_format == TP.TS_FORMAT_FLAT:
            start_time = earliest_utc_ts_var
        elif cmp(earliest_utc_ts_var, '') == 0:
            start_time = TP.timestamp_from_obj(start_time_obj,1,3)
        else:
            # Unrecognized timestamp format -- handled by the generic
            # except clause below, which resets defaults and flags the user
            raise Exception()
        
        if cmp(min_donations_var, '') == 0:
            min_donations_var = -1
        else:
            min_donations_var = int(min_donations_var)
    
    except KeyError:
        # The filter form was not submitted -- disable the donation floor
        # and retain the default start time
        min_donations_var = -1
    
    except Exception:
        # The form was incorrectly formatted (bad timestamp, or a
        # non-integer donation minimum raising ValueError) -- reset the
        # defaults and notify the user
        min_donations_var = -1
        start_time = TP.timestamp_from_obj(start_time_obj,1,3)      
        err_msg = 'Filter fields are incorrect.'
    
    """ 
        GENERATE CAMPAIGN DATA 
        ======================
        
    """
    campaigns, all_data = crl.run_query({'metric_name' : 'earliest_timestamp', 'start_time' : start_time, 'end_time' : end_time})

    """ Sort campaigns by earliest access, most recent first """    
    sorted_campaigns = sorted(campaigns.iteritems(), key=operator.itemgetter(1))
    sorted_campaigns.reverse()
    
    """ 
        FILTER CAMPAIGN DATA
        ====================
        
    """

    new_sorted_campaigns = list()
    for campaign in sorted_campaigns:
        key = campaign[0]
        
        if campaign[1] > 0:
            name = all_data[key][0]
            if name is None:  # identity check -- `== None` is unidiomatic
                name = 'none'
            
            timestamp = TP.timestamp_convert_format(all_data[key][3], 1, 2)
            
            # Single condition replaces the previous duplicated append in
            # filtered / unfiltered branches; behavior is identical
            if not filter_data or all_data[key][2] > min_donations_var:
                new_sorted_campaigns.append([campaign[0], campaign[1], name, timestamp, all_data[key][2], all_data[key][4]])
    
    sorted_campaigns = new_sorted_campaigns

    return render_to_response('campaigns/index.html', {'campaigns' : sorted_campaigns, 'err_msg' : err_msg}, context_instance=RequestContext(request))