Example #1
def process_filter_data(request):

    err_msg = ""

    time_curr = datetime.datetime.utcnow()
    time_dayback = time_curr + datetime.timedelta(hours=-4)

    _beginning_time_ = TP.timestamp_from_obj(time_dayback, 1, 3)
    _end_time_ = TP.timestamp_from_obj(time_curr, 1, 3)

    """ 
        PROCESS POST VARS 
        =================
    """

    try:

        latest_utc_ts_var = MySQLdb._mysql.escape_string(request.POST["latest_utc_ts"].strip())

        """ An empty field falls back to the default window end; validate only non-empty input """
        if latest_utc_ts_var == "":
            latest_utc_ts_var = _end_time_
        elif not (TP.is_timestamp(latest_utc_ts_var, 1)) and not (TP.is_timestamp(latest_utc_ts_var, 2)):
            raise TypeError

        ts_format = TP.getTimestampFormat(latest_utc_ts_var)
        if ts_format == TP.TS_FORMAT_FORMAT1:
            latest_utc_ts_var = TP.timestamp_convert_format(latest_utc_ts_var, TP.TS_FORMAT_FORMAT1, TP.TS_FORMAT_FLAT)

    except KeyError:
        latest_utc_ts_var = _end_time_

    except TypeError:
        err_msg = "Please enter a valid end-timestamp."
        latest_utc_ts_var = _end_time_

    try:

        earliest_utc_ts_var = MySQLdb._mysql.escape_string(request.POST["earliest_utc_ts"].strip())

        """ An empty field falls back to the default window start; validate only non-empty input """
        if earliest_utc_ts_var == "":
            earliest_utc_ts_var = _beginning_time_
        elif not (TP.is_timestamp(earliest_utc_ts_var, 1)) and not (TP.is_timestamp(earliest_utc_ts_var, 2)):
            raise TypeError

        ts_format = TP.getTimestampFormat(earliest_utc_ts_var)
        if ts_format == TP.TS_FORMAT_FORMAT1:
            earliest_utc_ts_var = TP.timestamp_convert_format(
                earliest_utc_ts_var, TP.TS_FORMAT_FORMAT1, TP.TS_FORMAT_FLAT
            )

    except KeyError:
        earliest_utc_ts_var = _beginning_time_

    except TypeError:
        err_msg = "Please enter a valid start-timestamp."
        earliest_utc_ts_var = _beginning_time_

    return err_msg, earliest_utc_ts_var, latest_utc_ts_var
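
A sketch of how the returned window is typically consumed (as Examples #2 and #8 do): both bounds come back as flat numeric timestamps, so callers can range-filter with integer comparisons. candidate_ts below is an illustrative flat timestamp, not a name from the original code.

    err_msg, earliest_utc_ts_var, latest_utc_ts_var = process_filter_data(request)
    in_window = int(earliest_utc_ts_var) < int(candidate_ts) < int(latest_utc_ts_var)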
Example #2
def index(request):
    
    err_msg, earliest_utc_ts_var, latest_utc_ts_var = process_filter_data(request)
    
    sltl = DL.SquidLogTableLoader()
    
    """ Show the squid log table """
    squid_table = sltl.get_all_rows_unique_start_time()
    filtered_squid_table = list()
    
    for row in squid_table:
         
        log_start_time = sltl.get_squid_log_record_field(row, 'start_time')
        
        """ Ensure the timestamp is properly formatted """
        if TP.is_timestamp(log_start_time, 2):
            log_start_time = TP.timestamp_convert_format(log_start_time, 2, 1)
            
        if int(log_start_time) > int(earliest_utc_ts_var) and int(log_start_time) < int(latest_utc_ts_var):
            filtered_squid_table.append(row)
    
    squid_table = filtered_squid_table
    squid_table.reverse()
    column_names = sltl.get_column_names()
    new_column_names = list()
    
    for name in column_names:
        new_column_names.append(sltl.get_verbose_column(name))
        
    squid_table = DR.DataReporting()._write_html_table(squid_table, new_column_names)
    
    """ Show the latest log that has been or is loading and its progress """
    completion_rate = sltl.get_completion_rate_of_latest_log()
    
    return render_to_response('LML/index.html', {'err_msg' : err_msg, 'squid_table' : squid_table, 'completion_rate' : completion_rate},  context_instance=RequestContext(request))
Example #3
def impression_list(request):
    
    err_msg = ''
    where_clause = ''
    
    """ 
        Process times and POST
        =============
    """
    duration_hrs = 2
    end_time, start_time = TP.timestamps_for_interval(datetime.datetime.utcnow(), 1, hours=-duration_hrs)    
    
    if 'earliest_utc_ts' in request.POST:
        if cmp(request.POST['earliest_utc_ts'], '') != 0:
            earliest_utc_ts = MySQLdb._mysql.escape_string(request.POST['earliest_utc_ts'].strip())
            format = TP.getTimestampFormat(earliest_utc_ts)
            
            if format == 1:
                start_time = earliest_utc_ts
            elif format == 2:
                start_time = TP.timestamp_convert_format(earliest_utc_ts, 2, 1)
            elif format == -1:
                err_msg = err_msg + 'Start timestamp is formatted incorrectly\n'
    
    if 'latest_utc_ts' in request.POST:
        if cmp(request.POST['latest_utc_ts'], '') != 0:
            latest_utc_ts = MySQLdb._mysql.escape_string(request.POST['latest_utc_ts'].strip())
            format = TP.getTimestampFormat(latest_utc_ts)
            
            if format == 1:
                end_time = latest_utc_ts
            elif format == 2:
                end_time = TP.timestamp_convert_format(latest_utc_ts, 2, 1)
            elif format == -1:
                err_msg = err_msg + 'End timestamp is formatted incorrectly\n'
            
    if 'iso_code' in request.POST:
        if cmp(request.POST['iso_code'], '') != 0:
            iso_code = MySQLdb._mysql.escape_string(request.POST['iso_code'].strip())
            where_clause = "where bi.country regexp '%s' " % iso_code
                    
    """ 
        Format and execute query 
        ========================
    """
        
    query_name = 'report_country_impressions.sql'    
    
    sql_stmnt = Hlp.file_to_string(projSet.__sql_home__ + query_name)
    sql_stmnt = sql_stmnt % (start_time, end_time, start_time, end_time, start_time, end_time, where_clause)
    
    dl = DL.DataLoader()
    results = dl.execute_SQL(sql_stmnt)
    column_names = dl.get_column_names()

    imp_table = DR.DataReporting()._write_html_table(results, column_names)
    
    return render_to_response('live_results/impression_list.html', {'imp_table' : imp_table.decode("utf-8"), 'err_msg' : err_msg, 'start' : TP.timestamp_convert_format(start_time, 1, 2), 'end' : TP.timestamp_convert_format(end_time, 1, 2)},  context_instance=RequestContext(request))
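
The query above is assembled with string interpolation after escaping each field. With direct DB-API access the same country filter could instead be bound as a parameter, letting the driver handle quoting. A minimal sketch, assuming a raw MySQLdb connection and a hypothetical banner_impressions table rather than the DataLoader wrapper:

    import MySQLdb

    conn = MySQLdb.connect(host='localhost', db='reporting')  # hypothetical connection settings
    cursor = conn.cursor()
    # the driver escapes and quotes the bound value itself
    cursor.execute("select count(*) from banner_impressions bi where bi.country regexp %s", (iso_code,))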
Example #4
    def get_timestamps_with_interval(self, logFileName, interval):

        log_end = self.get_timestamps(logFileName)[1]

        end_obj = TP.timestamp_to_obj(log_end, 1)
        start_obj = end_obj + datetime.timedelta(minutes=-interval)

        start_timestamp = TP.timestamp_from_obj(start_obj, 1, 2)
        end_timestamp = TP.timestamp_from_obj(end_obj, 1, 2)

        return [start_timestamp, end_timestamp]
Example #5
def process_post_vars(request):

    end_time, start_time = TP.timestamps_for_interval(
        datetime.datetime.utcnow(), 1, hours=-24)

    # POST: minimum donations for records
    try:
        min_donations_var = MySQLdb._mysql.escape_string(
            request.POST['min_donations'].strip())
        min_donations_var = int(min_donations_var)

    except:
        min_donations_var = 0

    # POST Start Timestamp for records
    try:

        earliest_utc_ts_var = MySQLdb._mysql.escape_string(
            request.POST['utc_ts'].strip())
        """ If the user timestamp is earlier than the default start time run the query for the earlier start time  """
        ts_format = TP.getTimestampFormat(earliest_utc_ts_var)
        """ Ensure the validity of the timestamp input """
        if ts_format == TP.TS_FORMAT_FORMAT1:
            start_time = TP.timestamp_convert_format(earliest_utc_ts_var,
                                                     TP.TS_FORMAT_FORMAT1,
                                                     TP.TS_FORMAT_FLAT)
        elif ts_format == TP.TS_FORMAT_FLAT:
            start_time = earliest_utc_ts_var

    except Exception:  # If the field was missing or malformed, keep the default start time
        pass

    # POST: minimum donations for records
    try:
        view_order = MySQLdb._mysql.escape_string(
            request.POST['view_order'].strip())

        if cmp(view_order, 'campaign') == 0:
            view_order_str = 'order by utm_campaign, country, language, landing_page desc'
        elif cmp(view_order, 'country') == 0:
            view_order_str = 'order by country, language, utm_campaign, landing_page desc'
        else:
            # unrecognized ordering - fall back to campaign ordering so view_order_str is always bound
            view_order_str = 'order by utm_campaign, country, language, landing_page desc'

    except:
        view_order_str = 'order by utm_campaign, country, language, landing_page desc'

    return start_time, end_time, min_donations_var, view_order_str
Example #6
def process_post_vars(request):
    
    end_time, start_time = TP.timestamps_for_interval(datetime.datetime.utcnow(), 1, hours=-24)
    
    # POST: minimum donations for records
    try:
        min_donations_var = MySQLdb._mysql.escape_string(request.POST['min_donations'].strip())
        min_donations_var = int(min_donations_var)
        
    except:
        min_donations_var = 0
    
    # POST Start Timestamp for records
    try:
        
        earliest_utc_ts_var = MySQLdb._mysql.escape_string(request.POST['utc_ts'].strip())
        
        """ If the user timestamp is earlier than the default start time run the query for the earlier start time  """
        ts_format = TP.getTimestampFormat(earliest_utc_ts_var)
    
        """ Ensure the validity of the timestamp input """
        if ts_format == TP.TS_FORMAT_FORMAT1:
            start_time = TP.timestamp_convert_format(earliest_utc_ts_var, TP.TS_FORMAT_FORMAT1, TP.TS_FORMAT_FLAT)
        elif ts_format == TP.TS_FORMAT_FLAT:
            start_time = earliest_utc_ts_var
                
    except Exception:  # If the field was missing or malformed, keep the default start time
        pass
    
    # POST: minimum donations for records
    try:
        view_order = MySQLdb._mysql.escape_string(request.POST['view_order'].strip())
        
        if cmp(view_order, 'campaign') == 0:
            view_order_str = 'order by utm_campaign, country, language, landing_page desc'
        elif cmp(view_order, 'country') == 0:
            view_order_str = 'order by country, language, utm_campaign, landing_page desc'
        else:
            # unrecognized ordering - fall back to campaign ordering so view_order_str is always bound
            view_order_str = 'order by utm_campaign, country, language, landing_page desc'
        
    except:
        view_order_str = 'order by utm_campaign, country, language, landing_page desc'
    
    return start_time, end_time, min_donations_var, view_order_str
Example #7
    def execute(self):
        
        DataFilter.execute(self)
        
        counts = self._mutable_obj_._counts_
        times = self._mutable_obj_._times_
        item_keys = self._mutable_obj_._item_keys_

        """ Select only the specified item keys """
        if len(item_keys) > 0:
            counts = self._mutable_obj_.select_metric_keys(counts)
            times = self._mutable_obj_.select_metric_keys(times)
        
        """ Convert Times to Integers that indicate relative times AND normalize the intervals in case any are missing """
        for key in times.keys():
            times[key] = TP.normalize_timestamps(times[key], False, 3)        

            """ BUG FIX - remove any repeated indices """
            fixed_times = list()
            for time_index in range(len(times[key]) - 1):
                if times[key][time_index] != times[key][time_index + 1]:
                    fixed_times.append(times[key][time_index])
            if len(times[key]) > 0:
                fixed_times.append(times[key][-1])
            times[key] = fixed_times
            times[key], counts[key] = TP.normalize_intervals(times[key], counts[key], self._interval_)                    
            
        """ If there are missing metrics add them as zeros """
        for artifact_key in self._artifact_keys_:

            if not(artifact_key in times.keys()):
                times[artifact_key] = times[times.keys()[0]]
                counts[artifact_key] = [0.0] * len(times[artifact_key])
        
        """  Remove artifacts not in the list if there are any labels specified """
        if len(self._artifact_keys_) > 0:
            for key in counts.keys():
                if key not in self._artifact_keys_:
                    del counts[key]
                    del times[key]
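
The repeated-index fix above is plain consecutive-duplicate removal; the standard library expresses the same pass with itertools.groupby. A minimal equivalent sketch (dedup_consecutive is an illustrative name, not part of the original module):

    import itertools

    def dedup_consecutive(seq):
        """ Keep one element per run of equal adjacent items """
        return [key for key, _ in itertools.groupby(seq)]

    # dedup_consecutive([1, 1, 2, 2, 3, 1]) -> [1, 2, 3, 1]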
                    
Example #8
def index(request):

    err_msg, earliest_utc_ts_var, latest_utc_ts_var = process_filter_data(request)

    sltl = DL.SquidLogTableLoader()

    """ Show the squid log table """
    squid_table = sltl.get_all_rows_unique_start_time()
    filtered_squid_table = list()

    for row in squid_table:

        log_start_time = sltl.get_squid_log_record_field(row, "start_time")

        """ Ensure the timestamp is properly formatted """
        if TP.is_timestamp(log_start_time, 2):
            log_start_time = TP.timestamp_convert_format(log_start_time, 2, 1)

        if int(log_start_time) > int(earliest_utc_ts_var) and int(log_start_time) < int(latest_utc_ts_var):
            filtered_squid_table.append(row)

    squid_table = filtered_squid_table
    squid_table.reverse()
    column_names = sltl.get_column_names()
    new_column_names = list()

    for name in column_names:
        new_column_names.append(sltl.get_verbose_column(name))

    squid_table = DR.DataReporting()._write_html_table(squid_table, new_column_names)

    """ Show the latest log that has been or is loading and its progress """
    completion_rate = sltl.get_completion_rate_of_latest_log()

    return render_to_response(
        "LML/index.html",
        {"err_msg": err_msg, "squid_table": squid_table, "completion_rate": completion_rate},
        context_instance=RequestContext(request),
    )
Example #9
    def execute(self):

        DataFilter.execute(self)

        counts = self._mutable_obj_._counts_
        times = self._mutable_obj_._times_
        item_keys = self._mutable_obj_._item_keys_
        """ Select only the specified item keys """
        if len(item_keys) > 0:
            counts = self._mutable_obj_.select_metric_keys(counts)
            times = self._mutable_obj_.select_metric_keys(times)
        """ Convert Times to Integers that indicate relative times AND normalize the intervals in case any are missing """
        for key in times.keys():
            times[key] = TP.normalize_timestamps(times[key], False, 3)
            """ BUG FIX - remove any repeated indices """
            fixed_times = list()
            for time_index in range(len(times[key]) - 1):
                if times[key][time_index] != times[key][time_index + 1]:
                    fixed_times.append(times[key][time_index])
            if len(times[key]) > 0:
                fixed_times.append(times[key][-1])
            times[key] = fixed_times
            times[key], counts[key] = TP.normalize_intervals(
                times[key], counts[key], self._interval_)
        """ If there are missing metrics add them as zeros """
        for artifact_key in self._artifact_keys_:

            if not (artifact_key in times.keys()):
                times[artifact_key] = times[times.keys()[0]]
                counts[artifact_key] = [0.0] * len(times[artifact_key])
        """  Remove artifacts not in the list if there are any labels specified """
        if len(self._artifact_keys_) > 0:
            for key in counts.keys():
                if key not in self._artifact_keys_:
                    del counts[key]
                    del times[key]
Example #10
    def get_time_of_last_log(self):

        log_names = self.get_list_of_logs()

        """ In case directory initialize datetime object to one day in the past  """
        log_time_obj = datetime.datetime.now() + datetime.timedelta(days=-1)

        for name in log_names:

            """ Retrieve the log timestamp from the filename and convert to datetime objects """
            time_stamps = self.get_timestamps_with_interval(name, self._log_copy_interval_)
            log_end_time_obj = TP.timestamp_to_obj(time_stamps[1], 1)

            if name == log_names[0]:
                log_time_obj = log_end_time_obj
            else:
                if log_end_time_obj > log_time_obj:
                    log_time_obj = log_end_time_obj

        return log_time_obj
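
The loop above is a running maximum over the logs' end times, seeded by the first file; under the same helper-method assumptions it reduces to max():

    log_end_times = [TP.timestamp_to_obj(self.get_timestamps_with_interval(name, self._log_copy_interval_)[1], 1)
                     for name in log_names]
    log_time_obj = max(log_end_times) if log_end_times else datetime.datetime.now() + datetime.timedelta(days=-1)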
Example #11
    def get_timestamps(self, logFileName):

        fname_parts = logFileName.split("-")

        year = int(fname_parts[1])
        month = int(fname_parts[2])
        day = int(fname_parts[3])
        hour = int(fname_parts[4][0:2])
        minute = int(fname_parts[6][0:2])

        # Is this an afternoon log?
        afternoon = fname_parts[4][2:4] == "PM"

        # Adjust the hour as necessary if == 12AM or *PM
        if afternoon and hour < 12:
            hour = hour + 12

        if not (afternoon) and hour == 12:
            hour = 0

        prev_hr = TP.getPrevHour(year, month, day, hour)

        str_month = "0" + str(month) if month < 10 else str(month)
        str_day = "0" + str(day) if day < 10 else str(day)
        str_hour = "0" + str(hour) if hour < 10 else str(hour)
        str_minute = "0" + str(minute) if minute < 10 else str(minute)

        prev_month = prev_hr[1]
        prev_day = prev_hr[2]
        prev_hour = prev_hr[3]
        str_prev_month = "0" + str(prev_month) if prev_month < 10 else str(prev_month)
        str_prev_day = "0" + str(prev_day) if prev_day < 10 else str(prev_day)
        str_prev_hour = "0" + str(prev_hour) if prev_hour < 10 else str(prev_hour)

        log_end = str(year) + str_month + str_day + str_hour + str_minute + "00"
        log_start = str(prev_hr[0]) + str_prev_month + str_prev_day + str_prev_hour + "5500"

        return [log_start, log_end]
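
The manual zero-padding above can also be delegated to strftime, which pads each field for free. A sketch of the same window arithmetic (hour_window is an illustrative helper and assumes the hour has already been converted to 24-hour form as above):

    import datetime

    def hour_window(year, month, day, hour, minute):
        """ Flat YYYYMMDDHHMMSS end stamp plus the :55 mark of the previous hour """
        end_obj = datetime.datetime(year, month, day, hour, minute)
        start_obj = end_obj.replace(minute=0) - datetime.timedelta(minutes=5)
        return start_obj.strftime('%Y%m%d%H%M%S'), end_obj.strftime('%Y%m%d%H%M%S')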
Example #12
    def _clear_squid_records(self, start, request_type):

        """ Ensure that the range is correct; otherwise abort - critical that outside records are not deleted """
        timestamp = TP.timestamp_convert_format(start, 1, 2)

        try:
            if request_type == self._BANNER_REQUEST_:
                self._DL_impressions_.delete_row(timestamp)
            elif request_type == self._LP_REQUEST_:
                self._DL_LPrequests_.delete_row(timestamp)

            logging.info("Executed delete for start time " + timestamp)

        except Exception as inst:

            logging.error("Could not execute delete for start time " + timestamp)

            logging.error(type(inst))  # the exception instance
            logging.error(inst.args)  # arguments stored in .args
            logging.error(inst)  # __str__ allows args to printed directly

            """ Die if the records cannot be removed """
            sys.exit()
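
The separate logging.error calls in the handler above can also be collapsed into a single record with the traceback attached, since logging.exception captures the active exception. A sketch of the same handler shape:

    try:
        self._DL_impressions_.delete_row(timestamp)
    except Exception:
        # logs at ERROR level and appends the current traceback automatically
        logging.exception('Could not execute delete for start time ' + timestamp)
        sys.exit()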
Example #13
def generate_summary(request):

    try:

        err_msg = ''
        """ 
            PROCESS POST DATA
            ================= 
            
            Escape all user input that can be entered in text fields 
            
        """
        if 'utm_campaign' in request.POST:
            utm_campaign = MySQLdb._mysql.escape_string(
                request.POST['utm_campaign'])

        if 'start_time' in request.POST:
            start_time = MySQLdb._mysql.escape_string(
                request.POST['start_time'].strip())

            if not (TP.is_timestamp(start_time, 1)) and not (TP.is_timestamp(
                    start_time, 2)):
                err_msg = 'Incorrectly formatted start timestamp.'
                raise Exception()

        if 'end_time' in request.POST:
            end_time = MySQLdb._mysql.escape_string(
                request.POST['end_time'].strip())

            if not (TP.is_timestamp(end_time, 1)) and not (TP.is_timestamp(
                    end_time, 2)):
                err_msg = 'Incorrectly formatted end timestamp.'
                raise Exception()

        if 'iso_filter' in request.POST:
            country = MySQLdb._mysql.escape_string(request.POST['iso_filter'])
        else:
            country = '.{2}'

        if 'measure_confidence' in request.POST:
            if cmp(request.POST['measure_confidence'], 'yes') == 0:
                measure_confidence = True
            else:
                measure_confidence = False
        else:
            measure_confidence = False

        if 'one_step' in request.POST:
            if cmp(request.POST['one_step'], 'yes') == 0:
                use_one_step = True
            else:
                use_one_step = False
        else:
            use_one_step = False

        if 'donations_only' in request.POST:
            if cmp(request.POST['donations_only'], 'yes') == 0:
                donations_only = True
            else:
                donations_only = False
        else:
            donations_only = False
        """ Convert timestamp format if necessary """

        if TP.is_timestamp(start_time, 2):
            start_time = TP.timestamp_convert_format(start_time, 2, 1)
        if TP.is_timestamp(end_time, 2):
            end_time = TP.timestamp_convert_format(end_time, 2, 1)
        """ =============================================== """
        """ 
            GENERATE A REPORT SUMMARY TABLE
            ===============================
        """

        if donations_only:
            srl = DL.SummaryReportingLoader(
                query_type=FDH._TESTTYPE_DONATIONS_)
        else:
            srl = DL.SummaryReportingLoader(
                query_type=FDH._TESTTYPE_BANNER_LP_)

        srl.run_query(start_time,
                      end_time,
                      utm_campaign,
                      min_views=-1,
                      country=country)

        column_names = srl.get_column_names()
        summary_results = srl.get_results()

        if not (summary_results):
            html_table = '<h3>No artifact summary data available for %s.</h3>' % utm_campaign

        else:
            summary_results_list = list()
            for row in summary_results:
                summary_results_list.append(list(row))
            summary_results = summary_results_list
            """ 
                Format results to encode html table cell markup in results        
            """
            if measure_confidence:

                ret = DR.ConfidenceReporting(
                    query_type='', hyp_test='').get_confidence_on_time_range(
                        start_time,
                        end_time,
                        utm_campaign,
                        one_step=use_one_step,
                        country=country)  # first get color codes on confidence
                conf_colour_code = ret[0]

                for row_index in range(len(summary_results)):

                    artifact_index = summary_results[row_index][0] + '-' + \
                        summary_results[row_index][1] + '-' + \
                        summary_results[row_index][2]

                    for col_index in range(len(column_names)):

                        cell_value = str(summary_results[row_index][col_index])
                        metric_name = column_names[col_index]

                        if metric_name in conf_colour_code.keys() and \
                           artifact_index in conf_colour_code[metric_name].keys():
                            colour = conf_colour_code[metric_name][artifact_index]
                            summary_results[row_index][col_index] = \
                                '<td style="background-color:' + colour + ';">' + cell_value + '</td>'
                        else:
                            summary_results[row_index][col_index] = '<td>' + cell_value + '</td>'

                html_table = DR.DataReporting()._write_html_table(
                    summary_results,
                    column_names,
                    use_standard_metric_names=True,
                    omit_cell_markup=True)

            else:

                html_table = DR.DataReporting()._write_html_table(
                    summary_results,
                    column_names,
                    use_standard_metric_names=True)
        """ Generate totals only if it's a non-donation-only query """

        if donations_only:
            srl = DL.SummaryReportingLoader(
                query_type=FDH._QTYPE_TOTAL_DONATIONS_)
        else:
            srl = DL.SummaryReportingLoader(query_type=FDH._QTYPE_TOTAL_)

        srl.run_query(start_time,
                      end_time,
                      utm_campaign,
                      min_views=-1,
                      country=country)

        total_summary_results = srl.get_results()

        if not (total_summary_results):
            html_table = html_table + '<div class="spacer"></div><div class="spacer"></div><h3>No data available for %s Totals.</h3>' % utm_campaign

        else:
            html_table = html_table + '<div class="spacer"></div><div class="spacer"></div>' + DR.DataReporting(
            )._write_html_table(total_summary_results,
                                srl.get_column_names(),
                                use_standard_metric_names=True)

        metric_legend_table = DR.DataReporting().get_standard_metrics_legend()
        conf_legend_table = DR.ConfidenceReporting(
            query_type='bannerlp',
            hyp_test='TTest').get_confidence_legend_table()

        html_table = '<h4><u>Metrics Legend:</u></h4><div class="spacer"></div>' + metric_legend_table + \
        '<div class="spacer"></div><h4><u>Confidence Legend for Hypothesis Testing:</u></h4><div class="spacer"></div>' + conf_legend_table + '<div class="spacer"></div><div class="spacer"></div>' + html_table
        """ 
            DETERMINE PAYMENT METHODS 
            =========================
        """

        ccl = DL.CiviCRMLoader()
        pm_data_counts, pm_data_conversions = ccl.get_payment_methods(
            utm_campaign, start_time, end_time, country=country)

        html_table_pm_counts = DR.IntervalReporting(
        ).write_html_table_from_rowlists(
            pm_data_counts, ['Payment Method', 'Portion of Donations (%)'],
            'Landing Page')
        html_table_pm_conversions = DR.IntervalReporting(
        ).write_html_table_from_rowlists(pm_data_conversions, [
            'Payment Method', 'Visits', 'Conversions', 'Conversion Rate (%)',
            'Amount', 'Amount 25'
        ], 'Landing Page')

        html_table = html_table + '<div class="spacer"></div><h4><u>Payment Methods Breakdown:</u></h4><div class="spacer"></div>' + html_table_pm_counts + \
        '<div class="spacer"></div><div class="spacer"></div>' + html_table_pm_conversions + '<div class="spacer"></div><div class="spacer"></div>'

        return render_to_response('tests/table_summary.html', {
            'html_table': html_table,
            'utm_campaign': utm_campaign
        },
                                  context_instance=RequestContext(request))

    except Exception as inst:

        if cmp(err_msg, '') == 0:
            err_msg = 'Could not generate campaign tabular results.'

        return index(request, err_msg=err_msg)
Example #14
def daily_totals(request):    

    err_msg = ''
    
    start_day_ts = TP.timestamp_from_obj(datetime.datetime.utcnow() + datetime.timedelta(days=-1), 1, 0)
    end_day_ts = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 0)
    country = '.{2}'
    min_donation = 0
    order_str = 'order by 1 desc,3 desc'
    
    """
        PROCESS POST
    """
    
    if 'start_day_ts' in request.POST:
        if cmp(request.POST['start_day_ts'], '') != 0:
            start_day_ts = MySQLdb._mysql.escape_string(request.POST['start_day_ts'].strip())
            format = TP.getTimestampFormat(start_day_ts)
            
            if format == 2:
                start_day_ts = TP.timestamp_convert_format(start_day_ts, 2, 1)
                # start_day_ts = start_day_ts[:8] + '000000'
            elif format == -1:
                err_msg = err_msg + 'Start timestamp is formatted incorrectly\n'

    if 'end_day_ts' in request.POST:
        if cmp(request.POST['end_day_ts'], '') != 0:
            end_day_ts = MySQLdb._mysql.escape_string(request.POST['end_day_ts'].strip())
            format = TP.getTimestampFormat(end_day_ts)
            
            if format == 2:
                end_day_ts = TP.timestamp_convert_format(end_day_ts, 2, 1)
                # end_day_ts = end_day_ts[:8] + '000000'
            elif format == -1:
                err_msg = err_msg + 'End timestamp is formatted incorrectly\n'
            
    if 'country' in request.POST:
        if cmp(request.POST['country'], '') != 0:
            country = MySQLdb._mysql.escape_string(request.POST['country'])

    if 'min_donation' in request.POST:
        if cmp(request.POST['min_donation'], '') != 0:
            try:                
                min_donation = int(MySQLdb._mysql.escape_string(request.POST['min_donation'].strip()))
            except:
                logging.error('live_results/daily_totals -- Could not process minimum donation for "%s" ' % request.POST['min_donation'].strip())
                min_donation = 0
    
    if 'order_metric' in request.POST:
        if cmp(request.POST['order_metric'], 'Date') == 0:
            order_str = 'order by 1 desc,3 desc'
        elif cmp(request.POST['order_metric'], 'Country') == 0:
            order_str = 'order by 2 asc,1 desc'
            
    """
        === END POST ===
    """
    
    query_name = 'report_daily_totals_by_country'
    filename = projSet.__sql_home__+ query_name + '.sql'
    sql_stmnt = Hlp.file_to_string(filename)
    sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_day_ts, end_day_ts], country=country, min_donation=min_donation, order_str=order_str)
    
    dl = DL.DataLoader()    
    results = dl.execute_SQL(sql_stmnt)
    html_table = DR.DataReporting()._write_html_table(results, dl.get_column_names(), use_standard_metric_names=True)
    
    return render_to_response('live_results/daily_totals.html', \
                              {'html_table' : html_table, 'start_time' : TP.timestamp_convert_format(start_day_ts, 1, 2), 'end_time' : TP.timestamp_convert_format(end_day_ts, 1, 2)}, \
                              context_instance=RequestContext(request))
Example #15
def process_filter_data(request):
        
    err_msg = ''
    
    time_curr = datetime.datetime.utcnow()
    time_dayback = time_curr + datetime.timedelta(hours = -4)
     
    _beginning_time_ = TP.timestamp_from_obj(time_dayback, 1, 3)
    _end_time_ = TP.timestamp_from_obj(time_curr, 1, 3)
    
    
    """ 
        PROCESS POST VARS 
        =================
    """
    
    try:
        
        latest_utc_ts_var = MySQLdb._mysql.escape_string(request.POST['latest_utc_ts'].strip())
        
        """ An empty field falls back to the default window end; validate only non-empty input """
        if latest_utc_ts_var == '':
            latest_utc_ts_var = _end_time_
        elif not(TP.is_timestamp(latest_utc_ts_var, 1)) and not(TP.is_timestamp(latest_utc_ts_var, 2)):
            raise TypeError
        
        ts_format = TP.getTimestampFormat(latest_utc_ts_var)
        if ts_format == TP.TS_FORMAT_FORMAT1:
            latest_utc_ts_var = TP.timestamp_convert_format(latest_utc_ts_var, TP.TS_FORMAT_FORMAT1, TP.TS_FORMAT_FLAT)
            
    except KeyError:        
        latest_utc_ts_var = _end_time_
    
    except TypeError:        
        err_msg = 'Please enter a valid end-timestamp.'        
        latest_utc_ts_var = _end_time_


    try:
        
        earliest_utc_ts_var = MySQLdb._mysql.escape_string(request.POST['earliest_utc_ts'].strip())
        
        """ An empty field falls back to the default window start; validate only non-empty input """
        if earliest_utc_ts_var == '':
            earliest_utc_ts_var = _beginning_time_
        elif not(TP.is_timestamp(earliest_utc_ts_var, 1)) and not(TP.is_timestamp(earliest_utc_ts_var, 2)):
            raise TypeError
        
        ts_format = TP.getTimestampFormat(earliest_utc_ts_var)
        if ts_format == TP.TS_FORMAT_FORMAT1:
            earliest_utc_ts_var = TP.timestamp_convert_format(earliest_utc_ts_var, TP.TS_FORMAT_FORMAT1, TP.TS_FORMAT_FLAT)
            
    except KeyError:
        earliest_utc_ts_var = _beginning_time_
    
    except TypeError:        
        err_msg = 'Please enter a valid start-timestamp.'        
        earliest_utc_ts_var = _beginning_time_
        
        
    return err_msg, earliest_utc_ts_var, latest_utc_ts_var
Example #16
def daily_totals(request):

    err_msg = ''

    start_day_ts = TP.timestamp_from_obj(
        datetime.datetime.utcnow() + datetime.timedelta(days=-1), 1, 0)
    end_day_ts = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 0)
    country = '.{2}'
    min_donation = 0
    order_str = 'order by 1 desc,3 desc'
    """
        PROCESS POST
    """

    if 'start_day_ts' in request.POST:
        if cmp(request.POST['start_day_ts'], '') != 0:
            start_day_ts = MySQLdb._mysql.escape_string(
                request.POST['start_day_ts'].strip())
            format = TP.getTimestampFormat(start_day_ts)

            if format == 2:
                start_day_ts = TP.timestamp_convert_format(start_day_ts, 2, 1)
                # start_day_ts = start_day_ts[:8] + '000000'
            elif format == -1:
                err_msg = err_msg + 'Start timestamp is formatted incorrectly\n'

    if 'end_day_ts' in request.POST:
        if cmp(request.POST['end_day_ts'], '') != 0:
            end_day_ts = MySQLdb._mysql.escape_string(
                request.POST['end_day_ts'].strip())
            format = TP.getTimestampFormat(end_day_ts)

            if format == 2:
                end_day_ts = TP.timestamp_convert_format(end_day_ts, 2, 1)
                # end_day_ts = end_day_ts[:8] + '000000'
            elif format == -1:
                err_msg = err_msg + 'End timestamp is formatted incorrectly\n'

    if 'country' in request.POST:
        if cmp(request.POST['country'], '') != 0:
            country = MySQLdb._mysql.escape_string(request.POST['country'])

    if 'min_donation' in request.POST:
        if cmp(request.POST['min_donation'], '') != 0:
            try:
                min_donation = int(
                    MySQLdb._mysql.escape_string(
                        request.POST['min_donation'].strip()))
            except:
                logging.error(
                    'live_results/daily_totals -- Could not process minimum donation for "%s" '
                    % request.POST['min_donation'].strip())
                min_donation = 0

    if 'order_metric' in request.POST:
        if cmp(request.POST['order_metric'], 'Date') == 0:
            order_str = 'order by 1 desc,3 desc'
        elif cmp(request.POST['order_metric'], 'Country') == 0:
            order_str = 'order by 2 asc,1 desc'
    """
        === END POST ===
    """

    query_name = 'report_daily_totals_by_country'
    filename = projSet.__sql_home__ + query_name + '.sql'
    sql_stmnt = Hlp.file_to_string(filename)
    sql_stmnt = QD.format_query(query_name,
                                sql_stmnt, [start_day_ts, end_day_ts],
                                country=country,
                                min_donation=min_donation,
                                order_str=order_str)

    dl = DL.DataLoader()
    results = dl.execute_SQL(sql_stmnt)
    html_table = DR.DataReporting()._write_html_table(
        results, dl.get_column_names(), use_standard_metric_names=True)

    return render_to_response('live_results/daily_totals.html', \
                              {'html_table' : html_table, 'start_time' : TP.timestamp_convert_format(start_day_ts, 1, 2), 'end_time' : TP.timestamp_convert_format(end_day_ts, 1, 2)}, \
                              context_instance=RequestContext(request))
Example #17
def impression_list(request):

    err_msg = ''
    where_clause = ''
    """ 
        Process times and POST
        =============
    """
    duration_hrs = 2
    end_time, start_time = TP.timestamps_for_interval(
        datetime.datetime.utcnow(), 1, hours=-duration_hrs)

    if 'earliest_utc_ts' in request.POST:
        if cmp(request.POST['earliest_utc_ts'], '') != 0:
            earliest_utc_ts = MySQLdb._mysql.escape_string(
                request.POST['earliest_utc_ts'].strip())
            format = TP.getTimestampFormat(earliest_utc_ts)

            if format == 1:
                start_time = earliest_utc_ts
            elif format == 2:
                start_time = TP.timestamp_convert_format(earliest_utc_ts, 2, 1)
            elif format == -1:
                err_msg = err_msg + 'Start timestamp is formatted incorrectly\n'

    if 'latest_utc_ts' in request.POST:
        if cmp(request.POST['latest_utc_ts'], '') != 0:
            latest_utc_ts = MySQLdb._mysql.escape_string(
                request.POST['latest_utc_ts'].strip())
            format = TP.getTimestampFormat(latest_utc_ts)

            if format == 1:
                end_time = latest_utc_ts
            elif format == 2:
                end_time = TP.timestamp_convert_format(latest_utc_ts, 2, 1)
            elif format == -1:
                err_msg = err_msg + 'End timestamp is formatted incorrectly\n'

    if 'iso_code' in request.POST:
        if cmp(request.POST['iso_code'], '') != 0:
            iso_code = MySQLdb._mysql.escape_string(
                request.POST['iso_code'].strip())
            where_clause = "where bi.country regexp '%s' " % iso_code
    """ 
        Format and execute query 
        ========================
    """

    query_name = 'report_country_impressions.sql'

    sql_stmnt = Hlp.file_to_string(projSet.__sql_home__ + query_name)
    sql_stmnt = sql_stmnt % (start_time, end_time, start_time, end_time,
                             start_time, end_time, where_clause)

    dl = DL.DataLoader()
    results = dl.execute_SQL(sql_stmnt)
    column_names = dl.get_column_names()

    imp_table = DR.DataReporting()._write_html_table(results, column_names)

    return render_to_response(
        'live_results/impression_list.html', {
            'imp_table': imp_table.decode("utf-8"),
            'err_msg': err_msg,
            'start': TP.timestamp_convert_format(start_time, 1, 2),
            'end': TP.timestamp_convert_format(end_time, 1, 2)
        },
        context_instance=RequestContext(request))
Example #18
    def execute_process(self, key, **kwargs):
        
        logging.info('Commencing caching of fundraiser totals data at:  %s' % self.CACHING_HOME)        
                
        end_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)
        
        """ DATA CONFIG """
        
        """ set the metrics to plot """
        lttdl = DL.LongTermTrendsLoader(db='db1025')
        
        start_of_2011_fundraiser = '20111116000000'
        countries = DL.CiviCRMLoader().get_ranked_donor_countries(start_of_2011_fundraiser)
        countries.append('Total')
        
        """ Dictionary object storing lists of regexes - each expression must pass for a label to persist """
        year_groups = dict()
        for country in countries:
            if cmp(country, 'Total') == 0:
                year_groups['2011 Total'] = ['2011.*']
                year_groups['2010 Total'] = ['2010.*']
            else:                
                year_groups['2011 ' + country] = ['2011' + country]
                year_groups['2010 ' + country] = ['2010' + country]

        metrics = 'amount'
        weights = ''
        groups = year_groups
        group_metrics = ['year', 'country']
        
        metric_types = DL.LongTermTrendsLoader._MT_AMOUNT_
        
        include_totals = False
        include_others = False
        hours_back = 0
        time_unit = TP.DAY
                        
        """ END CONFIG """
        
        
        """ For each metric use the LongTermTrendsLoader to generate the data to plot """
            
        dr = DR.DataReporting()
        
        times, counts = lttdl.run_fundrasing_totals(end_time, metric_name=metrics, metric_type=metric_types, groups=groups, group_metric=group_metrics, include_other=include_others, \
                                        include_total=include_totals, hours_back=hours_back, weight_name=weights, time_unit=time_unit)
        dict_param = dict()
        
        for country in countries:
            
            key_2011 = '2011 ' +  country
            key_2010 = '2010 ' +  country
            
            new_counts = dict()
            new_counts[key_2010] = counts[key_2010]
            new_counts[key_2011] = counts[key_2011]
            
            new_times = dict()
            new_times[key_2010] = times[key_2010]
            new_times[key_2011] = times[key_2011]
            
            dr._counts_ = new_counts
            dr._times_ = new_times

            empty_data = [0] * len(new_times[new_times.keys()[0]])
            data = list()
            data.append(dr.get_data_lists([''], empty_data))
            
            dict_param[country] = Hlp.combine_data_lists(data)
        
        self.clear_cached_data(key)
        self.cache_data(dict_param, key)
        
        logging.info('Caching complete.')
Example #19
def test(request):

    try:
        """ 
            PROCESS POST DATA
            ================= 
            
            Escape all user input that can be entered in text fields 
            
        """
        test_name_var = MySQLdb._mysql.escape_string(
            request.POST['test_name'].strip())
        utm_campaign_var = MySQLdb._mysql.escape_string(
            request.POST['utm_campaign'].strip())
        start_time_var = MySQLdb._mysql.escape_string(
            request.POST['start_time'].strip())
        end_time_var = MySQLdb._mysql.escape_string(
            request.POST['end_time'].strip())
        one_step_var = MySQLdb._mysql.escape_string(
            request.POST['one_step'].strip())
        country = MySQLdb._mysql.escape_string(request.POST['iso_filter'])
        """ Convert timestamp format if necessary """
        if TP.is_timestamp(start_time_var, 2):
            start_time_var = TP.timestamp_convert_format(start_time_var, 2, 1)
        if TP.is_timestamp(end_time_var, 2):
            end_time_var = TP.timestamp_convert_format(end_time_var, 2, 1)

        if cmp(one_step_var, 'True') == 0:
            one_step_var = True
        else:
            one_step_var = False

        try:
            test_type_var = MySQLdb._mysql.escape_string(
                request.POST['test_type'])
            labels = request.POST['artifacts']

        except KeyError:

            test_type_var, labels = FDH.get_test_type(
                utm_campaign_var, start_time_var, end_time_var,
                DL.CampaignReportingLoader(
                    query_type=''))  # submit an empty query type
            labels = labels.__str__()

        label_dict = dict()
        label_dict_full = dict()

        labels = labels[1:-1].split(',')
        """ Parse the labels """
        for i in range(len(labels)):
            """ Strip the surrounding quotes and replace spaces with underscores """
            label = labels[i].split('\'')[1].strip()
            label = '_'.join(label.split(' '))
            """ Escape the label parameters """
            label = MySQLdb._mysql.escape_string(label)
            label_dict_full[label] = label
        """ Look at the artifact names and map them into a dict() - Determine if artifacts were chosen by the user """

        if request.POST.__contains__('artifacts_chosen'):

            artifacts_chosen = request.POST.getlist('artifacts_chosen')
            """ Ensure that only two items are selected """
            if len(artifacts_chosen) > 2:
                raise Exception(
                    'Please select (checkboxes) exactly two items to test')

            for elem in artifacts_chosen:
                esc_elem = MySQLdb._mysql.escape_string(str(elem))
                label_dict[esc_elem] = esc_elem
        else:
            label_dict = label_dict_full
        """ Parse the added labels IF they are not empty """
        for key in label_dict.keys():

            try:
                if not (request.POST[key] == ''):

                    label_dict[key] = MySQLdb._mysql.escape_string(
                        str(request.POST[key]))
                else:
                    label_dict[key] = key
            except:
                logging.error('Could not find %s in the POST QueryDict.' % key)

        for key in label_dict_full.keys():
            try:
                if not (request.POST[key] == ''):
                    label_dict_full[key] = MySQLdb._mysql.escape_string(
                        str(request.POST[key]))
                else:
                    label_dict_full[key] = key
            except:
                logging.error('Could not find %s in the POST QueryDict.' % key)
        """ 
            EXECUTE REPORT GENERATION
            =========================
        
            setup time parameters
            determine test metrics
            execute queries
        """

        sample_interval = 1

        start_time_obj = TP.timestamp_to_obj(start_time_var, 1)
        end_time_obj = TP.timestamp_to_obj(end_time_var, 1)

        time_diff = end_time_obj - start_time_obj
        time_diff_min = time_diff.days * 1440 + time_diff.seconds / 60.0  # count whole days, not just the seconds field
        test_interval = int(math.floor(time_diff_min / sample_interval))  # number of sample intervals in the test window

        metric_types = FDH.get_test_type_metrics(test_type_var)
        metric_types_full = dict()
        """ Get the full (descriptive) version of the metric names 
            !! FIXME / TODO -- order these properly !! """

        for i in range(len(metric_types)):
            metric_types_full[metric_types[i]] = QD.get_metric_full_name(
                metric_types[i])
        """ USE generate_reporting_objects() TO GENERATE THE REPORT DATA - dependent on test type """

        measured_metric, winner, loser, percent_win, confidence, html_table_pm_banner, html_table_pm_lp, html_table_language, html_table \
        =  generate_reporting_objects(test_name_var, start_time_var, end_time_var, utm_campaign_var, label_dict, label_dict_full, \
                                      sample_interval, test_interval, test_type_var, metric_types, one_step_var, country)

        winner_var = winner[0]

        results = list()
        for index in range(len(winner)):
            results.append({
                'metric': measured_metric[index],
                'winner': winner[index],
                'loser': loser[index],
                'percent_win': percent_win[index],
                'confidence': confidence[index]
            })

        template_var_dict = {'results' : results,  \
                  'utm_campaign' : utm_campaign_var, 'metric_names_full' : metric_types_full, \
                  'summary_table': html_table, 'sample_interval' : sample_interval, \
                  'banner_pm_table' : html_table_pm_banner, 'lp_pm_table' : html_table_pm_lp, 'html_table_language' : html_table_language, \
                  'start_time' : TP.timestamp_convert_format(start_time_var, 1, 2) , 'end_time' : TP.timestamp_convert_format(end_time_var, 1, 2)}

        html = render_to_response('tests/results_' + test_type_var + '.html',
                                  template_var_dict,
                                  context_instance=RequestContext(request))
        """ 
            WRITE TO TEST TABLE
            =================== 
        
        """

        ttl = DL.TestTableLoader()
        """ Format the html string """
        html_string = html.__str__()
        html_string = html_string.replace('"', '\\"')

        if ttl.record_exists(utm_campaign=utm_campaign_var):
            ttl.update_test_row(test_name=test_name_var,
                                test_type=test_type_var,
                                utm_campaign=utm_campaign_var,
                                start_time=start_time_var,
                                end_time=end_time_var,
                                html_report=html_string,
                                winner=winner_var)
        else:
            ttl.insert_row(test_name=test_name_var,
                           test_type=test_type_var,
                           utm_campaign=utm_campaign_var,
                           start_time=start_time_var,
                           end_time=end_time_var,
                           html_report=html_string,
                           winner=winner_var)

        return html

    except Exception as inst:

        logging.error('Failed to correctly generate test report.')
        logging.error(type(inst))
        logging.error(inst.args)
        logging.error(inst)
        """ Return to the index page with an error """
        try:
            err_msg = 'Test Generation failed for: %s.  Check the fields submitted for generation. <br><br>ERROR:<br><br>%s' % (
                utm_campaign_var, inst.__str__())
        except:
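            # Reached when utm_campaign_var was never assigned (POST parsing failed early),
            # so return to the campaigns index rather than a specific campaign page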
            err_msg = 'Test Generation failed.  Check the fields submitted for generation. <br><br>ERROR:<br><br>%s' % inst.__str__(
            )
            return campaigns_index(request, kwargs={'err_msg': err_msg})

        return show_campaigns(request,
                              utm_campaign_var,
                              kwargs={'err_msg': err_msg})
Example #20
    def execute_process(self, key, **kwargs):

        logging.info('Commencing caching of fundraiser totals data at:  %s' %
                     self.CACHING_HOME)

        end_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)
        """ DATA CONFIG """
        """ set the metrics to plot """
        lttdl = DL.LongTermTrendsLoader(db='db1025')

        start_of_2011_fundraiser = '20111116000000'
        countries = DL.CiviCRMLoader().get_ranked_donor_countries(
            start_of_2011_fundraiser)
        countries.append('Total')
        """ Dictionary object storing lists of regexes - each expression must pass for a label to persist """
        year_groups = dict()
        for country in countries:
            if cmp(country, 'Total') == 0:
                year_groups['2011 Total'] = ['2011.*']
                year_groups['2010 Total'] = ['2010.*']
            else:
                year_groups['2011 ' + country] = ['2011' + country]
                year_groups['2010 ' + country] = ['2010' + country]

        metrics = 'amount'
        weights = ''
        groups = year_groups
        group_metrics = ['year', 'country']

        metric_types = DL.LongTermTrendsLoader._MT_AMOUNT_

        include_totals = False
        include_others = False
        hours_back = 0
        time_unit = TP.DAY
        """ END CONFIG """
        """ For each metric use the LongTermTrendsLoader to generate the data to plot """

        dr = DR.DataReporting()

        times, counts = lttdl.run_fundrasing_totals(end_time, metric_name=metrics, metric_type=metric_types, groups=groups, group_metric=group_metrics, include_other=include_others, \
                                        include_total=include_totals, hours_back=hours_back, weight_name=weights, time_unit=time_unit)
        dict_param = dict()

        for country in countries:

            key_2011 = '2011 ' + country
            key_2010 = '2010 ' + country

            new_counts = dict()
            new_counts[key_2010] = counts[key_2010]
            new_counts[key_2011] = counts[key_2011]

            new_times = dict()
            new_times[key_2010] = times[key_2010]
            new_times[key_2011] = times[key_2011]

            dr._counts_ = new_counts
            dr._times_ = new_times

            empty_data = [0] * len(new_times[new_times.keys()[0]])
            data = list()
            data.append(dr.get_data_lists([''], empty_data))

            dict_param[country] = Hlp.combine_data_lists(data)

        self.clear_cached_data(key)
        self.cache_data(dict_param, key)

        logging.info('Caching complete.')
Example #21
    def execute_process(self, key, **kwargs):

        logging.info('Commencing caching of long term trends data at:  %s' %
                     self.CACHING_HOME)

        end_time, start_time = TP.timestamps_for_interval(datetime.datetime.utcnow(), 1, \
                                                          hours=-self.VIEW_DURATION_HRS, resolution=1)
        """ DATA CONFIG """

        countries = DL.CiviCRMLoader().get_ranked_donor_countries(start_time)
        countries = countries[1:6]
        """ set the metrics to plot """
        lttdl = DL.LongTermTrendsLoader(db='storage3')
        """ Dictionary object storing lists of regexes - each expression must pass for a label to persist """
        # country_groups = {'US': ['(US)'], 'CA': ['(CA)'], 'JP': ['(JP)'], 'IN': ['(IN)'], 'NL': ['(NL)']}
        payment_groups = {'Credit Card': ['^cc$'], 'Paypal': ['^pp$']}
        currency_groups = {
            'USD': ['(USD)'],
            'CAD': ['(CAD)'],
            'JPY': ['(JPY)'],
            'EUR': ['(EUR)']
        }
        lang_cntry_groups = {
            'US': ['US..', '.{4}'],
            'EN': ['[^U^S]en', '.{4}']
        }

        top_cntry_groups = dict()
        for country in countries:
            top_cntry_groups[country] = [country, '.{2}']

        # To include click rate
        # groups = [ lang_cntry_groups] metrics = ['click_rate'] metrics_index = [3]
        # group_metrics = [DL.LongTermTrendsLoader._MT_RATE_] metric_types = ['country', 'language'] include_totals = [True] include_others = [True]

        metrics = [
            'impressions', 'views', 'donations', 'donations', 'amount',
            'amount', 'diff_don', 'diff_don', 'donations', 'conversion_rate'
        ]
        weights = ['', '', '', '', '', '', 'donations', 'donations', '', '']
        metrics_index = [0, 1, 2, 2, 2, 4, 5, 5, 6, 6]
        groups = [lang_cntry_groups, lang_cntry_groups, lang_cntry_groups, top_cntry_groups, lang_cntry_groups, currency_groups, \
                  lang_cntry_groups, lang_cntry_groups, payment_groups, payment_groups]
        """  The metrics that are used to build a group string to be qualified via regex - the values of the list metrics are concatenated """
        group_metrics = [['country', 'language'], ['country', 'language'], ['country', 'language'], \
                         ['country', 'language'], ['country', 'language'], ['currency'], ['country', 'language'], \
                         ['country', 'language'], ['payment_method'], ['payment_method']]

        metric_types = [DL.LongTermTrendsLoader._MT_AMOUNT_, DL.LongTermTrendsLoader._MT_AMOUNT_, DL.LongTermTrendsLoader._MT_AMOUNT_, \
                        DL.LongTermTrendsLoader._MT_AMOUNT_, DL.LongTermTrendsLoader._MT_AMOUNT_, DL.LongTermTrendsLoader._MT_AMOUNT_, \
                        DL.LongTermTrendsLoader._MT_RATE_WEIGHTED_, DL.LongTermTrendsLoader._MT_RATE_WEIGHTED_, DL.LongTermTrendsLoader._MT_AMOUNT_, \
                        DL.LongTermTrendsLoader._MT_RATE_]

        include_totals = [
            True, True, True, False, True, True, False, False, False, True
        ]
        include_others = [
            True, True, True, False, True, True, True, True, True, False
        ]
        hours_back = [0, 0, 0, 0, 0, 0, 24, 168, 0, 0]
        time_unit = [
            TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR,
            TP.HOUR, TP.HOUR, TP.HOUR
        ]

        data = list()
        """ END CONFIG """
        """ For each metric use the LongTermTrendsLoader to generate the data to plot """
        for index in range(len(metrics)):

            dr = DR.DataReporting()

            times, counts = lttdl.run_query(start_time, end_time, metrics_index[index], metric_name=metrics[index], metric_type=metric_types[index], \
                                            groups=groups[index], group_metric=group_metrics[index], include_other=include_others[index], \
                                            include_total=include_totals[index], hours_back=hours_back[index], weight_name=weights[index], \
                                            time_unit=time_unit[index])

            times = TP.normalize_timestamps(times, False, time_unit[index])

            dr._counts_ = counts
            dr._times_ = times

            empty_data = [0] * len(times[times.keys()[0]])
            data.append(dr.get_data_lists([''], empty_data))

        dict_param = Hlp.combine_data_lists(data)
        dict_param['interval'] = self.VIEW_DURATION_HRS
        dict_param['end_time'] = TP.timestamp_convert_format(end_time, 1, 2)

        self.clear_cached_data(key)
        self.cache_data(dict_param, key)

        logging.info('Caching complete.')
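
The per-metric configuration above is a family of parallel lists walked by a shared index; zip can carry one metric's full configuration per iteration instead (a sketch over a subset of the lists, reusing the variables above):

    for metric, weight, m_index, group in zip(metrics, weights, metrics_index, groups):
        # each pass sees one metric's name, weight column, query index and regex group dict,
        # replacing the index-based lookups in the loop above
        pass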
Example #22
def index(request, **kwargs):
    """ 
        PROCESS POST DATA
        ================= 
    """

    if 'err_msg' in kwargs:
        err_msg = kwargs['err_msg']
    else:
        err_msg = ''

    try:

        latest_utc_ts_var = MySQLdb._mysql.escape_string(
            request.POST['latest_utc_ts'].strip())
        earliest_utc_ts_var = MySQLdb._mysql.escape_string(
            request.POST['earliest_utc_ts'].strip())

        if not TP.is_timestamp(earliest_utc_ts_var, 1) or not TP.is_timestamp(
                latest_utc_ts_var, 1):
            raise TypeError

        if latest_utc_ts_var == '':
            latest_utc_ts_var = _end_time_

    except KeyError:

        earliest_utc_ts_var = _beginning_time_
        latest_utc_ts_var = _end_time_

    except TypeError:

        err_msg = 'Please enter a valid timestamp.'

        earliest_utc_ts_var = _beginning_time_
        latest_utc_ts_var = _end_time_

    ttl = DL.TestTableLoader()
    columns = ttl.get_column_names()
    test_rows = ttl.get_all_test_rows()
    """ Build a list of tests -- apply filters """
    l = []

    utm_campaign_index = ttl.get_test_index('utm_campaign')
    html_report_index = ttl.get_test_index('html_report')

    for i in test_rows:
        test_start_time = ttl.get_test_field(i, 'start_time')
        new_row = list(i)
        """ Ensure the timestamp is properly formatted """
        if TP.is_timestamp(test_start_time, 2):
            test_start_time = TP.timestamp_convert_format(
                test_start_time, 2, 1)

        new_row[
            html_report_index] = '<a href="/tests/report/%s">view</a>' % new_row[
                utm_campaign_index]

        if int(test_start_time) > int(earliest_utc_ts_var) and int(
                test_start_time) < int(latest_utc_ts_var):
            l.append(new_row)

    l.reverse()

    test_table = DR.DataReporting()._write_html_table(
        l, columns, use_standard_metric_names=True)

    return render_to_response('tests/index.html', {
        'err_msg': err_msg,
        'test_table': test_table
    },
                              context_instance=RequestContext(request))
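
The escape-validate-default handling of earliest_utc_ts and latest_utc_ts above recurs in several of these views. A sketch of how it could be factored into a shared helper, assuming the TP.is_timestamp API shown above; get_ts_from_post is a hypothetical name:

def get_ts_from_post(request, field, default):
    """ Escape a POSTed timestamp field, validate it, and fall back to a default.
        Returns (value, err_msg); err_msg is non-empty for a malformed timestamp. """
    try:
        value = MySQLdb._mysql.escape_string(request.POST[field].strip())
    except KeyError:
        return default, ''

    if value == '':
        return default, ''

    if not TP.is_timestamp(value, 1) and not TP.is_timestamp(value, 2):
        return default, 'Please enter a valid timestamp.'

    return value, ''

A view would then call it once per field, e.g. earliest_utc_ts_var, err_msg = get_ts_from_post(request, 'earliest_utc_ts', _beginning_time_).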
예제 #23
0
    def mine_squid_landing_page_requests(self, logFileName):

        logging.info("Begin mining of landing page requests in %s" % logFileName)

        """ Create the dataloaders and initialize """
        sltl = DL.SquidLogTableLoader()
        lptl = DL.LandingPageTableLoader()
        ipctl = DL.IPCountryTableLoader()

        """ Retrieve the log timestamp from the filename """
        time_stamps = self.get_timestamps_with_interval(logFileName, self._log_copy_interval_)

        end = time_stamps[1]
        curr_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)

        """ retrieve the start time of the log """
        start = self.get_first_timestamp_from_log(logFileName)

        """ Initialization - open the file """
        logFile, total_lines_in_file = self.open_logfile(logFileName)

        # Initialization
        hostIndex = 1
        queryIndex = 4
        pathIndex = 2

        """ Clear the old records """
        self._clear_squid_records(start, self._LP_REQUEST_)

        """ Add a row to the SquidLogTable """
        sltl.insert_row(
            type="lp_view",
            log_copy_time=curr_time,
            start_time=start,
            end_time=end,
            log_completion_pct="0.0",
            total_rows="0",
        )

        line_count = 0
        requests_loaded = 0

        """ Extract the mining patterns from the DB """
        mptl = DL.MiningPatternsTableLoader()
        lp_patterns = mptl.get_pattern_lists()[1]

        """
            PROCESS REQUESTS FROM FILE
            ==========================
            
            Sample request:
            
            line = 
                "sq63.wikimedia.org 757671483 2011-06-01T23:00:01.343 93 98.230.113.246 TCP_MISS/200 10201 GET \
                http://wikimediafoundation.org/w/index.php?title=WMFJA085/en/US&utm_source=donate&utm_medium=sidebar&utm_campaign=20101204SB002&country_code=US&referrer=http%3A%2F%2Fen.wikipedia.org%2Fwiki%2FFile%3AMurphy_High_School.jpg CARP/208.80.152.83 text/html http://en.wikipedia.org/wiki/File:Murphy_High_School.jpg \
                - Mozilla/4.0%20(compatible;%20MSIE%208.0;%20Windows%20NT%206.1;%20WOW64;%20Trident/4.0;%20FunWebProducts;%20GTB6.6;%20SLCC2;%20.NET%20CLR%202.0.50727;%20.NET%20CLR%203.5.30729;%20.NET%20CLR%203.0.30729;%20Media%20Center%20PC%206.0;%20HPDTDF;%20.NET4.0C)"
        """

        line = logFile.readline()
        while line != "":

            lineArgs = line.split()

            """ Get the IP Address of the donor """
            ip_add = lineArgs[4]

            #  SELECT CAST('20070529 00:00:00' AS datetime)

            """ 
                Parse the Timestamp:
                
                Sample timestamp:
                    timestamp = "2011-06-01T23:00:07.612"
            """

            date_and_time = lineArgs[2]

            date_string = date_and_time.split("-")
            time_string = date_and_time.split(":")

            # if the date is not logged ignore the record
            try:
                year = date_string[0]
                month = date_string[1]
                day = date_string[2][:2]
                hour = time_string[0][-2:]
                minute = time_string[1]  # avoid shadowing the builtin min()
                sec = time_string[2][:2]
            except IndexError:
                line = logFile.readline()
                total_lines_in_file = total_lines_in_file - 1
                continue

            timestamp_string = year + "-" + month + "-" + day + " " + hour + ":" + minute + ":" + sec

            """ 
                Process referrer URL
                =================== 
                
                Sample referrer:
                    referrer_url = http://en.wikipedia.org/wiki/File:Murphy_High_School.jpg
            """

            try:
                referrer_url = lineArgs[11]
            except IndexError:
                referrer_url = "Unavailable"

            parsed_referrer_url = up.urlparse(referrer_url)

            if not parsed_referrer_url[hostIndex]:  # urlparse yields '' (not None) for a missing netloc
                project = "NONE"
                source_lang = "NONE"
            else:
                hostname = parsed_referrer_url[hostIndex].split(".")

                """ If the hostname of the form '<lang>.<project>.org' """
                if len(hostname[0]) <= 2:
                    # referrer_path = parsed_referrer_url[pathIndex].split('/')
                    project = hostname[0]  # wikimediafoundation.org
                    source_lang = hostname[0]
                else:
                    try:
                        """ species.wikimedia vs en.wikinews """
                        project = hostname[0] if (hostname[1] == "wikimedia") else hostname[1]
                        """ pl.wikipedia vs commons.wikimedia """
                        source_lang = hostname[0] if (len(hostname[1]) < 5) else "en"
                    except:
                        project = "wikipedia" """ default project to 'wikipedia' """
                        source_lang = "en" """ default lang to english """

            """
                Process User agent string
                ========================
                
                sample user agent string:
                    user_agent_string = Mozilla/4.0%20(compatible;%20MSIE%208.0;%20Windows%20NT%206.1;%20WOW64;%20Trident/4.0;%20FunWebProducts;%20GTB6.6;%20SLCC2;%20.NET%20CLR%202.0.50727;%20.NET%20CLR%203.5.30729;%20.NET%20CLR%203.0.30729;%20Media%20Center%20PC%206.0;%20HPDTDF;%20.NET4.0C)
                
            """

            try:
                user_agent_string = lineArgs[13]
            except IndexError:
                user_agent_string = ""

            try:
                user_agent_fields = httpagentparser.detect(user_agent_string)
                browser = "NONE"

                # Check to make sure fields exist
                if len(user_agent_fields["browser"]) != 0:
                    if len(user_agent_fields["browser"]["name"]) != 0:
                        browser = user_agent_fields["browser"]["name"]
            except:

                logging.error("Could not process user agent string.")
                browser = "NONE"

            """
                 Process landing URL
                 ===================
                 
                 sample landing urls:
                 
                     landing_url = "http://wikimediafoundation.org/w/index.php?title=WMFJA085/en/US&utm_source=donate&utm_medium=sidebar&utm_campaign=20101204SB002&country_code=US&referrer=http%3A%2F%2Fen.wikipedia.org%2Fwiki%2FFile%3AMurphy_High_School.jpg"
                     landing_url = "http://wikimediafoundation.org/wiki/WMFJA1/ru"
                     landing_url = *donate.wikimedia.org/wiki/Special:FundraiserLandingPage?uselang=en&country=US&template=Lp-layout-default&appeal=Appeal-default&form-countryspecific=Form-countryspecific-control&utm_medium=sitenotice&utm_source=B11_Donate_Jimmy_Control&utm_campaign=C11_1107
            """

            try:
                landing_url = lineArgs[8]
            except IndexError:
                landing_url = "Unavailable"

            parsed_landing_url = up.urlparse(landing_url)
            query_fields = cgi.parse_qs(parsed_landing_url[queryIndex])  # Get the banner name and lang
            path_pieces = parsed_landing_url[pathIndex].split("/")

            include_request, url_match = self.evaluate_landing_url(
                landing_url, parsed_landing_url, query_fields, path_pieces, lp_patterns
            )

            if include_request:

                """ Extract the language from the query string 
                        
                    the language has already been read from the url path but if it
                    exists in  the query string this setting should take precedence
                """
                try:
                    source_lang = query_fields["language"][0]
                except:
                    pass

                """ Address cases where the query string contains the landing page - ...wikimediafoundation.org/w/index.php?... """
                # http://wikimediafoundation.org/wiki/
                if url_match == 1:

                    """ Address cases where the query string does not contain the landing page - ...wikimediafoundation.org/wiki/... """
                    parsed_landing_url = up.urlparse(landing_url)
                    query_fields = cgi.parse_qs(parsed_landing_url[queryIndex])  # Get the banner name and lang

                    landing_path = parsed_landing_url[pathIndex].split("/")
                    landing_page = landing_path[2]

                    # URLs of the form ...?country=<iso_code>
                    try:
                        country = query_fields["country"][0]

                    # URLs of the form ...<path>/ <lp_name>/<lang>/<iso_code>
                    except:
                        try:
                            if len(landing_path) == 5:
                                country = landing_path[4]
                                # source_lang = landing_path[3]
                            else:
                                country = landing_path[3]

                        except:

                            logging.info("Could not parse country from landing path: %s", landing_url)
                            line = logFile.readline()
                            total_lines_in_file = total_lines_in_file - 1
                            continue

                # http://wikimediafoundation.org/w/index.php?
                elif url_match == 2:

                    try:

                        """ URLs of the form ...?title=<lp_name> """
                        lp_country = query_fields["title"][0].split("/")
                        landing_page = lp_country[0]

                        """ URLs of the form ...?county_code=<iso_code> """
                        try:
                            country = query_fields["country"][0]
                        except:
                            """ URLs of the form ...?title=<lp_name>/<lang>/<iso_code> """
                            if len(lp_country) == 3:
                                country = lp_country[2]
                            else:
                                country = lp_country[1]

                    except:

                        logging.info("Could not parse landing page request from query string: %s", landing_url)
                        line = logFile.readline()
                        total_lines_in_file = total_lines_in_file - 1
                        continue

                # donate.wikimedia.org/wiki/Special:FundraiserLandingPage?
                elif url_match == 3:

                    try:
                        # e.g. uselang=en&country=US&template=Lp-layout-default&appeal=Appeal-default&form-countryspecific=Form-countryspecific-control&utm_medium=sitenotice&utm_source=B11_Donate_Jimmy_Control&utm_campaign=C11_1107

                        source_lang = query_fields["uselang"][0]
                        country = query_fields["country"][0]

                        landing_page = (
                            query_fields["template"][0].split("-")[2]
                            + "~"
                            + query_fields["appeal-template"][0].split("-")[2]
                            + "~"
                            + query_fields["appeal"][0].split("-")[1]
                            + "~"
                            + query_fields["form-template"][0].split("-")[2]
                            + "~"
                            + query_fields["form-countryspecific"][0].split("-")[2]
                        )

                        utm_source = query_fields["utm_source"][0]
                        utm_campaign = query_fields["utm_campaign"][0] + "_" + country
                        utm_medium = query_fields["utm_medium"][0]

                    except Exception as inst:

                        # logging.info(inst)     # __str__ allows args to printed directly
                        # logging.info('Could not parse landing page request from query string: %s', landing_url)
                        line = logFile.readline()
                        total_lines_in_file = total_lines_in_file - 1
                        continue

                """ If country is confused with the language use the ip """
                if country == country.lower():

                    # logging.info('Using geo-locator to set ip-address: %s', landing_url)
                    country = ipctl.localize_IP(ip_add)

                """ Ensure fields providing request ID exist """
                try:
                    utm_source = query_fields["utm_source"][0]
                    utm_campaign = query_fields["utm_campaign"][0]
                    utm_medium = query_fields["utm_medium"][0]

                except KeyError:

                    line = logFile.readline()
                    total_lines_in_file = total_lines_in_file - 1
                    continue

                """ Insert record into the landing_page_requests table """

                lptl.insert_row(
                    utm_source_arg=utm_source,
                    utm_campaign_arg=utm_campaign,
                    utm_medium_arg=utm_medium,
                    landing_page_arg=landing_page,
                    page_url_arg=landing_url,
                    referrer_url_arg=referrer_url,
                    browser_arg=browser,
                    lang_arg=source_lang,
                    country_arg=country,
                    project_arg=project,
                    ip_arg=ip_add,
                    start_timestamp_arg=start,
                    timestamp_arg=timestamp_string,
                )

                requests_loaded = requests_loaded + 1

            line = logFile.readline()
            line_count = line_count + 1

            """ Log Miner Logging - Update the squid_log_record table """
            if (line_count % 1000) == 0 or line_count == total_lines_in_file:
                completion = float(line_count) / total_lines_in_file * 100.0  # cast before dividing to avoid integer division
                sltl.update_table_row(
                    type="lp_view",
                    log_copy_time=curr_time,
                    start_time=start,
                    end_time=end,
                    log_completion_pct=completion.__str__(),
                    total_rows=line_count.__str__(),
                )
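
The timestamp handling above splits the raw field by hand. A sketch of the same parse via datetime.strptime, assuming log timestamps always follow the sampled form 2011-06-01T23:00:01.343; parse_squid_timestamp is a hypothetical helper:

import datetime

def parse_squid_timestamp(raw):
    """ Convert a squid timestamp such as '2011-06-01T23:00:01.343'
        to 'YYYY-MM-DD HH:MM:SS'; returns None if the field is malformed. """
    try:
        dt = datetime.datetime.strptime(raw, '%Y-%m-%dT%H:%M:%S.%f')
    except ValueError:
        return None
    return dt.strftime('%Y-%m-%d %H:%M:%S')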
예제 #24
0
    def mine_squid_impression_requests(self, logFileName):

        logging.info("Begin mining of banner impressions in %s" % logFileName)

        sltl = DL.SquidLogTableLoader()
        itl = DL.ImpressionTableLoader()

        """ Retrieve the log timestamp from the filename """
        time_stamps = self.get_timestamps_with_interval(logFileName, self._log_copy_interval_)

        """ retrieve the start time of the log """
        start = self.get_first_timestamp_from_log(logFileName)

        end = time_stamps[1]
        curr_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)  # use UTC, matching the landing page miner

        """ Initialization - open the file """
        logFile, total_lines_in_file = self.open_logfile(logFileName)
        queryIndex = 4

        counts = Hlp.AutoVivification()
        # insertStmt = 'INSERT INTO ' + self._impression_table_name_ + self._BANNER_FIELDS_ + ' values '

        """ Clear the old records """
        self._clear_squid_records(start, self._BANNER_REQUEST_)

        """ Add a row to the SquidLogTable """
        sltl.insert_row(
            type="banner_impression",
            log_copy_time=curr_time,
            start_time=start,
            end_time=end,
            log_completion_pct="0.0",
            total_rows="0",
        )

        """
            PROCESS LOG FILE
            ================
            
            Sample Request:
            
            line =
            "sq63.wikimedia.org 757675855 2011-06-01T23:00:07.612 0 187.57.227.121 TCP_MEM_HIT/200 1790 GET \
            http://meta.wikimedia.org/w/index.php?title=Special:BannerLoader&banner=B20110601_JWJN001_BR&userlang=pt&db=ptwiki&sitename=Wikip%C3%A9dia&country=BR NONE/- text/javascript http://pt.wikipedia.org/wiki/Modo_e_tempo_verbal \
            - Mozilla/5.0%20(Windows%20NT%206.1)%20AppleWebKit/534.24%20(KHTML,%20like%20Gecko)%20Chrome/11.0.696.71%20Safari/534.24"

        """

        line_count = 0

        line = logFile.readline()
        while line != "":

            lineArgs = line.split()

            """ 
                Parse the Timestamp:
                
                Sample timestamp:
                    timestamp = "2011-06-01T23:00:07.612"
            """

            try:
                time_stamp = lineArgs[2]
                time_bits = time_stamp.split("T")
                date_fields = time_bits[0].split("-")
                time_fields = time_bits[1].split(":")
                time_stamp = date_fields[0] + date_fields[1] + date_fields[2] + time_fields[0] + time_fields[1] + "00"

            except (ValueError, IndexError):
                line = logFile.readline()
                total_lines_in_file = total_lines_in_file - 1
                continue

            """ 
                Parse the URL:
                
                Sample url:
                    url = "http://meta.wikimedia.org/w/index.php?title=Special:BannerLoader&banner=B20110601_JWJN001_BR&userlang=pt&db=ptwiki&sitename=Wikip%C3%A9dia&country=BR"
            """

            try:
                url = lineArgs[8]
            except IndexError:
                url = "Unavailable"

            parsedUrl = up.urlparse(url)
            query = parsedUrl[queryIndex]
            queryBits = cgi.parse_qs(query)

            """ Extract - project, banner, language, & country data from the url """
            project = ""
            if "db" in queryBits.keys():
                project = queryBits["db"][0]

            if project == "" and "sitename" in queryBits.keys():
                sitename = queryBits["sitename"][0]
                if sitename:
                    project = sitename
                else:
                    project = "NONE"

            if "banner" in queryBits.keys():
                banner = queryBits["banner"][0]
            else:
                banner = "NONE"

            if "userlang" in queryBits.keys():
                lang = queryBits["userlang"][0]
            else:
                lang = "NONE"

            if "country" in queryBits.keys():
                country = queryBits["country"][0]
            else:
                country = "NONE"

            """ Group banner impression counts based on (banner, country, project, language) """
            try:
                counts[banner][country][project][lang][time_stamp] = (
                    counts[banner][country][project][lang][time_stamp] + 1
                )
            except TypeError:
                counts[banner][country][project][lang][time_stamp] = 1

            line = logFile.readline()
            line_count = line_count + 1

            """ Log Miner Logging - Update the squid_log_record table """
            if line_count % 10000 == 0 or line_count == total_lines_in_file:
                completion = float(line_count) / total_lines_in_file * 100.0  # cast before dividing to avoid integer division
                sltl.update_table_row(
                    type="banner_impression",
                    log_copy_time=curr_time,
                    start_time=start,
                    end_time=end,
                    log_completion_pct=completion.__str__(),
                    total_rows=line_count.__str__(),
                )

        """ ====== FILE COMPLETE ====== """
        logFile.close()

        """ 
            Break out impression data by minute.  This conditional detects when a request with a previously unseen minute in the timestamp appears.
         
            Run through the counts dictionary and insert a row into the banner impressions table for each entry 
        """

        for banner, countryCounts in counts.items():
            for country, projectCounts in countryCounts.items():
                for project, langCounts in projectCounts.items():
                    for lang, timestampCounts in langCounts.items():
                        for timestamp, count in timestampCounts.items():

                            itl.insert_row(
                                utm_source_arg=banner,
                                referrer_arg=project,
                                country_arg=country,
                                lang_arg=lang,
                                counts_arg=str(count),
                                on_minute_arg=timestamp,
                                start_timestamp_arg=start,
                            )
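
The per-minute grouping above nests five dictionary levels via AutoVivification. A sketch of the same aggregation with a flat collections.Counter (Python 2.7+) keyed on the full tuple, which removes both the try/except on the first increment and the nested insertion loops:

from collections import Counter

counts = Counter()

# inside the line loop, once banner/country/project/lang/time_stamp are known:
#     counts[(banner, country, project, lang, time_stamp)] += 1

# after the file is exhausted, one flat loop performs all the inserts:
for (banner, country, project, lang, timestamp), count in counts.items():
    itl.insert_row(utm_source_arg=banner, referrer_arg=project,
                   country_arg=country, lang_arg=lang, counts_arg=str(count),
                   on_minute_arg=timestamp, start_timestamp_arg=start)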
예제 #25
0
def test(request):
    
    try:
                
        """ 
            PROCESS POST DATA
            ================= 
            
            Escape all user input that can be entered in text fields 
            
        """
        test_name_var = MySQLdb._mysql.escape_string(request.POST['test_name'].strip())
        utm_campaign_var = MySQLdb._mysql.escape_string(request.POST['utm_campaign'].strip())
        start_time_var = MySQLdb._mysql.escape_string(request.POST['start_time'].strip())
        end_time_var = MySQLdb._mysql.escape_string(request.POST['end_time'].strip())    
        one_step_var = MySQLdb._mysql.escape_string(request.POST['one_step'].strip())        
        country = MySQLdb._mysql.escape_string(request.POST['iso_filter'])
                
        
        """ Convert timestamp format if necessary """
        if TP.is_timestamp(start_time_var, 2):
            start_time_var = TP.timestamp_convert_format(start_time_var, 2, 1)
        if TP.is_timestamp(end_time_var, 2):
            end_time_var = TP.timestamp_convert_format(end_time_var, 2, 1)
                
        if cmp(one_step_var, 'True') == 0:
            one_step_var = True
        else:
            one_step_var = False
            
        try: 
            test_type_var = MySQLdb._mysql.escape_string(request.POST['test_type'])
            labels = request.POST['artifacts']
                            
        except KeyError:
    
            test_type_var, labels = FDH.get_test_type(utm_campaign_var, start_time_var, end_time_var, DL.CampaignReportingLoader(query_type=''))  # submit an empty query type           
            labels = labels.__str__() 
        
        label_dict = dict()
        label_dict_full = dict()
        
        labels = labels[1:-1].split(',')        
                                    
        """ Parse the labels """     
        for i in range(len(labels)):
            labels[i] = labels[i]
            label = labels[i].split('\'')[1]
            label = label.strip()            
            pieces = label.split(' ')
            label = pieces[0]
            for j in range(len(pieces) - 1):
                label = label + '_' + pieces[j+1]
            
            """ Escape the label parameters """
            label = MySQLdb._mysql.escape_string(label)
            label_dict_full[label] = label
                
        """ Look at the artifact names and map them into a dict() - Determine if artifacts were chosen by the user """
        
        if 'artifacts_chosen' in request.POST:
            
            artifacts_chosen =  request.POST.getlist('artifacts_chosen')
            
            """ Ensure that only two items are selected """
            if len(artifacts_chosen) > 2:
                raise Exception('Please select (checkboxes) exactly two items to test')
            
            for elem in artifacts_chosen:
                esc_elem = MySQLdb._mysql.escape_string(str(elem))
                label_dict[esc_elem] = esc_elem
        else:
            label_dict = label_dict_full
    
        
        """ Parse the added labels IF they are not empty """
        for key in label_dict.keys():
            
            try:
                if not(request.POST[key] == ''):
                    
                    label_dict[key] = MySQLdb._mysql.escape_string(str(request.POST[key]))
                else:
                    label_dict[key] = key
            except:
                logging.error('Could not find %s in the POST QueryDict.' % key)
        
        for key in label_dict_full.keys():
            try:
                if not(request.POST[key] == ''):
                    label_dict_full[key] = MySQLdb._mysql.escape_string(str(request.POST[key]))
                else:
                    label_dict_full[key] = key
            except:
                logging.error('Could not find %s in the POST QueryDict.' % key)
    
        
        """ 
            EXECUTE REPORT GENERATION
            =========================
        
            setup time parameters
            determine test metrics
            execute queries
        """
        
        sample_interval = 1
        
        start_time_obj = TP.timestamp_to_obj(start_time_var, 1)
        end_time_obj = TP.timestamp_to_obj(end_time_var, 1)
        
        time_diff = end_time_obj - start_time_obj
        time_diff_min = time_diff.days * 1440.0 + time_diff.seconds / 60.0  # include the days component for multi-day tests
        test_interval = int(math.floor(time_diff_min / sample_interval))  # number of sampling intervals in the test window
            
        metric_types = FDH.get_test_type_metrics(test_type_var)
        metric_types_full = dict()
        
        
        """ Get the full (descriptive) version of the metric names 
            !! FIXME / TODO -- order these properly !! """
        
        for i in range(len(metric_types)):
            metric_types_full[metric_types[i]] = QD.get_metric_full_name(metric_types[i])
        
        
        """ USE generate_reporting_objects() TO GENERATE THE REPORT DATA - dependent on test type """
        
        measured_metric, winner, loser, percent_win, confidence, html_table_pm_banner, html_table_pm_lp, html_table_language, html_table \
        =  generate_reporting_objects(test_name_var, start_time_var, end_time_var, utm_campaign_var, label_dict, label_dict_full, \
                                      sample_interval, test_interval, test_type_var, metric_types, one_step_var, country)
        
        winner_var = winner[0]
        
        results = list()
        for index in range(len(winner)):
            results.append({'metric' : measured_metric[index], 'winner' : winner[index], 'loser': loser[index], 'percent_win' : percent_win[index], 'confidence' : confidence[index]})
            
        template_var_dict = {'results' : results,  \
                  'utm_campaign' : utm_campaign_var, 'metric_names_full' : metric_types_full, \
                  'summary_table': html_table, 'sample_interval' : sample_interval, \
                  'banner_pm_table' : html_table_pm_banner, 'lp_pm_table' : html_table_pm_lp, 'html_table_language' : html_table_language, \
                  'start_time' : TP.timestamp_convert_format(start_time_var, 1, 2) , 'end_time' : TP.timestamp_convert_format(end_time_var, 1, 2)}
        
        html = render_to_response('tests/results_' + test_type_var + '.html', template_var_dict, context_instance=RequestContext(request))
    
        """ 
            WRITE TO TEST TABLE
            =================== 
        
        """
        
        ttl = DL.TestTableLoader()
        
        """ Format the html string """
        html_string = html.__str__()
        html_string = html_string.replace('"', '\\"')
    
        if ttl.record_exists(utm_campaign=utm_campaign_var):
            ttl.update_test_row(test_name=test_name_var,test_type=test_type_var,utm_campaign=utm_campaign_var,start_time=start_time_var,end_time=end_time_var,html_report=html_string, winner=winner_var)
        else:
            ttl.insert_row(test_name=test_name_var,test_type=test_type_var,utm_campaign=utm_campaign_var,start_time=start_time_var,end_time=end_time_var,html_report=html_string, winner=winner_var)
        
        return html

    except Exception as inst:
        
        logging.error('Failed to correctly generate test report.')
        logging.error(type(inst))
        logging.error(inst.args)
        logging.error(inst)
    
        """ Return to the index page with an error """
        try:
            err_msg = 'Test Generation failed for: %s.  Check the fields submitted for generation. <br><br>ERROR:<br><br>%s' % (utm_campaign_var, inst.__str__())
        except:
            err_msg = 'Test Generation failed.  Check the fields submitted for generation. <br><br>ERROR:<br><br>%s' % inst.__str__()
            return campaigns_index(request, kwargs={'err_msg' : err_msg})
        
        return show_campaigns(request, utm_campaign_var, kwargs={'err_msg' : err_msg})
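
The label parsing above re-tokenizes the str() form of a Python list by hand. Since the KeyError branch produces labels via labels.__str__(), ast.literal_eval can recover the list directly; a sketch under that assumption, with parse_labels as a hypothetical name:

import ast

def parse_labels(labels_repr):
    """ Recover label names from the str() of a Python list, replacing
        internal spaces with underscores, e.g.
        "['Banner A', 'Banner B']" -> ['Banner_A', 'Banner_B'] """
    labels = ast.literal_eval(labels_repr)
    return ['_'.join(str(label).strip().split(' ')) for label in labels]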
예제 #26
0
def index(request, **kwargs):

    crl = DL.CampaignReportingLoader(query_type='totals')
    filter_data = True
    """ Determine the start and end times for the query """
    start_time_obj = datetime.datetime.utcnow() + datetime.timedelta(days=-1)
    end_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)
    start_time = TP.timestamp_from_obj(start_time_obj, 1, 3)
    """ 
        PROCESS POST KWARGS 
        ===================
    """

    err_msg = ''
    try:
        err_msg = str(kwargs['kwargs']['err_msg'])
    except:
        pass
    """ 
        PROCESS POST VARS 
        =================                
    """
    """ Process error message """
    try:
        err_msg = MySQLdb._mysql.escape_string(request.POST['err_msg'])
    except KeyError:
        pass
    """ If the filter form was submitted extract the POST vars  """
    try:
        min_donations_var = MySQLdb._mysql.escape_string(
            request.POST['min_donations'].strip())
        earliest_utc_ts_var = MySQLdb._mysql.escape_string(
            request.POST['utc_ts'].strip())
        """ If the user timestamp is earlier than the default start time run the query for the earlier start time  """
        ts_format = TP.getTimestampFormat(earliest_utc_ts_var)
        """ Ensure the validity of the timestamp input """
        if ts_format == TP.TS_FORMAT_FORMAT1:
            start_time = TP.timestamp_convert_format(earliest_utc_ts_var,
                                                     TP.TS_FORMAT_FORMAT1,
                                                     TP.TS_FORMAT_FLAT)
        elif ts_format == TP.TS_FORMAT_FLAT:
            start_time = earliest_utc_ts_var
        elif cmp(earliest_utc_ts_var, '') == 0:
            start_time = TP.timestamp_from_obj(start_time_obj, 1, 3)
        else:
            raise Exception()

        if cmp(min_donations_var, '') == 0:
            min_donations_var = -1
        else:
            min_donations_var = int(min_donations_var)

    except KeyError:  # In the case the form was not submitted set minimum donations and retain the default start time

        min_donations_var = -1
        pass

    except Exception:  # In the case the form was incorrectly formatted notify the user

        min_donations_var = -1
        start_time = TP.timestamp_from_obj(start_time_obj, 1, 3)
        err_msg = 'Filter fields are incorrect.'
    """ 
        GENERATE CAMPAIGN DATA 
        ======================
        
    """
    campaigns, all_data = crl.run_query({
        'metric_name': 'earliest_timestamp',
        'start_time': start_time,
        'end_time': end_time
    })
    """ Sort campaigns by earliest access """
    sorted_campaigns = sorted(campaigns.iteritems(),
                              key=operator.itemgetter(1))
    sorted_campaigns.reverse()
    """ 
        FILTER CAMPAIGN DATA
        ====================
        
    """

    new_sorted_campaigns = list()
    for campaign in sorted_campaigns:
        key = campaign[0]

        if campaign[1] > 0:
            name = all_data[key][0]
            if name is None:
                name = 'none'

            timestamp = TP.timestamp_convert_format(all_data[key][3], 1, 2)

            if filter_data:
                if all_data[key][2] > min_donations_var:
                    new_sorted_campaigns.append([
                        campaign[0], campaign[1], name, timestamp,
                        all_data[key][2], all_data[key][4]
                    ])
            else:
                new_sorted_campaigns.append([
                    campaign[0], campaign[1], name, timestamp,
                    all_data[key][2], all_data[key][4]
                ])

    sorted_campaigns = new_sorted_campaigns

    return render_to_response('campaigns/index.html', {
        'campaigns': sorted_campaigns,
        'err_msg': err_msg
    },
                              context_instance=RequestContext(request))
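
A small note on the sort above: sorted() accepts a reverse flag, so the sort and the explicit reverse() collapse into one call (Python 2, matching the iteritems and operator.itemgetter usage above):

sorted_campaigns = sorted(campaigns.iteritems(), key=operator.itemgetter(1), reverse=True)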
예제 #27
0
def index(request, **kwargs):
    
    """ 
        PROCESS POST DATA
        ================= 
    """
    
    if 'err_msg' in kwargs:
        err_msg = kwargs['err_msg']
    else:        
        err_msg = ''

    try:
        
        latest_utc_ts_var = MySQLdb._mysql.escape_string(request.POST['latest_utc_ts'].strip())
        earliest_utc_ts_var = MySQLdb._mysql.escape_string(request.POST['earliest_utc_ts'].strip())
        
        if not TP.is_timestamp(earliest_utc_ts_var, 1) or not TP.is_timestamp(latest_utc_ts_var, 1):
            raise TypeError
            
        if latest_utc_ts_var == '':
            latest_utc_ts_var = _end_time_
            
    except KeyError:
        
        earliest_utc_ts_var = _beginning_time_
        latest_utc_ts_var = _end_time_
    
    except TypeError:
        
        err_msg = 'Please enter a valid timestamp.'
        
        earliest_utc_ts_var = _beginning_time_
        latest_utc_ts_var = _end_time_
            
    ttl = DL.TestTableLoader()
    columns = ttl.get_column_names()
    test_rows = ttl.get_all_test_rows()
    
    """ Build a list of tests -- apply filters """
    l = []
    
    utm_campaign_index = ttl.get_test_index('utm_campaign')
    html_report_index = ttl.get_test_index('html_report')
    
    for i in test_rows:
        test_start_time = ttl.get_test_field(i, 'start_time')
        new_row = list(i)
                
        """ Ensure the timestamp is properly formatted """
        if TP.is_timestamp(test_start_time, 2):
            test_start_time = TP.timestamp_convert_format(test_start_time, 2, 1)
        
        new_row[html_report_index] = '<a href="/tests/report/%s">view</a>' % new_row[utm_campaign_index]
        
        if int(test_start_time) > int(earliest_utc_ts_var) and int(test_start_time) < int(latest_utc_ts_var):
            l.append(new_row)
        
    l.reverse()
    
    test_table = DR.DataReporting()._write_html_table(l, columns, use_standard_metric_names=True)
    
    return render_to_response('tests/index.html', {'err_msg' : err_msg, 'test_table' : test_table},  context_instance=RequestContext(request))
예제 #28
0
def generate_summary(request):
    
    try:
        
        err_msg = ''
        
        """ 
            PROCESS POST DATA
            ================= 
            
            Escape all user input that can be entered in text fields 
            
        """
        if 'utm_campaign' in request.POST:
            utm_campaign = MySQLdb._mysql.escape_string(request.POST['utm_campaign'])
        
        if 'start_time' in request.POST:
            start_time = MySQLdb._mysql.escape_string(request.POST['start_time'].strip())
            
            if not(TP.is_timestamp(start_time, 1)) and not(TP.is_timestamp(start_time, 2)):            
                err_msg = 'Incorrectly formatted start timestamp.'
                raise Exception()
            
        if 'end_time' in request.POST:
            end_time = MySQLdb._mysql.escape_string(request.POST['end_time'].strip())
            
            if not(TP.is_timestamp(end_time, 1)) and not(TP.is_timestamp(end_time, 2)):            
                err_msg = 'Incorrectly formatted end timestamp.'
                raise Exception()
        
        if 'iso_filter' in request.POST:
            country = MySQLdb._mysql.escape_string(request.POST['iso_filter'])
        else:
            country = '.{2}'
            
        if 'measure_confidence' in request.POST:
            if cmp(request.POST['measure_confidence'], 'yes') == 0:
                measure_confidence = True 
            else:
                measure_confidence = False
        else:
            measure_confidence = False
        
        if 'one_step' in request.POST:
            if cmp(request.POST['one_step'], 'yes') == 0:
                use_one_step = True 
            else:
                use_one_step = False
        else:
            use_one_step = False
            
        if 'donations_only' in request.POST:
            if cmp(request.POST['donations_only'], 'yes') == 0:
                donations_only = True 
            else:
                donations_only = False
        else:
            donations_only = False

            
        """ Convert timestamp format if necessary """
        
        if TP.is_timestamp(start_time, 2):
            start_time = TP.timestamp_convert_format(start_time, 2, 1)
        if TP.is_timestamp(end_time, 2):
            end_time = TP.timestamp_convert_format(end_time, 2, 1)
        
    
        """ =============================================== """
        
        """ 
            GENERATE A REPORT SUMMARY TABLE
            ===============================
        """
        
        if donations_only:
            srl = DL.SummaryReportingLoader(query_type=FDH._TESTTYPE_DONATIONS_)
        else:
            srl = DL.SummaryReportingLoader(query_type=FDH._TESTTYPE_BANNER_LP_)
            
        srl.run_query(start_time, end_time, utm_campaign, min_views=-1, country=country)            
        
        column_names = srl.get_column_names()
        summary_results = srl.get_results()
         
        if not(summary_results):
            html_table = '<h3>No artifact summary data available for %s.</h3>' % utm_campaign
            
        else:
            summary_results_list = list()
            for row in summary_results:
                summary_results_list.append(list(row))
            summary_results = summary_results_list
            
            """ 
                Format results to encode html table cell markup in results        
            """
            if measure_confidence:
                
                ret = DR.ConfidenceReporting(query_type='', hyp_test='').get_confidence_on_time_range(start_time, end_time, utm_campaign, one_step=use_one_step, country=country) # first get color codes on confidence
                conf_colour_code = ret[0]
                
                for row_index in range(len(summary_results)):
                    
                    artifact_index = summary_results[row_index][0] + '-' + summary_results[row_index][1] + '-' + summary_results[row_index][2]
                    
                    for col_index in range(len(column_names)):
                        
                        is_coloured_cell = False
                        if column_names[col_index] in conf_colour_code.keys():
                            if artifact_index in conf_colour_code[column_names[col_index]].keys():
                                summary_results[row_index][col_index] = '<td style="background-color:' + conf_colour_code[column_names[col_index]][artifact_index] + ';">' + str(summary_results[row_index][col_index]) + '</td>'
                                is_coloured_cell = True
                                
                        if not(is_coloured_cell):
                            summary_results[row_index][col_index] = '<td>' + str(summary_results[row_index][col_index]) + '</td>'
            
                html_table = DR.DataReporting()._write_html_table(summary_results, column_names, use_standard_metric_names=True, omit_cell_markup=True)
                
            else:
                
                html_table = DR.DataReporting()._write_html_table(summary_results, column_names, use_standard_metric_names=True)    
        
        """ Generate totals only if it's a non-donation-only query """
        
        if donations_only:
            srl = DL.SummaryReportingLoader(query_type=FDH._QTYPE_TOTAL_DONATIONS_)
        else:
            srl = DL.SummaryReportingLoader(query_type=FDH._QTYPE_TOTAL_)
            
        srl.run_query(start_time, end_time, utm_campaign, min_views=-1, country=country)
        
        total_summary_results = srl.get_results()
        
        if not(total_summary_results):
            html_table = html_table + '<div class="spacer"></div><div class="spacer"></div><h3>No data available for %s Totals.</h3>' % utm_campaign
        
        else: 
            html_table = html_table + '<div class="spacer"></div><div class="spacer"></div>' + DR.DataReporting()._write_html_table(total_summary_results, srl.get_column_names(), use_standard_metric_names=True)
        
        metric_legend_table = DR.DataReporting().get_standard_metrics_legend()
        conf_legend_table = DR.ConfidenceReporting(query_type='bannerlp', hyp_test='TTest').get_confidence_legend_table()
        
        html_table = '<h4><u>Metrics Legend:</u></h4><div class="spacer"></div>' + metric_legend_table + \
        '<div class="spacer"></div><h4><u>Confidence Legend for Hypothesis Testing:</u></h4><div class="spacer"></div>' + conf_legend_table + '<div class="spacer"></div><div class="spacer"></div>' + html_table
        
        """ 
            DETERMINE PAYMENT METHODS 
            =========================
        """
        
        ccl = DL.CiviCRMLoader()
        pm_data_counts, pm_data_conversions  = ccl.get_payment_methods(utm_campaign, start_time, end_time, country=country)

        html_table_pm_counts = DR.IntervalReporting().write_html_table_from_rowlists(pm_data_counts, ['Payment Method', 'Portion of Donations (%)'], 'Landing Page')
        html_table_pm_conversions = DR.IntervalReporting().write_html_table_from_rowlists(pm_data_conversions, ['Payment Method', 'Visits', 'Conversions', 'Conversion Rate (%)', 'Amount', 'Amount 25'], 'Landing Page')
        
        html_table = html_table + '<div class="spacer"></div><h4><u>Payment Methods Breakdown:</u></h4><div class="spacer"></div>' + html_table_pm_counts + \
        '<div class="spacer"></div><div class="spacer"></div>' + html_table_pm_conversions + '<div class="spacer"></div><div class="spacer"></div>'
        
        return render_to_response('tests/table_summary.html', {'html_table' : html_table, 'utm_campaign' : utm_campaign}, context_instance=RequestContext(request))

    except Exception as inst:
        
        if cmp(err_msg, '') == 0:
            err_msg = 'Could not generate campaign tabular results.'
        
        return index(request, err_msg=err_msg)
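
The confidence colour-coding above checks two nested key sets before wrapping each value in cell markup. A sketch of the same lookup with dict.get chaining and a small helper, assuming conf_colour_code maps a metric name to a dict of artifact colours as in the loop above; wrap_cell is a hypothetical name:

def wrap_cell(value, colour=None):
    """ Wrap a summary value in table-cell markup, colour-coded when a
        confidence colour exists for the metric/artifact pair. """
    if colour:
        return '<td style="background-color:%s;">%s</td>' % (colour, value)
    return '<td>%s</td>' % value

# usage, mirroring the nested loop above:
# colour = conf_colour_code.get(column_names[col_index], {}).get(artifact_index)
# summary_results[row_index][col_index] = wrap_cell(summary_results[row_index][col_index], colour)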
예제 #29
0
 def execute_process(self, key, **kwargs):
     
     logging.info('Commencing caching of long term trends data at:  %s' % self.CACHING_HOME)
     
     end_time, start_time = TP.timestamps_for_interval(datetime.datetime.utcnow(), 1, \
                                                       hours=-self.VIEW_DURATION_HRS, resolution=1)
     
     """ DATA CONFIG """
     
     countries = DL.CiviCRMLoader().get_ranked_donor_countries(start_time)
     countries = countries[1:6]
     
     """ set the metrics to plot """
     lttdl = DL.LongTermTrendsLoader(db='storage3')
             
     """ Dictionary object storing lists of regexes - each expression must pass for a label to persist """
     # country_groups = {'US': ['(US)'], 'CA': ['(CA)'], 'JP': ['(JP)'], 'IN': ['(IN)'], 'NL': ['(NL)']}
     payment_groups = {'Credit Card' : ['^cc$'], 'Paypal': ['^pp$']}
     currency_groups = {'USD' : ['(USD)'], 'CAD': ['(CAD)'], 'JPY': ['(JPY)'], 'EUR': ['(EUR)']}
     lang_cntry_groups = {'US': ['US..', '.{4}'], 'EN' : ['[^U^S]en', '.{4}']}
     
     top_cntry_groups = dict()
     for country in countries:
         top_cntry_groups[country] = [country, '.{2}']
     
     # To include click rate
     # groups = [ lang_cntry_groups] metrics = ['click_rate'] metrics_index = [3]
     # group_metrics = [DL.LongTermTrendsLoader._MT_RATE_] metric_types = ['country', 'language'] include_totals = [True] include_others = [True]
     
     metrics = ['impressions', 'views', 'donations', 'donations', 'amount', 'amount', 'diff_don', 'diff_don', 'donations', 'conversion_rate']
     weights = ['', '', '', '', '', '', 'donations', 'donations', '', '']
     metrics_index = [0, 1, 2, 2, 2, 4, 5, 5, 6, 6]
     groups = [lang_cntry_groups, lang_cntry_groups, lang_cntry_groups, top_cntry_groups, lang_cntry_groups, currency_groups, \
               lang_cntry_groups, lang_cntry_groups, payment_groups, payment_groups]
     
     """  The metrics that are used to build a group string to be qualified via regex - the values of the list metrics are concatenated """ 
     group_metrics = [['country', 'language'], ['country', 'language'], ['country', 'language'], \
                      ['country', 'language'], ['country', 'language'], ['currency'], ['country', 'language'], \
                      ['country', 'language'], ['payment_method'], ['payment_method']]
     
     metric_types = [DL.LongTermTrendsLoader._MT_AMOUNT_, DL.LongTermTrendsLoader._MT_AMOUNT_, DL.LongTermTrendsLoader._MT_AMOUNT_, \
                     DL.LongTermTrendsLoader._MT_AMOUNT_, DL.LongTermTrendsLoader._MT_AMOUNT_, DL.LongTermTrendsLoader._MT_AMOUNT_, \
                     DL.LongTermTrendsLoader._MT_RATE_WEIGHTED_, DL.LongTermTrendsLoader._MT_RATE_WEIGHTED_, DL.LongTermTrendsLoader._MT_AMOUNT_, \
                     DL.LongTermTrendsLoader._MT_RATE_]
     
     include_totals = [True, True, True, False, True, True, False, False, False, True]
     include_others = [True, True, True, False, True, True, True, True, True, False]
     hours_back = [0, 0, 0, 0, 0, 0, 24, 168, 0, 0]
     time_unit = [TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR, TP.HOUR]
     
     data = list()
     
     """ END CONFIG """
     
     
     """ For each metric use the LongTermTrendsLoader to generate the data to plot """
     for index in range(len(metrics)):
         
         dr = DR.DataReporting()
         
         times, counts = lttdl.run_query(start_time, end_time, metrics_index[index], metric_name=metrics[index], metric_type=metric_types[index], \
                                         groups=groups[index], group_metric=group_metrics[index], include_other=include_others[index], \
                                         include_total=include_totals[index], hours_back=hours_back[index], weight_name=weights[index], \
                                         time_unit=time_unit[index])
         
         times = TP.normalize_timestamps(times, False, time_unit[index])
         
         dr._counts_ = counts
         dr._times_ = times
   
         empty_data = [0] * len(times[times.keys()[0]])
         data.append(dr.get_data_lists([''], empty_data))
         
     dict_param = Hlp.combine_data_lists(data)
     dict_param['interval'] = self.VIEW_DURATION_HRS    
     dict_param['end_time'] = TP.timestamp_convert_format(end_time,1,2)
     
     self.clear_cached_data(key)
     self.cache_data(dict_param, key)
     
     logging.info('Caching complete.')
예제 #30
0
    def execute_process(self, key, **kwargs):

        logging.info('Commencing caching of live results data at:  %s' %
                     self.CACHING_HOME)
        shelve_key = key
        """ Find the earliest and latest page views for a given campaign  """
        lptl = DL.LandingPageTableLoader(db='db1025')

        query_name = 'report_summary_results_country.sql'
        query_name_1S = 'report_summary_results_country_1S.sql'
        campaign_regexp_filter = '^C_|^C11_'

        dl = DL.DataLoader(db='db1025')
        end_time, start_time = TP.timestamps_for_interval(
            datetime.datetime.utcnow(), 1, hours=-self.DURATION_HRS)
        """ Should a one-step query be used? """
        use_one_step = lptl.is_one_step(
            start_time, end_time, 'C11'
        )  # Assume it is a one step test if there are no impressions for this campaign in the landing page table
        """ 
            Retrieve the latest time for which impressions have been loaded
            ===============================================================
        """

        sql_stmnt = 'select max(end_time) as latest_ts from squid_log_record where log_completion_pct = 100.00'

        results = dl.execute_SQL(sql_stmnt)
        latest_timestamp = results[0][0]
        latest_timestamp = TP.timestamp_from_obj(latest_timestamp, 2, 3)
        latest_timestamp_flat = TP.timestamp_convert_format(
            latest_timestamp, 2, 1)

        ret = DR.ConfidenceReporting(query_type='', hyp_test='',
                                     db='db1025').get_confidence_on_time_range(
                                         start_time,
                                         end_time,
                                         campaign_regexp_filter,
                                         one_step=use_one_step)
        measured_metrics_counts = ret[1]
        """ Prepare Summary results """

        sql_stmnt = Hlp.file_to_string(projSet.__sql_home__ + query_name)
        sql_stmnt = sql_stmnt % (start_time, latest_timestamp_flat, start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, \
                                 start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, \
                                 start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, campaign_regexp_filter)

        logging.info('Executing report_summary_results ...')

        results = dl.execute_SQL(sql_stmnt)
        column_names = dl.get_column_names()

        if use_one_step:

            logging.info('... including one step artifacts ...')

            sql_stmnt_1S = Hlp.file_to_string(projSet.__sql_home__ +
                                              query_name_1S)
            sql_stmnt_1S = sql_stmnt_1S % (start_time, latest_timestamp_flat, start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, \
                                     start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, \
                                     start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, campaign_regexp_filter)

            results = list(results)
            results_1S = dl.execute_SQL(sql_stmnt_1S)
            """ Ensure that the results are unique """
            one_step_keys = list()
            for row in results_1S:
                one_step_keys.append(str(row[0]) + str(row[1]) + str(row[2]))

            new_results = list()
            for row in results:
                key = str(row[0]) + str(row[1]) + str(row[2])
                if not (key in one_step_keys):
                    new_results.append(row)
            results = new_results

            results.extend(list(results_1S))

        metric_legend_table = DR.DataReporting().get_standard_metrics_legend()
        conf_legend_table = DR.ConfidenceReporting(
            query_type='bannerlp',
            hyp_test='TTest').get_confidence_legend_table()
        """ Create a interval loader objects """

        sampling_interval = 5  # 5 minute sampling interval for donation plots

        ir_cmpgn = DR.IntervalReporting(query_type=FDH._QTYPE_CAMPAIGN_ +
                                        FDH._QTYPE_TIME_,
                                        generate_plot=False,
                                        db='db1025')
        ir_banner = DR.IntervalReporting(query_type=FDH._QTYPE_BANNER_ +
                                         FDH._QTYPE_TIME_,
                                         generate_plot=False,
                                         db='db1025')
        ir_lp = DR.IntervalReporting(query_type=FDH._QTYPE_LP_ +
                                     FDH._QTYPE_TIME_,
                                     generate_plot=False,
                                     db='db1025')
        """ Execute queries """
        ir_cmpgn.run(start_time, end_time, sampling_interval, 'donations', '',
                     {})
        ir_banner.run(start_time, end_time, sampling_interval, 'donations', '',
                      {})
        ir_lp.run(start_time, end_time, sampling_interval, 'donations', '', {})
        """ Prepare serialized objects """

        dict_param = dict()

        dict_param['metric_legend_table'] = metric_legend_table
        dict_param['conf_legend_table'] = conf_legend_table

        dict_param['measured_metrics_counts'] = measured_metrics_counts
        dict_param['results'] = results
        dict_param['column_names'] = column_names

        dict_param['interval'] = sampling_interval
        dict_param['duration'] = self.DURATION_HRS

        dict_param['start_time'] = TP.timestamp_convert_format(
            start_time, 1, 2)
        dict_param['end_time'] = TP.timestamp_convert_format(end_time, 1, 2)

        dict_param['ir_cmpgn_counts'] = ir_cmpgn._counts_
        dict_param['ir_banner_counts'] = ir_banner._counts_
        dict_param['ir_lp_counts'] = ir_lp._counts_

        dict_param['ir_cmpgn_times'] = ir_cmpgn._times_
        dict_param['ir_banner_times'] = ir_banner._times_
        dict_param['ir_lp_times'] = ir_lp._times_

        self.clear_cached_data(shelve_key)
        self.cache_data(dict_param, shelve_key)

        logging.info('Caching complete.')
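
The uniqueness pass above scans a list of concatenated keys for every row. A sketch of the same merge with a set, which makes each membership test O(1); row_key is a hypothetical helper:

def row_key(row):
    return str(row[0]) + str(row[1]) + str(row[2])

one_step_keys = set(row_key(row) for row in results_1S)

results = [row for row in results if row_key(row) not in one_step_keys]
results.extend(list(results_1S))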
예제 #31
0
    def execute_process(self, key, **kwargs):
        
        logging.info('Commencing caching of live results data at:  %s' % self.CACHING_HOME)
        shelve_key = key
        
        """ Find the earliest and latest page views for a given campaign  """
        lptl = DL.LandingPageTableLoader(db='db1025')
            
        query_name = 'report_summary_results_country.sql'
        query_name_1S = 'report_summary_results_country_1S.sql'                    
        campaign_regexp_filter = '^C_|^C11_'
                
        dl = DL.DataLoader(db='db1025')
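        # Note: the helper returns the pair (end_time, start_time); the window presumably spans the DURATION_HRS hours leading up to now (UTC)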
        end_time, start_time = TP.timestamps_for_interval(datetime.datetime.utcnow(), 1, hours=-self.DURATION_HRS)
        
        """ Should a one-step query be used? """        
        use_one_step = lptl.is_one_step(start_time, end_time, 'C11')  # Assume it is a one step test if there are no impressions for this campaign in the landing page table
        
        """ 
            Retrieve the latest time for which impressions have been loaded
            ===============================================================
        """
        
        sql_stmnt = 'select max(end_time) as latest_ts from squid_log_record where log_completion_pct = 100.00'
        
        results = dl.execute_SQL(sql_stmnt)
        latest_timestamp = results[0][0]
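        # Render the datetime object returned by MySQL as a string timestamp (format 2),
        # then convert it to the flat form (format 1) interpolated into the SQL templates below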
        latest_timestamp = TP.timestamp_from_obj(latest_timestamp, 2, 3)
        latest_timestamp_flat = TP.timestamp_convert_format(latest_timestamp, 2, 1)
    
        ret = DR.ConfidenceReporting(query_type='', hyp_test='', db='db1025').get_confidence_on_time_range(start_time, end_time, campaign_regexp_filter, one_step=use_one_step)
        measured_metrics_counts = ret[1]
        
        """ Prepare Summary results """
        
        sql_stmnt = Hlp.file_to_string(projSet.__sql_home__ + query_name)
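        # The positional args below must line up one-to-one with the format placeholders in report_summary_results_country.sql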
        sql_stmnt = sql_stmnt % (start_time, latest_timestamp_flat, start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, \
                                 start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, \
                                 start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, campaign_regexp_filter)        
        
        logging.info('Executing report_summary_results ...')
        
        results = dl.execute_SQL(sql_stmnt)
        column_names = dl.get_column_names()
        
        if use_one_step:
            
            logging.info('... including one step artifacts ...')
            
            sql_stmnt_1S = Hlp.file_to_string(projSet.__sql_home__ + query_name_1S)
            sql_stmnt_1S = sql_stmnt_1S % (start_time, latest_timestamp_flat, start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, \
                                     start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, \
                                     start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, campaign_regexp_filter)
            
            results = list(results)        
            results_1S = dl.execute_SQL(sql_stmnt_1S)
            
            """ Ensure that the results are unique """
            one_step_keys = list()
            for row in results_1S:
                one_step_keys.append(str(row[0]) + str(row[1]) + str(row[2]))
            
            new_results = list()
            for row in results:
                key = str(row[0]) + str(row[1]) + str(row[2])
                if key not in one_step_keys:
                    new_results.append(row)
            results = new_results
                
            results.extend(list(results_1S))
            
        metric_legend_table = DR.DataReporting().get_standard_metrics_legend()
        conf_legend_table = DR.ConfidenceReporting(query_type='bannerlp', hyp_test='TTest').get_confidence_legend_table()

        """ Create a interval loader objects """
        
        sampling_interval = 5 # 5 minute sampling interval for donation plots
        
        ir_cmpgn = DR.IntervalReporting(query_type=FDH._QTYPE_CAMPAIGN_ + FDH._QTYPE_TIME_, generate_plot=False, db='db1025')
        ir_banner = DR.IntervalReporting(query_type=FDH._QTYPE_BANNER_ + FDH._QTYPE_TIME_, generate_plot=False, db='db1025')
        ir_lp = DR.IntervalReporting(query_type=FDH._QTYPE_LP_ + FDH._QTYPE_TIME_, generate_plot=False, db='db1025')
            
        """ Execute queries """        
        ir_cmpgn.run(start_time, end_time, sampling_interval, 'donations', '', {})
        ir_banner.run(start_time, end_time, sampling_interval, 'donations', '', {})
        ir_lp.run(start_time, end_time, sampling_interval, 'donations', '', {})

        """ Prepare serialized objects """
        
        dict_param = dict()

        dict_param['metric_legend_table'] = metric_legend_table
        dict_param['conf_legend_table'] = conf_legend_table
        
        dict_param['measured_metrics_counts'] = measured_metrics_counts
        dict_param['results'] = results
        dict_param['column_names'] = column_names

        dict_param['interval'] = sampling_interval
        dict_param['duration'] = self.DURATION_HRS    
        
        dict_param['start_time'] = TP.timestamp_convert_format(start_time, 1, 2)
        dict_param['end_time'] = TP.timestamp_convert_format(end_time, 1, 2)
        
        dict_param['ir_cmpgn_counts'] = ir_cmpgn._counts_
        dict_param['ir_banner_counts'] = ir_banner._counts_
        dict_param['ir_lp_counts'] = ir_lp._counts_
        
        dict_param['ir_cmpgn_times'] = ir_cmpgn._times_
        dict_param['ir_banner_times'] = ir_banner._times_
        dict_param['ir_lp_times'] = ir_lp._times_
        
        self.clear_cached_data(shelve_key)
        self.cache_data(dict_param, shelve_key)
        
        logging.info('Caching complete.')
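
The cache_data and clear_cached_data hooks above persist the assembled dict_param under shelve_key, but their implementations are not shown. A minimal sketch of what they might look like, assuming the cache is a shelve file under CACHING_HOME (the file path and the get_cached_data helper are hypothetical):

import shelve

class LiveResultsCache(object):
    """ Hypothetical mixin illustrating one way the caching hooks could work. """

    CACHING_HOME = './cache/live_results.s'  # assumed shelve file path

    def cache_data(self, data, key):
        # Persist the serialized result dict under the given key
        s = shelve.open(self.CACHING_HOME)
        try:
            s[str(key)] = data
        finally:
            s.close()

    def clear_cached_data(self, key):
        # Drop any stale entry for this key before re-caching
        s = shelve.open(self.CACHING_HOME)
        try:
            if str(key) in s:
                del s[str(key)]
        finally:
            s.close()

    def get_cached_data(self, key):
        # Retrieve a previously cached result dict (None if absent)
        s = shelve.open(self.CACHING_HOME)
        try:
            return s.get(str(key))
        finally:
            s.close()

Opening and closing the shelf on every call keeps each scheduled caching run self-contained; a long-lived process could instead hold the shelf open with writeback=True.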
        
예제 #32
0
def format_query(query_name, sql_stmnt, args, **kwargs):
    
    country, min_donation, order_str = process_kwargs(kwargs)
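    # process_kwargs presumably supplies defaults for the optional country, minimum-donation, and ordering filters used by some branches below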
    
    if cmp(query_name, 'report_campaign_ecomm') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % (start_time))

    elif cmp(query_name, 'report_campaign_logs') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % (start_time, start_time, start_time))

    elif cmp(query_name, 'report_campaign_ecomm_by_hr') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start_time))

    elif cmp(query_name, 'report_campaign_logs_by_hr') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%',
                                     start_time, '%', '%', '%', '%', start_time, '%'))

    elif cmp(query_name, 'report_impressions_country') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', start_time))

    elif cmp(query_name, 'report_campaign_logs_by_min') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%',
                                     start_time, '%', '%', '%', '%', start_time))
    
    elif cmp(query_name, 'report_non_US_clicks') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', start_time, '%', '%', '%', start_time))
    
    elif cmp(query_name, 'report_contribution_tracking') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', '%', start_time))
    
    elif cmp(query_name, 'report_total_amounts_by_hr') == 0:
        start_time = args[0]
        end_time = args[1]            
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', ' %H', start_time, end_time))
    
    elif cmp(query_name, 'report_total_amounts_by_day') == 0:
        start_time = args[0]
        end_time = args[1]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '', start_time, end_time))
    
    elif cmp(query_name, 'report_LP_metrics') == 0 or cmp(query_name, 'report_LP_metrics_1S') == 0:
        
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        min_views = args[3]
        
        """ Format the condition for minimum views """
        if cmp(str(min_views), '-1') == 0:
            min_views = ' '
        else:
            min_views = 'where lp.views > ' + str(min_views) + ' '
            
        sql_stmnt = str(sql_stmnt % (start_time, end_time, campaign, country, start_time, end_time, campaign, country, start_time, end_time, campaign, country, min_views))
        
    elif cmp(query_name, 'report_banner_metrics') == 0 or cmp(query_name, 'report_bannerLP_metrics') == 0 or cmp(query_name, 'report_total_metrics') == 0 or \
    cmp(query_name, 'report_banner_metrics_1S') == 0 or cmp(query_name, 'report_bannerLP_metrics_1S') == 0 or cmp(query_name, 'report_total_metrics_1S') == 0:
        
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        min_views = args[3]
            
        """ Format the condition for minimum views """
        if cmp(str(min_views), '-1') == 0:
            min_views = ' '
        else:
            min_views = 'where lp.views > ' + str(min_views) + ' '
            
        sql_stmnt = str(sql_stmnt % (start_time, end_time, country, start_time, end_time, campaign, country, start_time, end_time, country, \
                                     start_time, end_time, campaign, country, start_time, end_time, campaign, country, min_views))
        
    
    elif cmp(query_name, 'report_latest_campaign') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % (start_time))
            
    elif cmp(query_name, 'report_banner_impressions_by_hour') == 0:
        start = args[0]
        end = args[1]
        sql_stmnt = str(sql_stmnt % ('%','%','%','%', start, end))
                
    elif cmp(query_name, 'report_ecomm_by_amount') == 0:
        start_time = args[0]
        end_time = args[1]
        sql_stmnt = str(sql_stmnt % ('%', '%',  '%',  '%', start_time, end_time, end_time))
    
    elif cmp(query_name, 'report_ecomm_by_contact') == 0:
        where_str = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', where_str))
    
    elif cmp(query_name, 'report_LP_metrics_minutely') == 0 or cmp(query_name, 'report_LP_metrics_minutely_1S') == 0:
        
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        interval = args[3]
        
        """ The start time for the impression portion of the query should be one second less"""
        start_time_obj = TP.timestamp_to_obj(start_time,1)
        imp_start_time_obj = start_time_obj + datetime.timedelta(seconds=-1)
        imp_start_time_obj_str = TP.timestamp_from_obj(imp_start_time_obj, 1, 3)
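        # NOTE: imp_start_time_obj_str is computed but never interpolated below;
        # only the banner variants of this branch actually use the one-second offset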
        
        sql_stmnt = str(sql_stmnt % ('%', '%',  '%',  '%', interval, interval, start_time, end_time, campaign, country, '%', '%',  '%',  '%', interval, interval, start_time, end_time, campaign, country, \
                                 start_time, end_time, campaign, country, campaign))
    
    elif cmp(query_name, 'report_banner_metrics_minutely') == 0 or cmp(query_name, 'report_bannerLP_metrics_minutely') == 0 or cmp(query_name, 'report_banner_metrics_minutely_1S') == 0 or cmp(query_name, 'report_bannerLP_metrics_minutely_1S') == 0:
    
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        interval = args[3]
        
        """ The start time for the impression portion of the query should be one second less"""
        start_time_obj = TP.timestamp_to_obj(start_time,1)
        imp_start_time_obj = start_time_obj + datetime.timedelta(seconds=-1)
        imp_start_time_obj_str = TP.timestamp_from_obj(imp_start_time_obj, 1, 3)
        
        sql_stmnt = str(sql_stmnt % ('%', '%', '%',  '%', interval, interval, imp_start_time_obj_str, end_time, \
                                     country, '%', '%',  '%',  '%', interval, interval, start_time, end_time, campaign, country, \
                                '%', '%',  '%',  '%', interval, interval, start_time, end_time, country, \
                                '%', '%',  '%',  '%', interval, interval, start_time, end_time, campaign, \
                                country, start_time, end_time, campaign, country, campaign, ))
    
    elif cmp(query_name, 'report_campaign_metrics_minutely') == 0 or cmp(query_name, 'report_campaign_metrics_minutely_1S') == 0 or cmp(query_name, 'report_campaign_metrics_minutely_total') == 0 \
    or cmp(query_name, 'report_campaign_metrics_minutely_total_1S') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        interval = args[3]
        
        sql_stmnt = str(sql_stmnt % (campaign, '%', '%', '%',  '%', interval, interval, start_time, end_time, campaign, country, '%', '%',  '%',  '%', interval, interval, start_time, end_time, campaign, country))
        
    elif cmp(query_name, 'report_campaign_totals') == 0:
        start_time = args[0]
        end_time = args[1]
        
        sql_stmnt = str(sql_stmnt % (start_time, end_time))
    
    elif cmp(query_name, 'report_campaign_banners') == 0:
        start_time = args[0]
        end_time = args[1]
        utm_campaign = args[2]
        
        sql_stmnt = str(sql_stmnt % (start_time, end_time, utm_campaign))
        
    elif cmp(query_name, 'report_campaign_lps') == 0:
        start_time = args[0]
        end_time = args[1]
        utm_campaign = args[2]
        
        sql_stmnt = str(sql_stmnt % (start_time, end_time, utm_campaign))
    
    elif cmp(query_name, 'report_campaign_bannerlps') == 0:
        start_time = args[0]
        end_time = args[1]
        utm_campaign = args[2]
        
        sql_stmnt = str(sql_stmnt % (start_time, end_time, utm_campaign))
    
    elif cmp(query_name, 'report_campaign_metrics_minutely_all') == 0 or cmp(query_name, 'report_banner_metrics_minutely_all') == 0 or cmp(query_name, 'report_lp_metrics_minutely_all') == 0:
        start_time = args[0]
        end_time = args[1]
        interval = args[3]
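        # args[2] (the campaign) is skipped here; the *_all variants presumably aggregate across campaigns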
        
        sql_stmnt = str(sql_stmnt % ('%', '%', '%',  '%', interval, interval, start_time, end_time))

    elif cmp(query_name, 'report_donation_metrics') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        
        sql_stmnt = str(sql_stmnt % (start_time, end_time, campaign, country, start_time, end_time, campaign, country))

    elif cmp(query_name, 'report_total_donations') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        
        """ Recursively construct the sub-query """
        sub_query_name = 'report_donation_metrics'
        sub_query_sql = Hlp.file_to_string(projSet.__sql_home__ + sub_query_name + '.sql')
        sub_query_sql = format_query(sub_query_name, sub_query_sql, [start_time, end_time, campaign], country=country)        

        sql_stmnt = str(sql_stmnt % sub_query_sql)

    elif cmp(query_name, 'report_daily_totals_by_country') == 0:
        start_time = args[0]
        end_time = args[1]
        
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', start_time, end_time, country, min_donation, order_str))


    else:
        return 'no such query\n'

    return sql_stmnt
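
As the recursive report_total_donations branch illustrates, callers load the raw template from disk and pass it through format_query together with a positional argument list. A minimal usage sketch, assuming the same projSet.__sql_home__ layout and flat timestamps (the campaign id, country, and time values are hypothetical):

query_name = 'report_donation_metrics'
raw_sql = Hlp.file_to_string(projSet.__sql_home__ + query_name + '.sql')

# args: start time, end time (flat timestamps), then the campaign id
sql = format_query(query_name, raw_sql,
                   ['20110101000000', '20110102000000', 'C_test'],
                   country='US')
results = DL.DataLoader(db='db1025').execute_SQL(sql)

The long if/elif chain keeps every template's argument recipe in one place; a dict mapping query names to small argument-builder functions would make the routing easier to scan, at the cost of scattering those recipes.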
예제 #33
0
def index(request, **kwargs):
    
    crl = DL.CampaignReportingLoader(query_type='totals')
    filter_data = True
            
    """ Determine the start and end times for the query """ 
    start_time_obj = datetime.datetime.utcnow() + datetime.timedelta(days=-1)
    end_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)
    start_time = TP.timestamp_from_obj(start_time_obj, 1, 3)
    
    """ 
        PROCESS POST KWARGS 
        ===================
    """
    
    err_msg = ''
    try:
        err_msg = str(kwargs['kwargs']['err_msg'])
    except (KeyError, TypeError):
        pass
    
    """ 
        PROCESS POST VARS 
        =================                
    """
    
    """ Process error message """
    try:
        err_msg = MySQLdb._mysql.escape_string(request.POST['err_msg'])
    except KeyError:
        pass

    """ If the filter form was submitted extract the POST vars  """
    try:
        min_donations_var = MySQLdb._mysql.escape_string(request.POST['min_donations'].strip())
        earliest_utc_ts_var = MySQLdb._mysql.escape_string(request.POST['utc_ts'].strip())
        
        """ If the user timestamp is earlier than the default start time run the query for the earlier start time  """
        ts_format = TP.getTimestampFormat(earliest_utc_ts_var)
    
        """ Ensure the validity of the timestamp input """
        if ts_format == TP.TS_FORMAT_FORMAT1:
            start_time = TP.timestamp_convert_format(earliest_utc_ts_var, TP.TS_FORMAT_FORMAT1, TP.TS_FORMAT_FLAT)
        elif ts_format == TP.TS_FORMAT_FLAT:
            start_time = earliest_utc_ts_var
        elif cmp(earliest_utc_ts_var, '') == 0:
            start_time = TP.timestamp_from_obj(start_time_obj,1,3)
        else:
            raise Exception()
        
        if cmp(min_donations_var, '') == 0:
            min_donations_var = -1
        else:
            min_donations_var = int(min_donations_var)
    
    except KeyError: # The form was not submitted: set the minimum-donations default and retain the default start time
        min_donations_var = -1
    
    except Exception: # The form was badly formatted: fall back to the defaults and notify the user
        min_donations_var = -1
        start_time = TP.timestamp_from_obj(start_time_obj, 1, 3)
        err_msg = 'Filter fields are incorrect.'

    """ 
        GENERATE CAMPAIGN DATA 
        ======================
        
    """
    campaigns, all_data = crl.run_query({'metric_name' : 'earliest_timestamp', 'start_time' : start_time, 'end_time' : end_time})

    """ Sort campaigns by earliest access """    
    sorted_campaigns = sorted(campaigns.iteritems(), key=operator.itemgetter(1))
    sorted_campaigns.reverse()
    
    """ 
        FILTER CAMPAIGN DATA
        ====================
        
    """

    new_sorted_campaigns = list()
    for campaign in sorted_campaigns:
        key = campaign[0]
        
        if campaign[1] > 0:
            name = all_data[key][0]
            if name is None:
                name = 'none'
            
            timestamp = TP.timestamp_convert_format(all_data[key][3], 1, 2)
            
            if filter_data: 
                if all_data[key][2] > min_donations_var:
                    new_sorted_campaigns.append([campaign[0], campaign[1], name, timestamp, all_data[key][2], all_data[key][4]])
            else:
                new_sorted_campaigns.append([campaign[0], campaign[1], name, timestamp, all_data[key][2], all_data[key][4]])
    
    sorted_campaigns = new_sorted_campaigns

    return render_to_response('campaigns/index.html', {'campaigns' : sorted_campaigns, 'err_msg' : err_msg}, context_instance=RequestContext(request))