Python TimestampProcessor.normalize_timestamps Examples, classes.TimestampProcessor.normalize_timestamps Python Examples

Example #1

0

Show file

File: DataFilters.py Project: rfaulkner/WMF_Analytics

    def execute(self):
        """
            Filter and normalize the counts / times series of the mutable object.

            Steps, in order:
                1. Run the base ``DataFilter.execute``.
                2. If item keys are set, narrow counts and times with
                   ``select_metric_keys``.
                3. For every series: normalize the timestamps, collapse runs of
                   consecutive repeated time indices, then normalize the
                   intervals against ``self._interval_``.
                4. Add a zero-valued series for each artifact key without data.
                5. If artifact keys are specified, drop every other series.

            The ``counts`` / ``times`` dictionaries are updated in place and
            nothing is returned.

            NOTE(review): when item keys are set, the dictionaries returned by
            ``select_metric_keys`` are only bound to locals here - confirm they
            are the same objects that are stored on the mutable object.
        """

        DataFilter.execute(self)

        counts = self._mutable_obj_._counts_
        times = self._mutable_obj_._times_
        item_keys = self._mutable_obj_._item_keys_

        # Select only the specified item keys
        if len(item_keys) > 0:
            counts = self._mutable_obj_.select_metric_keys(counts)
            times = self._mutable_obj_.select_metric_keys(times)

        # Convert Times to Integers that indicate relative times AND normalize
        # the intervals in case any are missing
        for key in list(times):
            normalized = TP.normalize_timestamps(times[key], False, 3)

            # BUG FIX - remove any repeated indices: an entry is kept only when
            # it differs from its successor; the final entry is always kept.
            fixed_times = [current
                           for current, following in zip(normalized, normalized[1:])
                           if current != following]
            if len(normalized) > 0:
                fixed_times.append(normalized[-1])

            times[key], counts[key] = TP.normalize_intervals(fixed_times, counts[key], self._interval_)

        # If there are missing metrics add them as zeros
        for artifact_key in self._artifact_keys_:
            if artifact_key not in times:
                # Reuse the time axis of an existing series.  next(iter(...))
                # works on both Python 2 and 3 (dict.keys() is not indexable on
                # Python 3); with no series at all the time axis is empty.
                if times:
                    time_axis = times[next(iter(times))]
                else:
                    time_axis = []
                times[artifact_key] = time_axis
                counts[artifact_key] = [0.0] * len(time_axis)

        # Remove artifacts not in the list if there are any labels specified
        if len(self._artifact_keys_) > 0:
            # Iterate over a snapshot of the keys: entries are deleted in the loop.
            for key in list(counts):
                if key not in self._artifact_keys_:
                    del counts[key]
                    # A key may exist in counts only; do not fail on it.
                    times.pop(key, None)

Example #2

0

Show file

File: DataFilters.py Project: rfaulkner/WMF_Analytics

    def execute(self):
        """
            Filter and normalize the counts / times series of the mutable object.

            The base ``DataFilter.execute`` runs first.  Counts and times are
            then optionally narrowed with ``select_metric_keys`` (when item keys
            are set); each series has its timestamps normalized, consecutive
            repeated time indices collapsed and its intervals normalized against
            ``self._interval_``.  Finally, artifact keys without data receive a
            zero-valued series and, when artifact keys are specified, all other
            series are removed.

            The dictionaries are updated in place; the method returns nothing.

            NOTE(review): when item keys are set, the dictionaries returned by
            ``select_metric_keys`` are only bound to locals here - confirm they
            are the same objects that are stored on the mutable object.
        """

        DataFilter.execute(self)

        counts = self._mutable_obj_._counts_
        times = self._mutable_obj_._times_
        item_keys = self._mutable_obj_._item_keys_

        # Select only the specified item keys
        if len(item_keys) > 0:
            counts = self._mutable_obj_.select_metric_keys(counts)
            times = self._mutable_obj_.select_metric_keys(times)

        # Convert Times to Integers that indicate relative times AND normalize
        # the intervals in case any are missing
        for key in list(times):
            normalized = TP.normalize_timestamps(times[key], False, 3)

            # BUG FIX - remove any repeated indices.  Each entry is compared
            # with the one after it and kept only if they differ; the last
            # entry has no successor and is always kept.
            fixed_times = [
                current
                for current, following in zip(normalized, normalized[1:])
                if current != following
            ]
            if len(normalized) > 0:
                fixed_times.append(normalized[-1])

            times[key], counts[key] = TP.normalize_intervals(
                fixed_times, counts[key], self._interval_)

        # If there are missing metrics add them as zeros
        for artifact_key in self._artifact_keys_:
            if artifact_key not in times:
                # Borrow the time axis of an existing series.  dict.keys() is
                # not indexable on Python 3, so take the first key through an
                # iterator; fall back to an empty axis when no series exist.
                if times:
                    time_axis = times[next(iter(times))]
                else:
                    time_axis = []
                times[artifact_key] = time_axis
                counts[artifact_key] = [0.0] * len(time_axis)

        # Remove artifacts not in the list if there are any labels specified
        if len(self._artifact_keys_) > 0:
            # Snapshot the keys first: deleting while iterating a live key view
            # raises RuntimeError on Python 3.
            for key in list(counts):
                if key not in self._artifact_keys_:
                    del counts[key]
                    # Tolerate keys that are present in counts only.
                    times.pop(key, None)

Example #3

0

Show file

File: DataCaching.py Project: rfaulkner/WMF_Analytics

    def execute_process(self, key, **kwargs):
        """
            Build the long term trends data set and store it in the cache under ``key``.

            For each configured metric the ``DL.LongTermTrendsLoader`` is queried
            over the window returned by ``TP.timestamps_for_interval``, the
            resulting timestamps are normalized, and the per-metric lists are
            collected.  The lists are merged with ``Hlp.combine_data_lists``,
            the 'interval' and 'end_time' entries are added, and the result
            replaces whatever was cached for ``key``.

            Parameters:
                key     - cache key handed to ``clear_cached_data`` and ``cache_data``
                kwargs  - accepted for interface compatibility; not read here
        """

        # Lazy logging arguments: the message is only formatted if it is emitted.
        logging.info('Commencing caching of long term trends data at:  %s', self.CACHING_HOME)

        end_time, start_time = TP.timestamps_for_interval(
            datetime.datetime.utcnow(), 1, hours=-self.VIEW_DURATION_HRS, resolution=1)

        # ---- DATA CONFIG ----

        # Keeps positions 1 through 5 of the ranked list (position 0 is dropped).
        countries = DL.CiviCRMLoader().get_ranked_donor_countries(start_time)
        countries = countries[1:6]

        # set the metrics to plot
        lttdl = DL.LongTermTrendsLoader(db='storage3')

        # Dictionary objects storing lists of regexes - each expression must
        # pass for a label to persist
        # country_groups = {'US': ['(US)'], 'CA': ['(CA)'], 'JP': ['(JP)'], 'IN': ['(IN)'], 'NL': ['(NL)']}
        payment_groups = {'Credit Card': ['^cc$'], 'Paypal': ['^pp$']}
        currency_groups = {
            'USD': ['(USD)'],
            'CAD': ['(CAD)'],
            'JPY': ['(JPY)'],
            'EUR': ['(EUR)'],
        }
        lang_cntry_groups = {
            'US': ['US..', '.{4}'],
            'EN': ['[^U^S]en', '.{4}'],
        }

        top_cntry_groups = dict()
        for country in countries:
            top_cntry_groups[country] = [country, '.{2}']

        # To include click rate
        # groups = [ lang_cntry_groups] metrics = ['click_rate'] metrics_index = [3]
        # group_metrics = [DL.LongTermTrendsLoader._MT_RATE_] metric_types = ['country', 'language'] include_totals = [True] include_others = [True]
        # NOTE(review): the group_metrics / metric_types values in the snippet
        # above look swapped compared with the live configuration below.

        # The lists below are parallel: position i of every list configures query i.
        metrics = [
            'impressions', 'views', 'donations', 'donations', 'amount',
            'amount', 'diff_don', 'diff_don', 'donations', 'conversion_rate',
        ]
        weights = ['', '', '', '', '', '', 'donations', 'donations', '', '']
        # NOTE(review): the first 'amount' entry uses index 2, the same index as
        # the 'donations' entries before it - confirm this is intended.
        metrics_index = [0, 1, 2, 2, 2, 4, 5, 5, 6, 6]
        groups = [
            lang_cntry_groups, lang_cntry_groups, lang_cntry_groups, top_cntry_groups,
            lang_cntry_groups, currency_groups, lang_cntry_groups, lang_cntry_groups,
            payment_groups, payment_groups,
        ]

        # The metrics that are used to build a group string to be qualified via
        # regex - the values of the list metrics are concatenated
        group_metrics = [
            ['country', 'language'], ['country', 'language'], ['country', 'language'],
            ['country', 'language'], ['country', 'language'], ['currency'],
            ['country', 'language'], ['country', 'language'],
            ['payment_method'], ['payment_method'],
        ]

        mt_amount = DL.LongTermTrendsLoader._MT_AMOUNT_
        mt_rate = DL.LongTermTrendsLoader._MT_RATE_
        mt_rate_weighted = DL.LongTermTrendsLoader._MT_RATE_WEIGHTED_
        metric_types = [
            mt_amount, mt_amount, mt_amount,
            mt_amount, mt_amount, mt_amount,
            mt_rate_weighted, mt_rate_weighted, mt_amount,
            mt_rate,
        ]

        include_totals = [
            True, True, True, False, True, True, False, False, False, True,
        ]
        include_others = [
            True, True, True, False, True, True, True, True, True, False,
        ]
        hours_back = [0, 0, 0, 0, 0, 0, 24, 168, 0, 0]
        time_unit = [TP.HOUR] * len(metrics)

        data = list()

        # ---- END CONFIG ----

        # For each metric use the LongTermTrendsLoader to generate the data to plot
        for index, metric_name in enumerate(metrics):

            dr = DR.DataReporting()

            times, counts = lttdl.run_query(
                start_time, end_time, metrics_index[index],
                metric_name=metric_name, metric_type=metric_types[index],
                groups=groups[index], group_metric=group_metrics[index],
                include_other=include_others[index], include_total=include_totals[index],
                hours_back=hours_back[index], weight_name=weights[index],
                time_unit=time_unit[index])

            times = TP.normalize_timestamps(times, False, time_unit[index])

            dr._counts_ = counts
            dr._times_ = times

            # Zero-filled placeholder as long as the first time series.
            # next(iter(...)) replaces times.keys()[0], which fails on Python 3;
            # a query that returned no series yields an empty placeholder
            # instead of an IndexError.
            if times:
                empty_data = [0] * len(times[next(iter(times))])
            else:
                empty_data = []
            data.append(dr.get_data_lists([''], empty_data))

        dict_param = Hlp.combine_data_lists(data)
        dict_param['interval'] = self.VIEW_DURATION_HRS
        dict_param['end_time'] = TP.timestamp_convert_format(end_time, 1, 2)

        # Replace any previously cached entry for this key.
        self.clear_cached_data(key)
        self.cache_data(dict_param, key)

        logging.info('Caching complete.')

Example #4

0

Show file

File: DataCaching.py Project: rfaulkner/WMF_Analytics

 def execute_process(self, key, **kwargs):
     """
         Build the long term trends data set and store it in the cache under ``key``.

         For each configured metric the ``DL.LongTermTrendsLoader`` is queried
         over the window returned by ``TP.timestamps_for_interval``, the
         resulting timestamps are normalized, and the per-metric lists are
         collected.  The lists are merged with ``Hlp.combine_data_lists``,
         the 'interval' and 'end_time' entries are added, and the result
         replaces whatever was cached for ``key``.

         Parameters:
             key     - cache key handed to ``clear_cached_data`` and ``cache_data``
             kwargs  - accepted for interface compatibility; not read here
     """

     # Lazy logging arguments: the message is only formatted if it is emitted.
     logging.info('Commencing caching of long term trends data at:  %s', self.CACHING_HOME)

     end_time, start_time = TP.timestamps_for_interval(
         datetime.datetime.utcnow(), 1, hours=-self.VIEW_DURATION_HRS, resolution=1)

     # ---- DATA CONFIG ----

     # Keeps positions 1 through 5 of the ranked list (position 0 is dropped).
     countries = DL.CiviCRMLoader().get_ranked_donor_countries(start_time)
     countries = countries[1:6]

     # set the metrics to plot
     lttdl = DL.LongTermTrendsLoader(db='storage3')

     # Dictionary objects storing lists of regexes - each expression must
     # pass for a label to persist
     # country_groups = {'US': ['(US)'], 'CA': ['(CA)'], 'JP': ['(JP)'], 'IN': ['(IN)'], 'NL': ['(NL)']}
     payment_groups = {'Credit Card' : ['^cc$'], 'Paypal': ['^pp$']}
     currency_groups = {'USD' : ['(USD)'], 'CAD': ['(CAD)'], 'JPY': ['(JPY)'], 'EUR': ['(EUR)']}
     lang_cntry_groups = {'US': ['US..', '.{4}'], 'EN' : ['[^U^S]en', '.{4}']}

     top_cntry_groups = dict()
     for country in countries:
         top_cntry_groups[country] = [country, '.{2}']

     # To include click rate
     # groups = [ lang_cntry_groups] metrics = ['click_rate'] metrics_index = [3]
     # group_metrics = [DL.LongTermTrendsLoader._MT_RATE_] metric_types = ['country', 'language'] include_totals = [True] include_others = [True]
     # NOTE(review): the group_metrics / metric_types values in the snippet
     # above look swapped compared with the live configuration below.

     # The lists below are parallel: position i of every list configures query i.
     metrics = ['impressions', 'views', 'donations', 'donations', 'amount', 'amount', 'diff_don', 'diff_don', 'donations', 'conversion_rate']
     weights = ['', '', '', '', '', '', 'donations', 'donations', '', '']
     # NOTE(review): the first 'amount' entry uses index 2, the same index as
     # the 'donations' entries before it - confirm this is intended.
     metrics_index = [0, 1, 2, 2, 2, 4, 5, 5, 6, 6]
     groups = [lang_cntry_groups, lang_cntry_groups, lang_cntry_groups, top_cntry_groups, lang_cntry_groups, currency_groups,
               lang_cntry_groups, lang_cntry_groups, payment_groups, payment_groups]

     # The metrics that are used to build a group string to be qualified via
     # regex - the values of the list metrics are concatenated
     group_metrics = [['country', 'language'], ['country', 'language'], ['country', 'language'],
                      ['country', 'language'], ['country', 'language'], ['currency'], ['country', 'language'],
                      ['country', 'language'], ['payment_method'], ['payment_method']]

     mt_amount = DL.LongTermTrendsLoader._MT_AMOUNT_
     mt_rate = DL.LongTermTrendsLoader._MT_RATE_
     mt_rate_weighted = DL.LongTermTrendsLoader._MT_RATE_WEIGHTED_
     metric_types = [mt_amount, mt_amount, mt_amount,
                     mt_amount, mt_amount, mt_amount,
                     mt_rate_weighted, mt_rate_weighted, mt_amount,
                     mt_rate]

     include_totals = [True, True, True, False, True, True, False, False, False, True]
     include_others = [True, True, True, False, True, True, True, True, True, False]
     hours_back = [0, 0, 0, 0, 0, 0, 24, 168, 0, 0]
     time_unit = [TP.HOUR] * len(metrics)

     data = list()

     # ---- END CONFIG ----

     # For each metric use the LongTermTrendsLoader to generate the data to plot
     for index, metric_name in enumerate(metrics):

         dr = DR.DataReporting()

         times, counts = lttdl.run_query(
             start_time, end_time, metrics_index[index],
             metric_name=metric_name, metric_type=metric_types[index],
             groups=groups[index], group_metric=group_metrics[index],
             include_other=include_others[index], include_total=include_totals[index],
             hours_back=hours_back[index], weight_name=weights[index],
             time_unit=time_unit[index])

         times = TP.normalize_timestamps(times, False, time_unit[index])

         dr._counts_ = counts
         dr._times_ = times

         # Zero-filled placeholder as long as the first time series.
         # next(iter(...)) replaces times.keys()[0], which fails on Python 3;
         # a query that returned no series yields an empty placeholder
         # instead of an IndexError.
         if times:
             empty_data = [0] * len(times[next(iter(times))])
         else:
             empty_data = []
         data.append(dr.get_data_lists([''], empty_data))

     dict_param = Hlp.combine_data_lists(data)
     dict_param['interval'] = self.VIEW_DURATION_HRS
     dict_param['end_time'] = TP.timestamp_convert_format(end_time,1,2)

     # Replace any previously cached entry for this key.
     self.clear_cached_data(key)
     self.cache_data(dict_param, key)

     logging.info('Caching complete.')