Beispiel #1
0
    def _get_trend(self, trend, tamconfig, data_sources):
        """Pick which trend to apply.

        Args:
          trend: explicit trend to use, if any. Any falsy value (e.g. None) defers
            the choice to tamconfig.
          tamconfig: the row from self.tamconfig to use; its 'source_after_2014' and
            'trend' entries are read here.
          data_sources: dict of dicts of datasources, handed to
            interpolation.is_group_name.

        Returns:
          The explicitly specified trend when one was given. Otherwise 'single' when
          tamconfig['source_after_2014'] is not a group name (an individual source is
          used as-is, without curve fitting), else the curve fit named by
          tamconfig['trend'].
        """
        if trend:
            return trend
        is_group_name = interpolation.is_group_name
        source_name = tamconfig['source_after_2014']
        uses_group = is_group_name(data_sources=data_sources, name=source_name)
        return tamconfig['trend'] if uses_group else 'single'
Beispiel #2
0
    def _low_med_high(self, adoption_data, min_max_sd, adconfig, source, data_sources, region):
        """Build a Low / Medium / High frame for the selected data source(s).

        Medium is the selected source itself (single, non-group source) or the mean
        across the matching sources. Low and High are Medium minus / plus the 'S.D'
        column of min_max_sd scaled by adconfig 'low_sd_mult' / 'high_sd_mult'.
        When interpolation.matching_data_sources returns None, all three columns
        are filled with NaN.
        """
        bands = pd.DataFrame(index=adoption_data.index.copy(), columns=['Low', 'Medium', 'High'])
        region_key = f'Region: {region}' if region is not None else None
        columns = interpolation.matching_data_sources(data_sources=data_sources, name=source,
                groups_only=False, region_key=region_key)

        # Nothing matched: hand back an all-NaN frame.
        if columns is None:
            for label in ('Medium', 'Low', 'High'):
                bands.loc[:, label] = np.nan
            return bands

        # Anything other than exactly one column is treated as a group; a lone column
        # still counts as a group if its name is itself a group name.
        treat_as_group = len(columns) != 1 or interpolation.is_group_name(
            data_sources=data_sources, name=columns[0])

        if treat_as_group:
            # Excel computes the Mean as:
            #   SUM($C46:$Q46)/COUNTIF($C46:$Q46,">0")
            #
            # That skips empty sources, but it also means a source whose real value
            # is 0.0 does not influence the Medium result. Background:
            # https://docs.google.com/document/d/19sq88J_PXY-y_EnqbSJDl0v9CdJArOdFLatNNUFhjEA/edit#heading=h.yvwwsbvutw2j
            #
            # The Excel behavior is matched for the initial product and can be
            # revisited once matching Excel results is no longer required.
            # To revert, use:    medium = adoption_data.loc[:, columns].mean(axis=1)
            selected = adoption_data.loc[:, columns]
            medium = selected.mask(selected == 0.0, np.nan).mean(axis=1)
        else:
            # A single source is used directly, as Excel does, with no Mean taken.
            medium = adoption_data.loc[:, columns[0]]

        bands.loc[:, 'Medium'] = medium
        low_offset = min_max_sd.loc[:, 'S.D'] * adconfig.loc['low_sd_mult']
        bands.loc[:, 'Low'] = medium - low_offset
        high_offset = min_max_sd.loc[:, 'S.D'] * adconfig.loc['high_sd_mult']
        bands.loc[:, 'High'] = medium + high_offset
        return bands
Beispiel #3
0
 def adoption_is_single_source(self):
     """Return True when the selected adoption source is not a group name.

     Checks self.ac.soln_pds_adoption_prognostication_source against
     self.data_sources via interpolation.is_group_name and negates the result.
     """
     group_check = interpolation.is_group_name
     sources = self.data_sources
     selected = self.ac.soln_pds_adoption_prognostication_source
     return not group_check(data_sources=sources, name=selected)