def linear_interpolate(df, offset, final_year='2050', harmonize_year='2015'):
    """Calculate linearly interpolated convergence harmonized trajectory

    Every row is replaced by the straight line that runs from the
    offset-adjusted value in `harmonize_year` to the unmodified model value
    in `final_year`. All numeric columns strictly before `final_year` are
    overwritten with points on that line (columns earlier than
    `harmonize_year` included); `final_year` and later columns are left as
    reported by the model.

    Parameters
    ----------
    df : pd.DataFrame
        model data
    offset : pd.DataFrame
        offset data, added to the `harmonize_year` column of `df`
    final_year : string, optional
        column name of convergence year
    harmonize_year : string, optional
        column name of harmonization year

    Returns
    -------
    df : pd.DataFrame
        harmonized trajectories (a modified copy, the input is untouched)
    """
    result = df.copy()
    t_start, t_end = float(harmonize_year), float(final_year)

    # end points of the line: harmonized start value, untouched end value
    v_start = offset + result[harmonize_year]
    v_end = result[final_year]

    slope = (v_end - v_start) / (t_end - t_start)
    intercept = v_start - slope * t_start

    # slope/intercept are fixed before any column is overwritten
    last = int(final_year)
    targets = [col for col in utils.numcols(result) if int(col) < last]
    for col in targets:
        result[col] = slope * float(col) + intercept
    return result
def reduce_offset(df, offset, final_year='2050', harmonize_year='2015'):
    """Calculate offset convergence harmonized trajectory

    The offset is scaled by a factor that is 1 in `harmonize_year`, falls
    linearly to 0 in `final_year` and stays 0 afterwards; the scaled offset
    is added to the model data. Years before `harmonize_year` lie on the same
    line and therefore get a factor greater than 1.

    Parameters
    ----------
    df : pd.DataFrame
        model data
    offset : pd.DataFrame
        offset data; one value per row of `df` is assumed (it is passed to
        ``np.outer`` and its index labels the result) -- TODO confirm
    final_year : string, optional
        column name of convergence year
    harmonize_year : string, optional
        column name of harmonization year

    Returns
    -------
    df : pd.DataFrame
        harmonized trajectories (a modified copy, the input is untouched)
    """
    df = df.copy()
    yi, yf = int(harmonize_year), int(final_year)
    numcols = utils.numcols(df)

    def factor(year):
        # Compare years as integers: comparing the raw column labels would be
        # a lexicographic string comparison, which is only correct when all
        # labels have the same width.
        year = int(year)
        if year > yf:
            return 0.0
        # reduces from 1 to 0; values before the base year are > 1
        return -(year - yi) / float(yf - yi) + 1

    factors = [factor(year) for year in numcols]

    # add existing values to offset time series
    offsets = pd.DataFrame(np.outer(offset, factors),
                           columns=numcols, index=offset.index)
    df[numcols] = df[numcols] + offsets
    return df
def __init__(self, data, history, config=None, method_choice=None,
             verify_indicies=True):
    """Store model and history data and derive harmonization factors

    Parameters
    ----------
    data : pd.DataFrame
        model data in standard calculation format
    history : pd.DataFrame
        history data in standard calculation format
    config : dict, optional
        configuration dictionary (see
        http://mattgidden.com/aneris/config.html for options); an empty
        configuration is used when omitted
    method_choice : optional
        stored unchanged on the instance as ``self.method_choice``
    verify_indicies : bool, optional
        check indicies of data and history, provide warning message if
        different

    Raises
    ------
    ValueError
        if `data` or `history` is not indexed by a ``pd.MultiIndex``
    """
    # a fresh dict per call instead of a shared mutable default argument
    config = {} if config is None else config

    if not isinstance(data.index, pd.MultiIndex):
        raise ValueError('Data must use utils.df_idx')
    if not isinstance(history.index, pd.MultiIndex):
        raise ValueError('History must use utils.df_idx')

    if verify_indicies and not data.index.equals(history.index):
        # pad the model data with zero rows for every entry that only the
        # history reports, then order the rows like the history
        idx = history.index.difference(data.index)
        msg = 'More history than model reports, adding 0 values {}'
        _warn(msg.format(idx.to_series().head()))
        df = pd.DataFrame(0, columns=data.columns, index=idx)
        data = pd.concat([data, df]).sort_index().loc[history.index]
        # sanity check: padding must have reproduced the history index
        assert data.index.equals(history.index)

    key = 'harmonize_year'
    # TODO type
    self.base_year = str(config[key]) if key in config else '2015'
    self.data = data[utils.numcols(data)]
    # Placeholder frame with a single all-NaN base-year column. The dtype is
    # given explicitly because a data-less Series defaults to object dtype
    # on pandas >= 2.0.
    self.model = pd.Series(index=self.data.index, name=self.base_year,
                           dtype=float).to_frame()
    self.history = history
    self.methods_used = None
    self.offsets, self.ratios = harmonize_factors(
        self.data, self.history, self.base_year)
    self.method_choice = method_choice

    # get default methods to use in decision tree
    self.ratio_method = config.get('default_ratio_method')
    self.offset_method = config.get('default_offset_method')
    self.luc_method = config.get('default_luc_method')
    self.luc_cov_threshold = config.get('luc_cov_threshold')
def constant_ratio(df, ratios):
    """Calculate constant ratio harmonized trajectory

    Parameters
    ----------
    df : pd.DataFrame
        model data
    ratios : pd.DataFrame
        ratio data, applied row-wise

    Returns
    -------
    df : pd.DataFrame
        harmonized trajectories (a modified copy, the input is untouched)
    """
    result = df.copy()
    year_cols = utils.numcols(result)
    # scale every numeric column by the row's ratio
    scaled = result[year_cols].mul(ratios, axis=0)
    result[year_cols] = scaled
    return result
def constant_offset(df, offset):
    """Calculate constant offset harmonized trajectory

    Parameters
    ----------
    df : pd.DataFrame
        model data
    offset : pd.DataFrame
        offset data, applied row-wise

    Returns
    -------
    df : pd.DataFrame
        harmonized trajectories (a modified copy, the input is untouched)
    """
    result = df.copy()
    year_cols = utils.numcols(result)
    # shift every numeric column by the row's offset
    shifted = result[year_cols].add(offset, axis=0)
    result[year_cols] = shifted
    return result
def constant_ratio(df, ratios, harmonize_year='2015'):
    """Calculate constant ratio harmonized trajectory

    Parameters
    ----------
    df : pd.DataFrame
        model data
    ratios : pd.DataFrame
        ratio data, applied row-wise
    harmonize_year : string, optional
        column name of harmonization year, ignored

    Returns
    -------
    df : pd.DataFrame
        harmonized trajectories (a modified copy, the input is untouched)
    """
    harmonized = df.copy()
    numeric = utils.numcols(harmonized)
    # scale every numeric column by the row's ratio
    harmonized[numeric] = harmonized[numeric].mul(ratios, axis=0)
    return harmonized
def reduce_ratio(df, ratios, final_year='2050', harmonize_year='2015'):
    """Calculate ratio convergence harmonized trajectory

    The applied ratio equals `ratios` up to and including `harmonize_year`,
    converges linearly towards 1 until `final_year`, and is exactly 1
    afterwards (model values are left unchanged beyond `final_year`).

    Parameters
    ----------
    df : pd.DataFrame
        model data
    ratios : pd.DataFrame
        ratio data; one value per row of `df` is assumed (it is passed to
        ``np.outer`` and its index labels the result) -- TODO confirm
    final_year : string, optional
        column name of convergence year
    harmonize_year : string, optional
        column name of harmonization year

    Returns
    -------
    df : pd.DataFrame
        harmonized trajectories (a modified copy, the input is untouched)
    """
    df = df.copy()
    yi, yf = int(harmonize_year), int(final_year)
    numcols = utils.numcols(df)

    def factor(year):
        # Years are compared as integers; comparing the raw column labels
        # would be a lexicographic string comparison.
        year = int(year)
        if year < yi:
            # full ratio in years prior to harmonization
            return 1.0
        if year > yf:
            return 0.0
        # reduces from 1 in the harmonization year to 0 in the final year
        return -(year - yi) / float(yf - yi) + 1

    # One factor per column, in column order, so factors stay aligned with
    # `numcols` even when the columns are not sorted chronologically.
    factors = [factor(year) for year in numcols]

    # multiply existing values by ratio time series
    ratios = pd.DataFrame(np.outer(ratios - 1, factors),
                          columns=numcols, index=ratios.index) + 1
    df[numcols] = df[numcols] * ratios
    return df
def diagnostics(unharmonized, model, metadata, config=None):
    """Provide warnings or throw errors based on harmonized model data and
    metadata

    Current diagnostics are:

    - large missing values: a sector contributes more than 20% of the
      history of its (region, gas) group and its harmonization method is
      'model_zero' -- warning provided
    - large mid- and end-point differences: the relative difference between
      harmonized and unharmonized data exceeds the configured threshold --
      collected in the returned report
    - negative emissions for any gas other than CO2 -- warning provided and
      error thrown

    Parameters
    ----------
    unharmonized : pd.DataFrame
        unharmonized model data in standard calculation format
    model : pd.DataFrame
        harmonized model data in standard calculation format
    metadata : pd.DataFrame
        harmonization metadata; the 'history' and 'method' columns are used
    config : dictionary, optional
        ratio values to use in diagnostics, key options include 'mid' and
        'end'. A missing or empty config falls back to
        ``{'mid': 4.0, 'end': 2.0}``.

    Returns
    -------
    report : pd.DataFrame
        one row per trajectory with at least one large difference, holding
        the '<year>_diff' columns and the 'method' used; rows whose method
        is 'model_zero' or missing are excluded

    Raises
    ------
    ValueError
        if any non-CO2 gas has a negative value in a numeric column
    """
    config = config or {'mid': 4.0, 'end': 2.0}

    #
    # Detect Large Missing Values
    #
    history = metadata['history']
    group_totals = history.groupby(level=['region', 'gas']).sum()
    # special merge because you can't do operations on multiindex
    merged = pd.merge(history.reset_index(), group_totals.reset_index(),
                      on=['region', 'gas'])
    fraction = merged['history_x'] / merged['history_y']
    fraction.index = history.index
    fraction.name = 'fraction'

    # downselect to big contributors the model did not report
    large = fraction[fraction > 0.2]
    large_methods = metadata.loc[large.index, 'method']
    unreported = large_methods[large_methods == 'model_zero']
    missing = large.loc[unreported.index].reset_index()
    if not missing.empty:
        _warn('LARGE MISSING Values Found!!:\n {}'.format(missing))

    #
    # Report on large medium- and long-term differences
    #
    years = utils.numcols(model)
    report = model.copy()
    # column just before the midpoint of the numeric columns, and the last
    checkpoints = (
        ('mid', years[len(years) // 2 - 1]),
        ('end', years[-1]),
    )
    for key, year in checkpoints:
        if key not in config:
            continue
        rel_diff = np.abs(model[year] - unharmonized[year]) / unharmonized[year]
        report['{}_diff'.format(year)] = rel_diff[rel_diff > config[key]]

    # keep only the diff columns, and only rows where one of them is set
    report = report.drop(years, axis=1).dropna(how='all')
    report['method'] = metadata.loc[report.index, 'method']
    keep = ~report['method'].isin(['model_zero', np.nan])
    report = report[keep]

    #
    # Detect negative non-CO2 emissions
    #
    flat = model.reset_index()
    non_co2 = flat[flat.gas != 'CO2']
    has_negative = (non_co2[utils.numcols(non_co2)].T < 0).any()
    negative = non_co2[has_negative]
    if negative.empty:
        return report

    _warn('Negative Emissions found for non-CO2 gases:\n {}'.format(negative))
    raise ValueError('Harmonization failed due to negative non-CO2 gases')