def test_n_combinations():
    """Check ``n_combinations`` with and without a mandatory element."""
    # Pairs drawn from 1..4 that are required to contain the value 1.
    pairs_with_one = df_metrics.n_combinations(
        [1, 2, 3, 4], n=2, must_include=[1], permutations=False)
    assert pairs_with_one == [(1, 2), (1, 3), (1, 4)]

    # All triples drawn from 1..4, no element being mandatory.
    all_triples = df_metrics.n_combinations(
        [1, 2, 3, 4], n=3, permutations=False)
    assert all_triples == [(1, 2, 3), (1, 2, 4), (1, 3, 4), (2, 3, 4)]
def __init__(self, other_names=('k1', 'k2', 'k3'), calc_tau=False,
             dataset_names=None, metadata_template=None):
    """
    Build the result template for comparing 'ref' against each other column.

    Parameters
    ----------
    other_names : iterable, optional
        Column names of the non-reference datasets. Converted to a list
        and handed to the parent initializer as ``other_name``.
    calc_tau : bool, optional
        Stored as ``self.calc_tau``.
    dataset_names : sequence, optional
        Names used in the result keys in place of the column names, given
        in the order ``['ref'] + other_names``. When None, the column
        names themselves are used.
    metadata_template : optional
        Forwarded unchanged to the parent initializer.

    Notes
    -----
    Result keys for pairwise metrics have the form
    ``'<metric>_between_<name1>_and_<name2>'``; ``'n_obs'`` is stored once.
    """
    other_names = list(other_names)
    super(IntercomparisonMetrics, self).__init__(
        other_name=other_names, metadata_template=metadata_template)

    # strings that split the dataset names and metric names in the output
    # e.g. 'metric_between_dataset1_and_dataset2'
    self.ds_names_split, self.metric_ds_split = '_and_', '_between_'

    self.df_columns = ['ref'] + self.other_name
    self.calc_tau = calc_tau

    if dataset_names is None:
        self.ds_names = self.df_columns
    else:
        self.ds_names = dataset_names

    # lookup: data frame column name -> dataset name used in result keys
    self.ds_names_lut = {}
    for name, col in zip(self.ds_names, self.df_columns):
        self.ds_names_lut[col] = name

    # must_include is passed as a list, like the other n_combinations
    # call sites, so the string 'ref' is never treated as a sequence of
    # characters.
    combis = list(n_combinations(self.df_columns, 2, must_include=['ref']))
    self.tds_names = [
        "{1}{0}{2}".format(self.ds_names_split, *combi) for combi in combis
    ]

    # metrics that are equal for all datasets
    metrics_common = ['n_obs']
    # metrics that are calculated between dataset pairs
    metrics_tds = [
        'R', 'p_R', 'rho', 'p_rho', 'BIAS', 'RMSD', 'mse', 'RSS',
        'mse_corr', 'mse_bias', 'urmsd', 'mse_var', 'tau', 'p_tau'
    ]

    metrics_common = _get_metric_template(metrics_common)
    metrics_tds = _get_metric_template(metrics_tds)

    for metric in metrics_common.keys():
        self.result_template[metric] = metrics_common[metric].copy()

    # Build the keys from the column tuples directly. Joining the pair
    # name and splitting it again on '_and_' breaks for column names that
    # themselves contain '_and_'.
    for first_col, second_col in combis:
        tds_name_key = self.ds_names_split.join(
            [self.ds_names_lut[first_col], self.ds_names_lut[second_col]])
        for metric in metrics_tds.keys():
            key = self.metric_ds_split.join([metric, tds_name_key])
            self.result_template[key] = metrics_tds[metric].copy()

    if not calc_tau:
        # NOTE(review): the pair metrics above are stored under
        # 'tau_between_...' / 'p_tau_between_...' keys, so these pops do
        # not remove them. Confirm whether they should be dropped too.
        self.result_template.pop('tau', None)
        self.result_template.pop('p_tau', None)
def _make_names(self):
    """
    Build the joined names of all dataset pairs and triples.

    Only combinations of ``self.df_columns`` that are requested with
    ``self.ref_name`` as mandatory member are used; the members of each
    combination are concatenated with ``self.ds_names_split``.

    Returns
    -------
    tds_names : list of str
        One name per combination of two columns.
    thds_names : list of str
        One name per combination of three columns.
    """
    sep = self.ds_names_split
    required = [self.ref_name]

    pairs = n_combinations(self.df_columns, 2, must_include=required)
    triples = n_combinations(self.df_columns, 3, must_include=required)

    tds_names = [sep.join(pair) for pair in pairs]
    thds_names = [
        "{1}{0}{2}{0}{3}".format(sep, *triple) for triple in triples
    ]

    return tds_names, thds_names