def dominance(self, data, tournament=None, sort_by='corr'):
    """
    Mean (across eras) of fraction of models bested per era.

    For each metric, every pair is scored in each era by the fraction of
    the other pairs it strictly beats; those per-era fractions are then
    averaged across eras. A pair "beats" another when its 'corr' is
    strictly higher or its 'mse' is strictly lower. Ties and NaN
    comparisons count as not beating.

    Parameters
    ----------
    data
        Passed through unchanged to ``metrics_per_era``.
    tournament : optional
        Passed through unchanged to ``metrics_per_era``.
    sort_by : str
        Name of the metric column ('corr' or 'mse') used to sort the
        result in descending order.

    Returns
    -------
    DataFrame
        One row per pair with 'corr' and 'mse' dominance columns, run
        through ``add_split_pairs`` and sorted by `sort_by`, most
        dominant first.

    Raises
    ------
    ValueError
        If there is exactly one pair, so there is nothing to compare
        against.
    """
    columns = ['corr', 'mse']
    # Only the per-era metrics are used; the second return value is ignored.
    mpe, _ = metrics_per_era(data, self, tournament, columns=columns)
    dfs = []
    for col in columns:
        # Rows are eras, columns are pairs, cells are the metric value.
        pivot = mpe.pivot(index='era', columns='pair', values=col)
        pairs = pivot.columns.tolist()
        a = pivot.values
        n_others = a.shape[1] - 1.0
        if n_others == 0:
            raise ValueError("Must have at least two pairs")
        # NOTE(review): assumes higher 'corr' and lower 'mse' are better;
        # confirm against the metric definitions in metrics_per_era.
        higher_is_better = col == 'corr'
        fractions = []
        for j in range(a.shape[1]):
            own = a[:, j].reshape(-1, 1)
            # Broadcast this pair's column against every pair's column; the
            # self-comparison is never strictly True so it adds nothing.
            wins = own > a if higher_is_better else own < a
            per_era = wins.sum(axis=1) / n_others
            fractions.append(per_era.mean())
        dfs.append(pd.DataFrame(data=fractions, index=pairs, columns=[col]))
    df = pd.concat(dfs, axis=1)
    df = add_split_pairs(df)
    df = df.sort_values([sort_by], ascending=[False])
    return df
def compare(self, data, prediction, tournament=None):
    """
    Compare performance of predictions with the same names.

    Only pairs present in both this object and `prediction` are compared.

    Parameters
    ----------
    data
        Supplies ``data.ids`` (used to select the rows compared) and is
        passed to ``metrics_per_era``.
    prediction
        The other prediction object to compare against.
    tournament : optional
        Passed through unchanged to ``metrics_per_era``.

    Returns
    -------
    DataFrame
        One row per shared pair with columns:

        - logloss1, logloss2 : mean per-era logloss of self / `prediction`
        - win1 : fraction of eras where self's logloss is strictly lower
        - corr : correlation coefficient (``np.corrcoef``) between the
          two sets of predicted values
        - maxdiff : largest absolute difference between predicted values
        - ystd1, ystd2 : standard deviation of each set of predicted values

        When pairs are shared the frame is passed through
        ``add_split_pairs``. When none are shared, the empty frame is
        returned as is.

    Raises
    ------
    IndexError
        If the per-era metrics of the two predictions are not indexed
        identically for some pair.
    """
    pairs = [pair for pair in self.pairs(as_str=False) if pair in prediction]
    cols = [
        'logloss1', 'logloss2', 'win1', 'corr', 'maxdiff', 'ystd1', 'ystd2'
    ]
    comp = pd.DataFrame(columns=cols, index=pairs)
    if not pairs:
        return comp

    # Restrict both predictions to the rows present in `data`.
    ids = data.ids
    df1 = self.loc[ids]
    df2 = prediction.loc[ids]

    m1 = self[pairs].metrics_per_era(data, tournament, metrics=['logloss'],
                                     era_as_str=False)
    m2 = prediction[pairs].metrics_per_era(data, tournament,
                                           metrics=['logloss'],
                                           era_as_str=False)

    for i, pair in enumerate(pairs):
        pair_name = pair[0]
        pair_tournament = nx.tournament_str(pair[1])
        m1i = m1[(m1.name == pair_name) & (m1.tournament == pair_tournament)]
        m2i = m2[(m2.name == pair_name) & (m2.tournament == pair_tournament)]
        # Index.equals also handles indexes of different lengths, where an
        # elementwise `!=` would raise pandas' own ValueError instead of the
        # intended IndexError.
        # Presumably the index holds eras (per the error message) -- verify.
        if not m1i.index.equals(m2i.index):
            raise IndexError("Can only handle aligned eras")

        logloss1 = m1i.logloss.mean()
        logloss2 = m2i.logloss.mean()
        win1 = (m1i.logloss < m2i.logloss).mean()

        y1 = df1[pair].y.reshape(-1)
        y2 = df2[pair].y.reshape(-1)
        corr = np.corrcoef(y1, y2)[0, 1]
        maxdiff = np.abs(y1 - y2).max()
        ystd1 = y1.std()
        ystd2 = y2.std()

        # Values are listed in the same order as `cols`.
        comp.iloc[i] = [logloss1, logloss2, win1, corr, maxdiff, ystd1, ystd2]

    comp = add_split_pairs(comp)
    return comp