def evaluate(self, X_top, X_bot, *args, **kwargs):
    """Evaluate the wrapped model on a classification task built from two samples.

    The two sample sets are merged into a single labelled dataset (top vs.
    bottom) and forwarded, together with any extra arguments, to
    ``self.model.evaluate``.
    """
    features, labels = make_classification_dataset(X_top, X_bot)
    return self.model.evaluate(features, labels, *args, **kwargs)
def main(name, gamma, estimation, output_dir, transparent, context, style,
         palette, width, height, aspect, dpi, extension, seed):
    """Plot densities and (gamma-relative) density ratios for a synthetic 1-D task.

    Samples from a bimodal-mixture ``p`` and a broad normal ``q``, fits
    univariate KDEs to both samples, and saves a two-panel figure (densities
    on top, exact density ratios below) under ``output_dir/name``.

    Parameters mirror the CLI: ``gamma`` is the relative-ratio mixing rate,
    ``estimation`` gates the KDE/CPE comparison section (currently dead code,
    see NOTE below), and the remaining arguments control seaborn/matplotlib
    styling and output file naming.

    NOTE(review): the unconditional ``return 0`` after the first figure makes
    everything below it unreachable — including the ``estimation`` branch and
    the MLP-based comparison.  Kept byte-for-byte pending confirmation of
    intent.

    Assumes ``sns``, ``np``, ``plt``, ``pd``, ``sm``, ``tfd`` and the project
    helpers (``DensityRatioMarginals``, ``make_classification_dataset``,
    ``gamma_relative_density_ratio``, ``MLPDensityRatioEstimator``) are
    imported at module level — not visible in this chunk; confirm.

    Returns 0 (CLI exit code).
    """
    num_features = 1  # dimensionality of the synthetic input space
    num_train = 1000  # nbr training points in synthetic dataset

    # x_min, x_max = -6.0, 6.0
    x_min, x_max = -5.0, 5.0
    num_index_points = 512  # nbr of index points (plotting grid resolution)

    if height is None:
        height = width / aspect
    # figsize = size(width, aspect)
    figsize = (width, height)
    # Pixel dimensions, embedded in output filenames.
    suffix = f"{width*dpi:.0f}x{height*dpi:.0f}"

    rc = {
        "figure.figsize": figsize,
        "font.serif": ["Times New Roman"],
        "text.usetex": True,
    }
    sns.set(context=context, style=style, palette=palette, font="serif", rc=rc)

    output_path = Path(output_dir).joinpath(name)
    output_path.mkdir(parents=True, exist_ok=True)

    # NOTE(review): unused in the reachable part of this function.
    random_state = np.random.RandomState(seed)
    # /preamble

    X_grid = np.linspace(x_min, x_max, num_index_points) \
                .reshape(-1, num_features)

    # Numerator ell(x): bimodal mixture; denominator g(x): broad normal.
    p = tfd.MixtureSameFamily(
        mixture_distribution=tfd.Categorical(probs=[0.3, 0.7]),
        components_distribution=tfd.Normal(loc=[2.0, -3.0], scale=[1.0, 0.5]))
    q = tfd.Normal(loc=0.0, scale=2.0)

    # p = tfd.Normal(loc=0.0, scale=1.0)
    # q = tfd.Normal(loc=0.5, scale=1.0)

    # p = tfd.Normal(loc=1.0, scale=1.0)
    # q = tfd.Normal(loc=0.0, scale=2.0)

    r = DensityRatioMarginals(top=p, bot=q)
    X_p, X_q = r.make_dataset(num_train, rate=gamma, seed=seed)
    X_train, y_train = make_classification_dataset(X_p, X_q)

    kde_lesser = sm.nonparametric.KDEUnivariate(X_p)
    kde_lesser.fit(bw="normal_reference")

    kde_greater = sm.nonparametric.KDEUnivariate(X_q)
    kde_greater.fit(bw="normal_reference")

    # Two stacked panels sharing the x axis: densities (top), ratios (bottom).
    fig, (ax1, ax2) = plt.subplots(nrows=2, sharex="col")
    # fig, ax1 = plt.subplots()

    l, = ax1.plot(X_grid.squeeze(axis=-1),
                  r.top.prob(X_grid).numpy().squeeze(axis=-1),
                  label=r"$\ell(x)$")
    g, = ax1.plot(X_grid.squeeze(axis=-1),
                  r.bot.prob(X_grid).numpy().squeeze(axis=-1),
                  label=r"$g(x)$")

    # NOTE(review): commented-out ax1.annotate(...) experiments (distribution
    # callouts) elided here for readability; recover from VCS history.

    ax1.set_xlabel(r'$x$')
    ax1.set_ylabel('density')
    ax1.legend()

    # NOTE(review): commented-out standalone save/show of the density panel
    # elided; the combined figure below is saved instead.

    # fig, ax2 = plt.subplots()

    foo, = ax2.plot(X_grid.squeeze(axis=-1),
                    r.ratio(X_grid).numpy().squeeze(axis=-1),
                    label=r"$r_0(x)$", color="tab:orange")
    bar, = ax2.plot(X_grid.squeeze(axis=-1),
                    gamma_relative_density_ratio(
                        r.ratio(X_grid),
                        gamma=gamma).numpy().squeeze(axis=-1),
                    label=fr"$r_{{{gamma:.2f}}}(x)$", color="tab:green")

    # NOTE(review): commented-out ax2.annotate(...) / alternative ratio-plot
    # experiments elided here for readability; recover from VCS history.

    ax2.set_xlabel(r'$x$')
    ax2.set_ylabel('density ratio')
    ax2.legend()

    plt.tight_layout()
    for ext in extension:
        fig.savefig(
            output_path.joinpath(f"density_ratios_{context}_{suffix}.{ext}"),
            dpi=dpi, transparent=transparent)
    plt.show()

    return 0
    # NOTE(review): everything below is unreachable (dead code after the
    # unconditional return above).  It also hardcodes the label
    # r"$\frac{1}{3}$" regardless of the `gamma` parameter — fix if revived.

    # Build DataFrame
    rows = []

    # rows.append(dict(x=X_grid.squeeze(axis=-1),
    #                  y=r.top.prob(X_grid).numpy().squeeze(axis=-1),
    #                  density=r"$\ell(x)$", kind=r"$\textsc{exact}$"))
    # rows.append(dict(x=X_grid.squeeze(axis=-1),
    #                  y=r.bot.prob(X_grid).numpy().squeeze(axis=-1),
    #                  density=r"$g(x)$", kind=r"$\textsc{exact}$"))

    rows.append(
        dict(x=X_grid.squeeze(axis=-1),
             y=r.top.prob(X_grid).numpy().squeeze(axis=-1),
             kind=r"$\ell(x)$"))
    rows.append(
        dict(x=X_grid.squeeze(axis=-1),
             y=r.bot.prob(X_grid).numpy().squeeze(axis=-1),
             kind=r"$g(x)$"))
    rows.append(
        dict(x=X_grid.squeeze(axis=-1),
             y=r.ratio(X_grid).numpy().squeeze(axis=-1),
             kind=r"$r_0(x)$"))
    rows.append(dict(x=X_grid.squeeze(axis=-1),
                     y=gamma_relative_density_ratio(r.ratio(X_grid), gamma=gamma) \
                         .numpy().squeeze(axis=-1),
                     kind=fr"$r_{{{gamma:.2f}}}(x)$"))

    if estimation:
        rows.append(
            dict(x=X_grid.squeeze(axis=-1),
                 y=kde_lesser.evaluate(X_grid.ravel()),
                 density=r"$\ell(x)$", kind=r"$\textsc{kde}$"))
        rows.append(
            dict(x=X_grid.squeeze(axis=-1),
                 y=kde_greater.evaluate(X_grid.ravel()),
                 density=r"$g(x)$", kind=r"$\textsc{kde}$"))

    frames = map(pd.DataFrame, rows)
    data = pd.concat(frames, axis="index", ignore_index=True, sort=True)

    fig, ax = plt.subplots()

    sns.lineplot(x='x', y='y', hue="kind", data=data, ax=ax)
    # sns.lineplot(x='x', y='y', hue="density", style="kind", data=data, ax=ax)

    # sns.rugplot(X_p.squeeze(), height=0.02, c='tab:blue', alpha=0.2, ax=ax)
    # sns.rugplot(X_q.squeeze(), height=0.02, c='tab:orange', alpha=0.2, ax=ax)

    ax.set_xlabel('$x$')
    ax.set_ylabel('density')

    plt.tight_layout()
    for ext in extension:
        fig.savefig(
            output_path.joinpath(f"densities_{context}_{suffix}.{ext}"),
            dpi=dpi, transparent=transparent)
    plt.show()

    if not estimation:
        return 0

    # clf = SVC(C=100.0, kernel="rbf", probability=True, tol=1e-9).fit(X_train, y_train)

    r_mlp = MLPDensityRatioEstimator(num_layers=3, num_units=32,
                                     activation="elu")
    r_mlp.compile(optimizer="adam", metrics=["accuracy"])
    r_mlp.fit(X_p, X_q, epochs=500, batch_size=64)

    # Build DataFrame
    rows = []

    # exact
    rows.append({
        'x': X_grid.squeeze(axis=-1),
        'y': r.ratio(X_grid).numpy().squeeze(axis=-1),
        'kind': r"$\textsc{exact}$", r'$\gamma$': r"$0$"
    })
    rows.append({'x': X_grid.squeeze(axis=-1),
                 'y': gamma_relative_density_ratio(r.ratio(X_grid), gamma=gamma) \
                     .numpy().squeeze(axis=-1),
                 'kind': r"$\textsc{exact}$", r'$\gamma$': r"$\frac{1}{3}$"})

    # cpe
    rows.append({
        'x': X_grid.squeeze(axis=-1),
        # 'y': np.exp(- clf.decision_function(X_grid) * clf.probA_ + clf.probB_) * (1 - gamma) / gamma,
        'y': r_mlp.ratio(X_grid) * (1 - gamma) / gamma,
        'kind': r"$\textsc{cpe}$", r'$\gamma$': r"$0$"
    })
    rows.append({
        'x': X_grid.squeeze(axis=-1),
        'y': r_mlp.prob(X_grid) / gamma,
        # 'y': clf.predict_proba(X_grid).T[1] / gamma,
        'kind': r"$\textsc{cpe}$", r'$\gamma$': r"$\frac{1}{3}$"
    })

    # kde
    rows.append({
        'x': X_grid.squeeze(axis=-1),
        'y': kde_lesser.evaluate(X_grid.ravel()) / kde_greater.evaluate(X_grid.ravel()),
        'kind': r"$\textsc{kde}$", r'$\gamma$': r"$0$"
    })
    rows.append({
        'x': X_grid.squeeze(axis=-1),
        'y': gamma_relative_density_ratio(
            kde_lesser.evaluate(X_grid.ravel()) / kde_greater.evaluate(X_grid.ravel()),
            gamma),
        'kind': r"$\textsc{kde}$", r'$\gamma$': r"$\frac{1}{3}$"
    })

    data = pd.concat(map(pd.DataFrame, rows), axis="index",
                     ignore_index=True, sort=True)

    fig, ax = plt.subplots()

    sns.lineplot(x='x', y='y', hue="kind", style=r"$\gamma$", palette="Set1",
                 data=data, ax=ax)

    ax.set_xlabel(r"$x$")
    ax.set_ylabel(r"$r_{\gamma}(x)$")

    # ax.set_ylim(-0.01, 1/gamma+0.1)

    plt.tight_layout()
    for ext in extension:
        fig.savefig(output_path.joinpath(f"ratios_{context}_{suffix}.{ext}"),
                    dpi=dpi, transparent=transparent)
    plt.show()

    return 0
def main(name, gamma, output_dir, transparent, context, style, palette, width,
         height, aspect, dpi, extension, seed):
    """Compare exact, KDE and classifier-based (CPE) relative density ratios.

    Samples from a bimodal-mixture ``p`` and a broad normal ``q``, then:

    1. plots exact densities against KDE estimates (with sample rug plots);
    2. plots the exact gamma-relative density ratio against the KDE estimate
       and a class-probability estimate from an MLP;
    3. repeats (2) for several off-the-shelf classifiers (SVM / random forest
       / XGBoost), one figure per classifier.

    All figures are saved under ``output_dir/name`` with a ``{context}_{WxH}``
    suffix, once per requested ``extension``.

    Assumes ``sns``, ``np``, ``plt``, ``pd``, ``sm``, ``tfd``, ``xgb``,
    ``SVC``, ``RandomForestClassifier`` and the project helpers
    (``DensityRatioMarginals``, ``make_classification_dataset``,
    ``gamma_relative_density_ratio``, ``MLPDensityRatioEstimator``) are
    imported at module level — not visible in this chunk; confirm.

    Returns 0 (CLI exit code).
    """
    num_features = 1  # dimensionality of the synthetic input space
    num_train = 1000  # nbr training points in synthetic dataset
    x_min, x_max = -5.0, 5.0
    num_index_points = 512  # nbr of index points (plotting grid resolution)

    if height is None:
        height = width / aspect
    figsize = (width, height)
    # Pixel dimensions, embedded in output filenames.
    suffix = f"{width*dpi:.0f}x{height*dpi:.0f}"

    rc = {
        "figure.figsize": figsize,
        "font.serif": ["Times New Roman"],
        "text.usetex": True,
    }
    sns.set(context=context, style=style, palette=palette, font="serif", rc=rc)

    output_path = Path(output_dir).joinpath(name)
    output_path.mkdir(parents=True, exist_ok=True)

    random_state = np.random.RandomState(seed)
    # /preamble

    X_grid = np.linspace(x_min, x_max, num_index_points) \
                .reshape(-1, num_features)

    # Numerator ell(x): bimodal mixture; denominator g(x): broad normal.
    p = tfd.MixtureSameFamily(
        mixture_distribution=tfd.Categorical(probs=[0.3, 0.7]),
        components_distribution=tfd.Normal(loc=[2.0, -3.0], scale=[1.0, 0.5]))
    q = tfd.Normal(loc=0.0, scale=2.0)

    r = DensityRatioMarginals(top=p, bot=q)
    X_p, X_q = r.make_dataset(num_train, rate=gamma, seed=seed)
    X_train, y_train = make_classification_dataset(X_p, X_q)

    kde_lesser = sm.nonparametric.KDEUnivariate(X_p)
    kde_lesser.fit(bw="normal_reference")

    kde_greater = sm.nonparametric.KDEUnivariate(X_q)
    kde_greater.fit(bw="normal_reference")

    # --- Figure 1: exact densities vs. KDE estimates ----------------------
    rows = []
    rows.append(
        dict(x=X_grid.squeeze(axis=-1),
             y=r.top.prob(X_grid).numpy().squeeze(axis=-1),
             density=r"$\ell(x)$", kind=r"$\textsc{exact}$"))
    rows.append(
        dict(x=X_grid.squeeze(axis=-1),
             y=r.bot.prob(X_grid).numpy().squeeze(axis=-1),
             density=r"$g(x)$", kind=r"$\textsc{exact}$"))
    rows.append(
        dict(x=X_grid.squeeze(axis=-1),
             y=kde_lesser.evaluate(X_grid.ravel()),
             density=r"$\ell(x)$", kind=r"$\textsc{kde}$"))
    rows.append(
        dict(x=X_grid.squeeze(axis=-1),
             y=kde_greater.evaluate(X_grid.ravel()),
             density=r"$g(x)$", kind=r"$\textsc{kde}$"))

    frames = map(pd.DataFrame, rows)
    data = pd.concat(frames, axis="index", ignore_index=True, sort=True)

    fig, ax = plt.subplots()
    sns.lineplot(x='x', y='y', hue="density", style="kind", data=data, ax=ax)

    # Restart the color cycle so the rug plots reuse the line colors.
    ax.set_prop_cycle(None)
    ax.set_ylim(-0.025, None)  # leave headroom below zero for the rugs
    ax.set_xlim(1.1 * X_grid.min(), 1.1 * X_grid.max())

    sns.rugplot(X_p.squeeze(), height=0.02, alpha=0.2, ax=ax)
    sns.rugplot(X_q.squeeze(), height=0.02, alpha=0.2, ax=ax)

    ax.set_xlabel(r'$x$')
    ax.set_ylabel('density')

    plt.tight_layout()
    for ext in extension:
        fig.savefig(
            output_path.joinpath(f"densities_{context}_{suffix}.{ext}"),
            dpi=dpi, transparent=transparent)
    plt.show()

    # --- Ratio estimators -------------------------------------------------
    classifiers = dict(svm=SVC(C=10.0, kernel="rbf", probability=True,
                               tol=1e-9),
                       rf=RandomForestClassifier(n_estimators=16, max_depth=3,
                                                 random_state=random_state),
                       xgb=xgb.XGBClassifier(n_estimators=16, max_depth=3,
                                             use_label_encoder=False,
                                             random_state=random_state))

    r_mlp = MLPDensityRatioEstimator(num_layers=3, num_units=32,
                                     activation="elu")
    r_mlp.compile(optimizer="adam", metrics=["accuracy"])
    r_mlp.fit(X_p, X_q, epochs=500, batch_size=64)

    # Hoist the reference curves: they are identical in every ratio figure,
    # so compute the exact and KDE gamma-relative ratios once instead of
    # re-evaluating the KDEs inside each classifier iteration.
    x = X_grid.squeeze(axis=-1)
    ratio_exact = gamma_relative_density_ratio(
        r.ratio(X_grid), gamma=gamma).numpy().squeeze(axis=-1)
    ratio_kde = gamma_relative_density_ratio(
        kde_lesser.evaluate(X_grid.ravel()) /
        kde_greater.evaluate(X_grid.ravel()),
        gamma=gamma)

    def _plot_ratio_figure(estimate, estimate_label, filename_stem):
        """Plot exact/KDE reference ratios plus one estimate; save and show."""
        fig, ax = plt.subplots()
        ax.plot(x, ratio_exact, label=r"$\textsc{exact}$")
        ax.plot(x, ratio_kde, alpha=0.8, label=r"$\textsc{kde}$")
        ax.plot(x, estimate, alpha=0.8, label=estimate_label)
        ax.set_xlabel(r"$x$")
        ax.set_ylabel(r"$r_{\gamma}(x)$")
        ax.set_xlim(1.1 * X_grid.min(), 1.1 * X_grid.max())
        ax.legend()
        plt.tight_layout()
        for ext in extension:
            fig.savefig(
                output_path.joinpath(
                    f"{filename_stem}_{context}_{suffix}.{ext}"),
                dpi=dpi, transparent=transparent)
        plt.show()

    # Fixed: the label was r"$\textsc{{cpe}}$ (\textsc{mlp})" — a raw
    # (non-f) string, so the doubled braces rendered literally as invalid
    # LaTeX, inconsistent with the per-classifier labels below.
    _plot_ratio_figure(r_mlp.prob(X_grid) / gamma,
                       r"$\textsc{cpe}$ (\textsc{mlp})",
                       "ratios_mlp")

    # One figure per off-the-shelf probabilistic classifier.
    for clf_name, clf in classifiers.items():
        clf = clf.fit(X_train, y_train)
        _plot_ratio_figure(clf.predict_proba(X_grid).T[1] / gamma,
                           rf"$\textsc{{cpe}}$ (\textsc{{{clf_name}}})",
                           f"ratios_{clf_name}")

    return 0