Python make_classification_dataset 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: bore_experiments.datasets

메소드/함수: make_classification_dataset

hotexamples.com에서의 예제들: 3

Python make_classification_dataset - 3개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 bore_experiments.datasets.make_classification_dataset의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 각 예제를 평가하면 예제의 품질 향상에 도움이 됩니다.

예제 #1

파일 보기

    def evaluate(self, X_top, X_bot, *args, **kwargs):
        """Evaluate the wrapped model on a dataset built from two sample sets.

        ``X_top`` and ``X_bot`` are handed to ``make_classification_dataset``;
        the resulting inputs and targets, together with any extra positional
        and keyword arguments, are forwarded to ``self.model.evaluate`` and
        its result is returned unchanged.
        """
        inputs, targets = make_classification_dataset(X_top, X_bot)
        result = self.model.evaluate(inputs, targets, *args, **kwargs)
        return result

예제 #2

파일 보기

def main(name, gamma, estimation, output_dir, transparent, context, style,
         palette, width, height, aspect, dpi, extension, seed):
    """Plot two analytic densities and their (gamma-relative) density ratio.

    The reachable part of this function draws a two-row figure over a grid
    of 512 points on [-5, 5]: the top axes show ``r.top.prob`` and
    ``r.bot.prob``, the bottom axes show ``r.ratio`` and
    ``gamma_relative_density_ratio`` of that ratio. The figure is saved as
    ``density_ratios_{context}_{suffix}.{ext}`` under ``output_dir/name``
    once per entry of ``extension`` and then shown.

    NOTE(review): an unconditional ``return 0`` follows the first figure, so
    everything after it (the DataFrame-based density plot, the MLP fit and
    the ratio comparison plot) is never executed. Either the early return or
    the code below it should be removed -- confirm the intent with the author.

    Args:
        name: Sub-directory of ``output_dir`` that receives the figures; it
            is created if missing.
        gamma: Passed as ``rate`` to ``r.make_dataset`` and as ``gamma`` to
            ``gamma_relative_density_ratio``; also formatted into a legend
            label.
        estimation: Only read in the unreachable part of the function.
        output_dir: Base output directory.
        transparent: Forwarded to ``fig.savefig``.
        context, style, palette: Forwarded to ``sns.set``.
        width: Figure width; also used (times ``dpi``) in the file suffix.
        height: Figure height; when ``None`` it is ``width / aspect``.
        aspect: Only used to derive ``height`` when ``height`` is ``None``.
        dpi: Forwarded to ``fig.savefig`` and used in the file suffix.
        extension: Iterable of file extensions to save the figure under.
        seed: Seeds ``np.random.RandomState`` and is passed to
            ``r.make_dataset``.

    Returns:
        Always ``0``.
    """

    num_features = 1  # dimensionality
    num_train = 1000  # nbr training points in synthetic dataset
    # x_min, x_max = -6.0, 6.0
    x_min, x_max = -5.0, 5.0
    num_index_points = 512  # nbr of index points

    # Derive the height from the aspect ratio when it is not given.
    if height is None:
        height = width / aspect
    # figsize = size(width, aspect)
    figsize = (width, height)
    # File-name suffix: figure size in pixels, e.g. "640x480".
    suffix = f"{width*dpi:.0f}x{height*dpi:.0f}"

    rc = {
        "figure.figsize": figsize,
        "font.serif": ["Times New Roman"],
        "text.usetex": True,
    }
    sns.set(context=context, style=style, palette=palette, font="serif", rc=rc)

    output_path = Path(output_dir).joinpath(name)
    output_path.mkdir(parents=True, exist_ok=True)

    # NOTE(review): `random_state` is not referenced again in this function.
    random_state = np.random.RandomState(seed)
    # /preamble

    # Evaluation grid as a column matrix of shape (num_index_points, 1).
    X_grid = np.linspace(x_min, x_max, num_index_points) \
               .reshape(-1, num_features)

    # Numerator density: two-component Gaussian mixture.
    p = tfd.MixtureSameFamily(
        mixture_distribution=tfd.Categorical(probs=[0.3, 0.7]),
        components_distribution=tfd.Normal(loc=[2.0, -3.0], scale=[1.0, 0.5]))
    # Denominator density: a single Gaussian.
    q = tfd.Normal(loc=0.0, scale=2.0)

    # p = tfd.Normal(loc=0.0, scale=1.0)
    # q = tfd.Normal(loc=0.5, scale=1.0)

    # p = tfd.Normal(loc=1.0, scale=1.0)
    # q = tfd.Normal(loc=0.0, scale=2.0)

    r = DensityRatioMarginals(top=p, bot=q)

    X_p, X_q = r.make_dataset(num_train, rate=gamma, seed=seed)
    # NOTE(review): `X_train` / `y_train` are only referenced by a
    # commented-out SVC fit further down; they are otherwise unused.
    X_train, y_train = make_classification_dataset(X_p, X_q)

    # NOTE(review): both KDE fits are only consumed after the early
    # `return 0` below, i.e. their results are never used at runtime.
    kde_lesser = sm.nonparametric.KDEUnivariate(X_p)
    kde_lesser.fit(bw="normal_reference")

    kde_greater = sm.nonparametric.KDEUnivariate(X_q)
    kde_greater.fit(bw="normal_reference")

    # Two stacked axes sharing the x-axis: densities on top, ratios below.
    fig, (ax1, ax2) = plt.subplots(nrows=2, sharex="col")

    # fig, ax1 = plt.subplots()

    l, = ax1.plot(X_grid.squeeze(axis=-1),
                  r.top.prob(X_grid).numpy().squeeze(axis=-1),
                  label=r"$\ell(x)$")
    g, = ax1.plot(X_grid.squeeze(axis=-1),
                  r.bot.prob(X_grid).numpy().squeeze(axis=-1),
                  label=r"$g(x)$")

    # ax1.annotate(r"$\mathcal{N}(0, 1)$",
    #              xy=(-4.3, 0.22),
    #              xycoords='data', xytext=(1, 1),
    #              color=l.get_color(),
    #              textcoords='offset points', fontsize="small",
    #              # arrowprops=dict(facecolor='black', shrink=0.05),
    #              # bbox=dict(boxstyle="round", fc="none"),
    #              horizontalalignment='left', verticalalignment='top')

    # ax1.annotate(r"$\mathcal{N}(0.5, 1)$",
    #              xy=(1.4, 0.35),
    #              xycoords='data', xytext=(1, 1),
    #              color=g.get_color(),
    #              textcoords='offset points', fontsize="small",
    #              # arrowprops=dict(facecolor='black', shrink=0.05),
    #              # bbox=dict(boxstyle="round", fc="none"),
    #              horizontalalignment='left', verticalalignment='top')

    ax1.set_xlabel(r'$x$')
    ax1.set_ylabel('density')

    ax1.legend()

    # plt.tight_layout()

    # for ext in extension:
    #     fig.savefig(output_path.joinpath(f"densities_{context}_{suffix}.{ext}"),
    #                 dpi=dpi, transparent=transparent)

    # plt.show()

    # fig, ax2 = plt.subplots()

    # Ordinary ratio (labelled r_0) and its gamma-relative counterpart.
    foo, = ax2.plot(X_grid.squeeze(axis=-1),
                    r.ratio(X_grid).numpy().squeeze(axis=-1),
                    label=r"$r_0(x)$",
                    color="tab:orange")
    bar, = ax2.plot(X_grid.squeeze(axis=-1),
                    gamma_relative_density_ratio(
                        r.ratio(X_grid), gamma=gamma).numpy().squeeze(axis=-1),
                    label=fr"$r_{{{gamma:.2f}}}(x)$",
                    color="tab:green")

    # ax2.annotate(r"$\gamma=0$",
    #              xy=(-4.3, r.ratio([-4.3]).numpy().squeeze(axis=-1)),
    #              xycoords='data', xytext=(4, 2),
    #              color=foo.get_color(),
    #              textcoords='offset points', fontsize="small",
    #              # arrowprops=dict(facecolor='black', shrink=0.05),
    #              # bbox=dict(boxstyle="round", fc="none"),
    #              horizontalalignment='left', verticalalignment='top')

    # ax2.annotate(r"$\gamma=\frac{1}{4}$",
    #              xy=(-4.8, 1.5),
    #              xycoords='data', xytext=(1, 1),
    #              color=bar.get_color(),
    #              textcoords='offset points', fontsize="small",
    #              # arrowprops=dict(facecolor='black', shrink=0.05),
    #              # bbox=dict(boxstyle="round", fc="none"),
    #              horizontalalignment='left', verticalalignment='top')

    # ax2.set_ylim(-0.1, 5.0)

    # ax2.plot(X_grid.squeeze(axis=-1),
    #          r.ratio(X_grid).numpy().squeeze(axis=-1), label=r"$\frac{\ell(x)}{g(x)}$",
    #          color="tab:orange")
    # # ax2.plot(X_grid.squeeze(axis=-1),
    # #          gamma_relative_density_ratio(r.ratio(X_grid), gamma=gamma)
    # #                .numpy().squeeze(axis=-1), label=fr"$r_{{{gamma:.2f}}}(x)$",
    # #          color="tab:green")

    ax2.set_xlabel(r'$x$')
    ax2.set_ylabel('density ratio')

    ax2.legend()

    plt.tight_layout()

    # Save the figure once per requested file extension.
    for ext in extension:
        fig.savefig(
            output_path.joinpath(f"density_ratios_{context}_{suffix}.{ext}"),
            dpi=dpi,
            transparent=transparent)

    plt.show()

    # NOTE(review): unconditional early exit -- every statement below this
    # line is unreachable. Looks like a leftover short-circuit; confirm
    # whether the remainder should run or be deleted.
    return 0

    # Build DataFrame
    rows = []
    # rows.append(dict(x=X_grid.squeeze(axis=-1),
    #                  y=r.top.prob(X_grid).numpy().squeeze(axis=-1),
    #                  density=r"$\ell(x)$", kind=r"$\textsc{exact}$"))
    # rows.append(dict(x=X_grid.squeeze(axis=-1),
    #                  y=r.bot.prob(X_grid).numpy().squeeze(axis=-1),
    #                  density=r"$g(x)$", kind=r"$\textsc{exact}$"))

    rows.append(
        dict(x=X_grid.squeeze(axis=-1),
             y=r.top.prob(X_grid).numpy().squeeze(axis=-1),
             kind=r"$\ell(x)$"))
    rows.append(
        dict(x=X_grid.squeeze(axis=-1),
             y=r.bot.prob(X_grid).numpy().squeeze(axis=-1),
             kind=r"$g(x)$"))

    rows.append(
        dict(x=X_grid.squeeze(axis=-1),
             y=r.ratio(X_grid).numpy().squeeze(axis=-1),
             kind=r"$r_0(x)$"))
    rows.append(dict(x=X_grid.squeeze(axis=-1),
                     y=gamma_relative_density_ratio(r.ratio(X_grid), gamma=gamma) \
                             .numpy().squeeze(axis=-1),
                     kind=fr"$r_{{{gamma:.2f}}}(x)$"))

    # NOTE(review): these rows add a `density` column that the rows above do
    # not have, and both share the same `kind`, which is the plotted hue.
    if estimation:
        rows.append(
            dict(x=X_grid.squeeze(axis=-1),
                 y=kde_lesser.evaluate(X_grid.ravel()),
                 density=r"$\ell(x)$",
                 kind=r"$\textsc{kde}$"))
        rows.append(
            dict(x=X_grid.squeeze(axis=-1),
                 y=kde_greater.evaluate(X_grid.ravel()),
                 density=r"$g(x)$",
                 kind=r"$\textsc{kde}$"))

    # One DataFrame per curve, stacked into a single long-format frame.
    frames = map(pd.DataFrame, rows)
    data = pd.concat(frames, axis="index", ignore_index=True, sort=True)

    fig, ax = plt.subplots()

    sns.lineplot(x='x', y='y', hue="kind", data=data, ax=ax)

    # sns.lineplot(x='x', y='y', hue="density", style="kind", data=data, ax=ax)

    # sns.rugplot(X_p.squeeze(), height=0.02, c='tab:blue', alpha=0.2, ax=ax)
    # sns.rugplot(X_q.squeeze(), height=0.02, c='tab:orange', alpha=0.2, ax=ax)

    ax.set_xlabel('$x$')
    ax.set_ylabel('density')

    plt.tight_layout()

    for ext in extension:
        fig.savefig(
            output_path.joinpath(f"densities_{context}_{suffix}.{ext}"),
            dpi=dpi,
            transparent=transparent)

    plt.show()

    if not estimation:
        return 0

    # clf = SVC(C=100.0, kernel="rbf", probability=True, tol=1e-9).fit(X_train, y_train)

    r_mlp = MLPDensityRatioEstimator(num_layers=3,
                                     num_units=32,
                                     activation="elu")
    r_mlp.compile(optimizer="adam", metrics=["accuracy"])
    r_mlp.fit(X_p, X_q, epochs=500, batch_size=64)

    # Build DataFrame
    # NOTE(review): the gamma legend values below are hard-coded to 0 and
    # 1/3 and do not follow the `gamma` argument -- verify before enabling.
    rows = []
    # exact
    rows.append({
        'x': X_grid.squeeze(axis=-1),
        'y': r.ratio(X_grid).numpy().squeeze(axis=-1),
        'kind': r"$\textsc{exact}$",
        r'$\gamma$': r"$0$"
    })
    rows.append({'x': X_grid.squeeze(axis=-1),
                 'y': gamma_relative_density_ratio(r.ratio(X_grid), gamma=gamma) \
                            .numpy().squeeze(axis=-1),
                 'kind': r"$\textsc{exact}$", r'$\gamma$': r"$\frac{1}{3}$"})
    # cpe
    rows.append({
        'x': X_grid.squeeze(axis=-1),
        # 'y': np.exp(- clf.decision_function(X_grid) * clf.probA_ + clf.probB_) * (1 - gamma) / gamma,
        'y': r_mlp.ratio(X_grid) * (1 - gamma) / gamma,
        'kind': r"$\textsc{cpe}$",
        r'$\gamma$': r"$0$"
    })
    rows.append({
        'x': X_grid.squeeze(axis=-1),
        'y': r_mlp.prob(X_grid) / gamma,
        # 'y': clf.predict_proba(X_grid).T[1] / gamma,
        'kind': r"$\textsc{cpe}$",
        r'$\gamma$': r"$\frac{1}{3}$"
    })
    # kde
    rows.append({
        'x':
        X_grid.squeeze(axis=-1),
        'y':
        kde_lesser.evaluate(X_grid.ravel()) /
        kde_greater.evaluate(X_grid.ravel()),
        'kind':
        r"$\textsc{kde}$",
        r'$\gamma$':
        r"$0$"
    })
    rows.append({
        'x':
        X_grid.squeeze(axis=-1),
        'y':
        gamma_relative_density_ratio(
            kde_lesser.evaluate(X_grid.ravel()) /
            kde_greater.evaluate(X_grid.ravel()), gamma),
        'kind':
        r"$\textsc{kde}$",
        r'$\gamma$':
        r"$\frac{1}{3}$"
    })
    data = pd.concat(map(pd.DataFrame, rows),
                     axis="index",
                     ignore_index=True,
                     sort=True)

    fig, ax = plt.subplots()

    sns.lineplot(x='x',
                 y='y',
                 hue="kind",
                 style=r"$\gamma$",
                 palette="Set1",
                 data=data,
                 ax=ax)

    ax.set_xlabel(r"$x$")
    ax.set_ylabel(r"$r_{\gamma}(x)$")

    # ax.set_ylim(-0.01, 1/gamma+0.1)

    plt.tight_layout()

    for ext in extension:
        fig.savefig(output_path.joinpath(f"ratios_{context}_{suffix}.{ext}"),
                    dpi=dpi,
                    transparent=transparent)

    plt.show()

    return 0

예제 #3

파일 보기

def main(name, gamma, output_dir, transparent, context, style, palette, width,
         height, aspect, dpi, extension, seed):
    """Compare exact, KDE and classifier-based density(-ratio) estimates.

    Draws samples from a fixed Gaussian mixture (numerator) and a Gaussian
    (denominator), then produces:

    1. ``densities_*``: exact vs. KDE densities with rug plots of the samples.
    2. ``ratios_mlp_*``: exact and KDE gamma-relative ratios vs. the MLP
       estimator's ``prob(X_grid) / gamma``.
    3. ``ratios_<clf>_*`` for each of ``svm``, ``rf`` and ``xgb``: the same
       comparison using ``predict_proba(X_grid).T[1] / gamma``.

    Every figure is written to ``output_dir/name`` once per entry of
    ``extension`` and then shown.

    Args:
        name: Sub-directory of ``output_dir`` that receives the figures; it
            is created if missing.
        gamma: Passed as ``rate`` to ``r.make_dataset`` and as ``gamma`` to
            ``gamma_relative_density_ratio``; also the divisor applied to the
            classifier probabilities.
        output_dir: Base output directory.
        transparent: Forwarded to ``fig.savefig``.
        context, style, palette: Forwarded to ``sns.set``.
        width: Figure width; also used (times ``dpi``) in the file suffix.
        height: Figure height; when ``None`` it is ``width / aspect``.
        aspect: Only used to derive ``height`` when ``height`` is ``None``.
        dpi: Forwarded to ``fig.savefig`` and used in the file suffix.
        extension: Iterable of file extensions to save each figure under.
        seed: Seeds the ``RandomState`` shared by the random-forest and
            XGBoost classifiers and is passed to ``r.make_dataset``.

    Returns:
        Always ``0``.
    """
    num_features = 1  # dimensionality
    num_train = 1000  # nbr training points in synthetic dataset
    x_min, x_max = -5.0, 5.0
    num_index_points = 512  # nbr of index points

    if height is None:
        height = width / aspect
    figsize = (width, height)
    # File-name suffix: figure size in pixels, e.g. "640x480".
    suffix = f"{width*dpi:.0f}x{height*dpi:.0f}"

    rc = {
        "figure.figsize": figsize,
        "font.serif": ["Times New Roman"],
        "text.usetex": True,
    }
    sns.set(context=context, style=style, palette=palette, font="serif", rc=rc)

    output_path = Path(output_dir).joinpath(name)
    output_path.mkdir(parents=True, exist_ok=True)

    random_state = np.random.RandomState(seed)

    def finalize(fig, stem):
        """Tighten the layout, save ``fig`` once per extension, then show."""
        plt.tight_layout()
        for ext in extension:
            fig.savefig(
                output_path.joinpath(f"{stem}_{context}_{suffix}.{ext}"),
                dpi=dpi,
                transparent=transparent)
        plt.show()

    # Evaluation grid as a column matrix, plus the flat view used for
    # plotting and KDE evaluation.
    X_grid = np.linspace(x_min, x_max, num_index_points) \
               .reshape(-1, num_features)
    x = X_grid.squeeze(axis=-1)
    xlim = (1.1 * X_grid.min(), 1.1 * X_grid.max())

    p = tfd.MixtureSameFamily(
        mixture_distribution=tfd.Categorical(probs=[0.3, 0.7]),
        components_distribution=tfd.Normal(loc=[2.0, -3.0], scale=[1.0, 0.5]))
    q = tfd.Normal(loc=0.0, scale=2.0)

    r = DensityRatioMarginals(top=p, bot=q)

    X_p, X_q = r.make_dataset(num_train, rate=gamma, seed=seed)
    X_train, y_train = make_classification_dataset(X_p, X_q)

    kde_lesser = sm.nonparametric.KDEUnivariate(X_p)
    kde_lesser.fit(bw="normal_reference")

    kde_greater = sm.nonparametric.KDEUnivariate(X_q)
    kde_greater.fit(bw="normal_reference")

    # Evaluate both KDEs once; the values feed the density figure and every
    # ratio figure below.
    kde_top = kde_lesser.evaluate(X_grid.ravel())
    kde_bot = kde_greater.evaluate(X_grid.ravel())

    # --- Figure 1: exact vs. KDE densities -------------------------------
    rows = [
        dict(x=x,
             y=r.top.prob(X_grid).numpy().squeeze(axis=-1),
             density=r"$\ell(x)$",
             kind=r"$\textsc{exact}$"),
        dict(x=x,
             y=r.bot.prob(X_grid).numpy().squeeze(axis=-1),
             density=r"$g(x)$",
             kind=r"$\textsc{exact}$"),
        dict(x=x,
             y=kde_top,
             density=r"$\ell(x)$",
             kind=r"$\textsc{kde}$"),
        dict(x=x,
             y=kde_bot,
             density=r"$g(x)$",
             kind=r"$\textsc{kde}$"),
    ]
    data = pd.concat(map(pd.DataFrame, rows),
                     axis="index",
                     ignore_index=True,
                     sort=True)

    fig, ax = plt.subplots()

    sns.lineplot(x='x', y='y', hue="density", style="kind", data=data, ax=ax)

    # Restart the colour cycle so the rug plots reuse the line colours.
    ax.set_prop_cycle(None)
    ax.set_ylim(-0.025, None)
    ax.set_xlim(*xlim)

    sns.rugplot(X_p.squeeze(), height=0.02, alpha=0.2, ax=ax)
    sns.rugplot(X_q.squeeze(), height=0.02, alpha=0.2, ax=ax)

    ax.set_xlabel(r'$x$')
    ax.set_ylabel('density')

    finalize(fig, "densities")

    # --- Estimators ------------------------------------------------------
    # The same RandomState instance is shared by `rf` and `xgb`.
    classifiers = dict(svm=SVC(C=10.0,
                               kernel="rbf",
                               probability=True,
                               tol=1e-9),
                       rf=RandomForestClassifier(n_estimators=16,
                                                 max_depth=3,
                                                 random_state=random_state),
                       xgb=xgb.XGBClassifier(n_estimators=16,
                                             max_depth=3,
                                             use_label_encoder=False,
                                             random_state=random_state))

    r_mlp = MLPDensityRatioEstimator(num_layers=3,
                                     num_units=32,
                                     activation="elu")
    r_mlp.compile(optimizer="adam", metrics=["accuracy"])
    r_mlp.fit(X_p, X_q, epochs=500, batch_size=64)

    # --- Figures 2..n: gamma-relative ratio comparisons ------------------
    # The reference curves do not depend on the estimator being plotted, so
    # compute them once instead of once per figure.
    exact_ratio = gamma_relative_density_ratio(
        r.ratio(X_grid), gamma=gamma).numpy().squeeze(axis=-1)
    kde_ratio = gamma_relative_density_ratio(kde_top / kde_bot, gamma=gamma)

    def plot_ratio_estimate(estimator_name, estimate):
        """Plot ``estimate`` against the exact and KDE reference curves."""
        fig, ax = plt.subplots()

        ax.plot(x, exact_ratio, label=r"$\textsc{exact}$")
        ax.plot(x, kde_ratio, alpha=0.8, label=r"$\textsc{kde}$")
        # Doubled braces are f-string escapes: the rendered label is
        # "$\textsc{cpe}$ (\textsc{<name>})".
        ax.plot(x,
                estimate,
                alpha=0.8,
                label=rf"$\textsc{{cpe}}$ (\textsc{{{estimator_name}}})")

        ax.set_xlabel(r"$x$")
        ax.set_ylabel(r"$r_{\gamma}(x)$")
        ax.set_xlim(*xlim)
        ax.legend()

        finalize(fig, f"ratios_{estimator_name}")

    plot_ratio_estimate("mlp", r_mlp.prob(X_grid) / gamma)

    for clf_name, clf in classifiers.items():
        clf = clf.fit(X_train, y_train)
        plot_ratio_estimate(clf_name, clf.predict_proba(X_grid).T[1] / gamma)

    return 0