コード例 #1
0
def main():
    """Time three computations on samples drawn from a bivariate normal.

    For every correlation ``rho`` in ``rhos`` and every sample size, ``K``
    samples are drawn and three procedures are timed on each sample:

    * ``AdaptiveAlgorithm(xy_sample, delta, r, s).run()``  (reported as "CI"),
    * ``NonAdaptivePartition(xy_sample, bins=[50, 50]).run()``  ("NAD"),
    * ``-log(1 - pearsonr(x, y)**2) / 2``  ("ML").

    The mean duration of each procedure is stored as ``[ML, CI, NAD]`` under
    ``time_results[rho][0][sample_size]`` and the whole structure is handed to
    ``generate_timing_table``. The second slot of every ``time_results[rho]``
    entry is left as ``None`` by this script.
    """
    sample_sizes = [250, 500, 1000, 2000, 10000]
    rhos = [0, 0.3, 0.6, 0.9]
    r = 2
    s = 2
    K = 50

    # Threshold function handed to AdaptiveAlgorithm. It does not depend on
    # rho or on the sample size, so it is built once instead of per iteration.
    def delta(x):
        return chi2.ppf(0.97, x**2 - 1)

    time_results = {
        rho: [{ssize: [] for ssize in sample_sizes}, None]
        for rho in rhos
    }
    # Every computed value is retained so the timed work is never discarded.
    all_results = []

    for rho in rhos:
        for sample_size in sample_sizes:

            cov = np.array([[1., rho], [rho, 1.]])
            dist = MultivariateNormal(mean=np.zeros(2), cov=cov)

            t_ci, t_nad, t_ml = [], [], []

            # NOTE(review): the label reads "r" but the value printed is rho
            # (r is the separate constant above); message kept unchanged.
            print(f"Timing samples {sample_size} for r = {rho}")

            for _ in range(K):
                xy_sample = dist.sample(sample_size)

                # NOTE(review): `plane` is not read by anything below; the
                # call is kept in case Plane construction has side effects on
                # the sample -- confirm and drop if it does not.
                plane = Plane(xy_sample)

                # time.perf_counter() is a monotonic, high-resolution clock
                # intended for measuring short durations; time.time() can jump
                # when the system clock is adjusted.

                # Adaptive algorithm
                start = time.perf_counter()
                ad = AdaptiveAlgorithm(xy_sample, delta, r, s).run()
                t_ci.append(time.perf_counter() - start)

                # Non-adaptive partition with a fixed 50 x 50 grid
                start = time.perf_counter()
                nad = NonAdaptivePartition(xy_sample, bins=[50, 50]).run()
                t_nad.append(time.perf_counter() - start)

                # Closed form based on the sample Pearson correlation
                start = time.perf_counter()
                ml = -np.log(
                    1 - pearsonr(xy_sample[:, 0], xy_sample[:, 1])[0]**2) / 2
                t_ml.append(time.perf_counter() - start)

                all_results.append((ad, nad, ml))

            mean_ml = np.mean(t_ml)
            mean_ci = np.mean(t_ci)
            mean_nad = np.mean(t_nad)

            # Order expected downstream: ML, CI, NAD.
            time_results[rho][0][sample_size] = [mean_ml, mean_ci, mean_nad]

            print(
                f"Times: ML: {mean_ml}, CI: {mean_ci}, NAD: {mean_nad}"
            )

    generate_timing_table(time_results)

    print(len(all_results))
コード例 #2
0
def main():
    """Compare the adaptive-partition MI estimate for several ``r = s`` values.

    For every correlation ``rho`` and every sample size, ``K`` samples are
    drawn from a bivariate normal with unit variances and correlation ``rho``.
    On each sample ``kl_estimate(Plane(sample), AdaptiveAlgorithm(...).run())``
    is evaluated with ``r = s`` set to 2, 4, 5 and 10 in turn.

    * ``results[rho][0][sample_size]`` receives the mean estimate for each
      ``r = s`` value (in the order 2, 4, 5, 10);
    * ``results[rho][1]`` receives the reference value
      ``-log(1 - rho**2) / 2`` (printed as "Real MI");
    * the standard deviation of the estimates is plotted against the sample
      size, one figure per ``r = s`` value and one curve per ``rho``.
    """
    sample_sizes = [250, 500, 1000, 2000, 10000]
    rhos = [0, 0.3, 0.6, 0.9]
    rs_values = [2, 4, 5, 10]
    K = 20

    # Threshold function handed to AdaptiveAlgorithm; independent of rho and
    # of the sample size, so it is defined once.
    def delta(x):
        return chi2.ppf(0.97, x**2 - 1)

    results = {
        rho: [{ssize: [] for ssize in sample_sizes}, None]
        for rho in rhos
    }

    # std_by_rs[rs][rho_index][size_index] -> std of the K estimates.
    std_by_rs = {rs: [] for rs in rs_values}

    for rho in rhos:

        real_mi = -np.log(1 - rho**2) / 2
        results[rho][1] = real_mi

        std_for_rho = {rs: [] for rs in rs_values}

        for sample_size in sample_sizes:

            cov = np.array([[1., rho], [rho, 1.]])
            dist = MultivariateNormal(mean=np.zeros(2), cov=cov)

            estimates = {rs: [] for rs in rs_values}

            for _ in range(K):

                xy_sample = dist.sample(sample_size)

                # Adaptive algorithm, evaluated in the order 2, 4, 5, 10.
                for rs in rs_values:
                    # A fresh Plane is built for every estimate, as the
                    # original code did before each kl_estimate call.
                    plane = Plane(xy_sample)
                    estimates[rs].append(
                        kl_estimate(
                            plane,
                            AdaptiveAlgorithm(xy_sample, delta, rs,
                                              rs).run()))

            means = [np.mean(estimates[rs]) for rs in rs_values]
            results[rho][0][sample_size] = means

            print(
                "---------------------------------------------------------------------------------------------"
            )
            print("rho: %.2f, Sample Size: %d, Real MI: %.4f" %
                  (rho, sample_size, real_mi))
            # Fix: the r=s=10 column previously repeated the r=s=5 mean.
            print("r=s=2: %.4f, r=s=4: %.4f, r=s=5: %.4f, r=s=10: %.4f" %
                  tuple(means))

            # Fix: the r=s=10 std previously re-used the r=s=5 estimates.
            for rs in rs_values:
                std_for_rho[rs].append(np.std(estimates[rs]))

        for rs in rs_values:
            std_by_rs[rs].append(std_for_rho[rs])

    generate_rs_table(results)

    for rs in rs_values:

        plt.figure()
        for rho, stds in zip(rhos, std_by_rs[rs]):
            # float() keeps the integer 0 rendered as "0.0" in the legend.
            plt.semilogx(sample_sizes,
                         stds,
                         '-o',
                         label=rf'$\rho$ ={float(rho)}')
        # Raw strings: "\l" and "\h" are not valid escape sequences, so the
        # backslashes must be kept literally for mathtext.
        plt.xlabel(r'$\log_{10}$ of sample size')
        plt.ylabel(rf"std($\hat{{I}}_{{CI}}^{{r=s={rs}}}$)")
        plt.title(
            f"Standard deviation of MI estimator $I_{{CI}}$ with $r=s={rs}$")

        plt.legend()
        plt.show()
コード例 #3
0
def main():
    """Compare three MI estimators on samples from a bivariate normal.

    For every correlation ``rho`` and every sample size, ``K`` samples are
    drawn from a bivariate normal with unit variances and correlation ``rho``
    and three estimates are computed on each sample:

    * "CI": ``kl_estimate(plane, AdaptiveAlgorithm(sample, delta, r, s).run())``
    * "NA": ``kl_estimate(plane, NonAdaptivePartition(sample, bins=[50, 50]).run())``
    * "ML": ``-log(1 - pearsonr(x, y)**2) / 2``

    ``results[rho][0][sample_size]`` receives the mean estimates in the order
    ``[ML, CI, NA]`` and ``results[rho][1]`` the reference value
    ``-log(1 - rho**2) / 2`` (printed as "Real MI"). The standard deviation
    of each estimator is then plotted against the sample size, one figure per
    estimator and one curve per ``rho``.
    """
    sample_sizes = [250, 500, 1000, 2000, 10000]
    rhos = [0, 0.3, 0.6, 0.9]
    r = 2
    s = 2
    K = 50

    # Threshold function handed to AdaptiveAlgorithm; independent of rho and
    # of the sample size, so it is defined once.
    def delta(x):
        return chi2.ppf(0.97, x**2 - 1)

    results = {
        rho: [{ssize: [] for ssize in sample_sizes}, None]
        for rho in rhos
    }

    # std_by_estimator[name][rho_index][size_index] -> std of the K estimates.
    # Insertion order (CI, NA, ML) is the order in which figures are shown.
    std_by_estimator = {"CI": [], "NA": [], "ML": []}

    for rho in rhos:

        real_mi = -np.log(1 - rho**2) / 2
        results[rho][1] = real_mi

        ci_mi_std, na_mi_std, ml_mi_std = [], [], []

        for sample_size in sample_sizes:

            cov = np.array([[1., rho], [rho, 1.]])
            dist = MultivariateNormal(mean=np.zeros(2), cov=cov)

            ci_mi_l, na_mi_l, ml_mi_l = [], [], []

            for _ in range(K):

                xy_sample = dist.sample(sample_size)

                # NOTE(review): one Plane is shared by both kl_estimate calls
                # below; this assumes kl_estimate does not modify it --
                # confirm against the Plane / kl_estimate implementation.
                plane = Plane(xy_sample)

                # Adaptive algorithm
                ci_mi_l.append(
                    kl_estimate(
                        plane,
                        AdaptiveAlgorithm(xy_sample, delta, r, s).run()))

                # Non-adaptive partition with a fixed 50 x 50 grid
                na_mi_l.append(
                    kl_estimate(
                        plane,
                        NonAdaptivePartition(xy_sample, bins=[50, 50]).run()))

                # Closed form based on the sample Pearson correlation
                corr = pearsonr(xy_sample[:, 0], xy_sample[:, 1])[0]
                ml_mi_l.append(-np.log(1 - corr**2) / 2)

            ci_mean = np.mean(ci_mi_l)
            na_mean = np.mean(na_mi_l)
            ml_mean = np.mean(ml_mi_l)

            # Order expected downstream: ML, CI, NA.
            results[rho][0][sample_size] = [ml_mean, ci_mean, na_mean]

            print(
                "---------------------------------------------------------------------------------------------"
            )
            print("rho: %.2f, Sample Size: %d, Real MI: %.4f" %
                  (rho, sample_size, real_mi))
            print(
                "Adaptive Partition MI: %.4f, NA Partition MI: %.4f, ML MI: %.4f"
                % (ci_mean, na_mean, ml_mean))

            ci_mi_std.append(np.std(ci_mi_l))
            na_mi_std.append(np.std(na_mi_l))
            ml_mi_std.append(np.std(ml_mi_l))

        std_by_estimator["CI"].append(ci_mi_std)
        std_by_estimator["NA"].append(na_mi_std)
        std_by_estimator["ML"].append(ml_mi_std)

    generate_table(results)

    for name, std_per_rho in std_by_estimator.items():

        plt.figure()
        for rho, stds in zip(rhos, std_per_rho):
            # float() keeps the integer 0 rendered as "0.0" in the legend.
            plt.semilogx(sample_sizes,
                         stds,
                         '-o',
                         label=rf'$\rho$ ={float(rho)}')
        # Raw strings: "\l" and "\h" are not valid escape sequences, so the
        # backslashes must be kept literally for mathtext.
        plt.xlabel(r'$\log_{10}$ of sample size')
        plt.ylabel(rf"std($\hat{{I}}_{{{name}}}$)")
        plt.title(f"Standard deviation of MI estimator $I_{{{name}}}$")

        plt.legend()
        plt.show()