def calculate_kld(data_1_fn,data_2_fn,names,n_samples=2000):
    """Estimate the Kullback-Leibler divergence between two pyposmat datasets.

    Each dataset is read from disk, a Gaussian KDE is fit to the columns
    selected by ``names``, and the divergence between the two KDEs is
    estimated from ``n_samples`` Monte Carlo samples.

    Args:
        data_1_fn (str): path to the first pyposmat data file.
        data_2_fn (str): path to the second pyposmat data file.
        names (list of str): column names the KDEs are fit to.
        n_samples (int): number of Monte Carlo samples used by
            ``kullbach_lieber_divergence``.

    Returns:
        whatever ``kullbach_lieber_divergence`` returns (elsewhere in this
        module it is treated as a tuple whose first element is the KLD
        estimate).
    """
    assert isinstance(data_1_fn,str)
    assert isinstance(data_2_fn,str)
    assert isinstance(n_samples,int)

    assert os.path.isfile(data_1_fn)
    # BUG FIX: the original asserted data_1_fn twice and never verified
    # that the second data file exists.
    assert os.path.isfile(data_2_fn)

    data_1 = PyposmatDataFile()
    data_1.read(filename=data_1_fn)

    data_2 = PyposmatDataFile()
    data_2.read(filename=data_2_fn)

    # Eigenvalues of the covariance matrices.  A negative eigenvalue means
    # the covariance matrix is not positive semi-definite, in which case
    # scipy's gaussian_kde cannot be used and the project's fallback
    # GaussianKde implementation is used instead.
    w1,v1 = linalg.eig(np.cov(data_1.df[names].T))
    w2,v2 = linalg.eig(np.cov(data_2.df[names].T))

    cov1_ill_conditioned = any(k < 0 for k in w1.tolist())
    cov2_ill_conditioned = any(k < 0 for k in w2.tolist())

    if cov1_ill_conditioned or cov2_ill_conditioned:
        print('using ill-conditioned kde')
        kde_1 = GaussianKde(data_1.df[names].T)
        print(kde_1.n, kde_1.d)
        kde_2 = GaussianKde(data_2.df[names].T)
    else:
        kde_1 = gaussian_kde(data_1.df[names].T)
        kde_2 = gaussian_kde(data_2.df[names].T)

    kld = kullbach_lieber_divergence(kde_1,kde_2,n_samples)
    return kld
# Beispiel #2 (0)
def dev__kld_calculation_1d_kde():
    """Development driver: print the KLD between a KDE fit to N(0,1) samples
    and a KDE fit to a resample drawn from that first KDE.

    NOTE(review): a later function in this module re-uses this exact name
    and shadows this definition at import time.
    """
    n_samples_normal = 1000
    n_samples_kde = 1000
    n_samples_kld = 1000

    rv_norm = norm(0, 1)
    # FIX: the sample-size constants above were defined but the literals
    # below were hard-coded; use the named constants instead.
    X_norm = rv_norm.rvs(size=n_samples_normal)
    rv_kde_1 = gaussian_kde(X_norm)
    X_kde = rv_kde_1.resample(size=n_samples_kde)
    rv_kde_2 = gaussian_kde(X_kde)
    kld = kullbach_lieber_divergence(rv_kde_1, rv_kde_2, n_samples_kld)

    print(kld)
# Beispiel #3 (0)
def test__kld_calculation_1d_kde():
    """The KLD between a KDE of N(0,1) samples and a KDE of its resample
    is returned as a tuple with a positive first element."""
    n_samples_normal = 1000
    n_samples_kde = 1000
    n_samples_kld = 1000

    rv_norm = norm(0, 1)
    X_norm = rv_norm.rvs(size=n_samples_normal)
    rv_kde_1 = gaussian_kde(X_norm)
    X_kde = rv_kde_1.resample(size=n_samples_kde)
    rv_kde_2 = gaussian_kde(X_kde)
    kld = kullbach_lieber_divergence(rv_kde_1, rv_kde_2, n_samples_kld)

    assert type(kld) == tuple
    # BUG FIX: the original asserted kld[0] > 0 twice.  The duplicate was
    # presumably meant for the second tuple element, but its sign is not
    # established anywhere visible, so the redundant line is removed
    # rather than guessed at.
    assert kld[0] > 0
def dev__kld_calculation_1d_kde():
    """Development driver: print a banner, compute the KLD between a KDE of
    N(0,1) samples and a KDE of a resample of that KDE, and return it.

    NOTE(review): this shadows an earlier function of the same name in
    this module.

    Returns:
        the value returned by ``kullbach_lieber_divergence``.
    """
    print(80*'-')
    print('{:^80}'.format('dev__kld_calculation_1d_kde'))
    print(80*'-')

    n_samples_normal = 1000
    n_samples_kde = 1000
    n_samples_kld = 1000

    rv_norm = norm(0, 1)
    # FIX: use the named sample-size constants instead of hard-coded 1000s.
    X_norm = rv_norm.rvs(size=n_samples_normal)
    rv_kde_1 = gaussian_kde(X_norm)
    X_kde = rv_kde_1.resample(size=n_samples_kde)
    rv_kde_2 = gaussian_kde(X_kde)

    assert isinstance(rv_kde_1, gaussian_kde)
    # BUG FIX: the original checked rv_kde_1 twice; the second check was
    # clearly intended for rv_kde_2.
    assert isinstance(rv_kde_2, gaussian_kde)

    kld = kullbach_lieber_divergence(rv_kde_1, rv_kde_2, n_samples_kld)

    print(kld)
    return kld
# Beispiel #5 (0)
    except FileNotFoundError as e:
        print("the number of max iterations is actually {}".format(i - 1))
        n_iterations = i - 1
        break

# Compare successive KDE estimates via the Kullback-Leibler divergence.
print('compare kde estimates')
kld = [-1]  # iteration 0 has no predecessor; placeholder keeps indices aligned
for i in range(1, n_iterations):
    # Parameter samples from the previous and current KDE iterations.
    X_prev = data['kde'][i - 1].df[config.parameter_names].values
    X_curr = data['kde'][i].df[config.parameter_names].values

    # Bandwidth estimates; Silverman's rule is computed for reference only,
    # while Chiu's bandwidth is the one actually passed to gaussian_kde.
    silverman86_h0 = Silverman1986_h(X_prev.T)
    silverman86_h1 = Silverman1986_h(X_curr.T)
    chiu99_h0 = Chiu1999_h(X_prev.T)
    chiu99_h1 = Chiu1999_h(X_curr.T)

    kde_prev = gaussian_kde(X_prev.T, chiu99_h0)
    kde_curr = gaussian_kde(X_curr.T, chiu99_h1)
    kld.append(kullbach_lieber_divergence(kde_prev, kde_curr, 400))
    print(i, silverman86_h1, chiu99_h1, kld[i])

for i, v in enumerate(kld):
    print(i, v)
# Beispiel #6 (0)
    A = A * n * np.eye(n)

    return A


# Script: KLD between a KDE of N(0,1) samples and a KDE of its resample.
n_samples_normal = 1000
n_samples_kde = 1000

rv_norm = norm(0, 1)
# FIX: use the named sample-size constants instead of hard-coded 1000s.
X_norm = rv_norm.rvs(size=n_samples_normal)
print('X_norm', X_norm.shape)

rv_kde_1 = gaussian_kde(X_norm)
X_kde = rv_kde_1.resample(size=n_samples_kde)
print('X_kde', X_kde.shape)

rv_kde_2 = gaussian_kde(X_kde)
kld = kullbach_lieber_divergence(rv_kde_1, rv_kde_2, 1000)

print(kld)
exit()

# NOTE(review): everything below is unreachable because of exit() above;
# kept as-is for development use.
import matplotlib.pyplot as plt
xmin = min(X_norm.min(), X_kde.min())
xmax = max(X_norm.max(), X_kde.max())
x = np.linspace(xmin, xmax, 1000)
fig, ax = plt.subplots()
ax.plot(x, rv_norm.pdf(x))
# BUG FIX: the original referenced an undefined name `rv_kde` (NameError if
# reached).  rv_kde_1 — the KDE fit directly to the normal samples — is the
# natural curve to compare against rv_norm; confirm against original intent.
ax.plot(x, rv_kde_1.pdf(x))
plt.show()
# Beispiel #7 (0)
    configuration_fn = test_case['Si__sw']['configuration_fn']
    configuration = PyposmatConfigurationFile()
    configuration.read(filename=configuration_fn)
    free_parameter_names = configuration.free_parameter_names
    print('free_parameter_names:{}'.format(free_parameter_names))
    
    for i in range(n_kde_files):
        if i > 0:
            print(80*'-')
            print('i_iteration:{}'.format(i))

            data_directory = test_case['Si__sw']['data_directory']
            kde_file_fn_1 = os.path.join(data_directory,'pyposmat.kde.{}.out'.format(i))
            kde_file_fn_2 = os.path.join(data_directory,'pyposmat.kde.{}.out'.format(i+1))

            print('kde_file_fn_1:{}'.format(kde_file_fn_1))
            print('kde_file_fn_2:{}'.format(kde_file_fn_2))
            

            kde_file_1 = PyposmatDataFile()
            kde_file_1.read(filename=kde_file_fn_1)
            kde_rv_1 = gaussian_kde(kde_file_1.df[free_parameter_names].T)

            kde_file_2 = PyposmatDataFile()
            kde_file_2.read(filename=kde_file_fn_2)
            kde_rv_2 = gaussian_kde(kde_file_2.df[free_parameter_names].T)

            kld = kullbach_lieber_divergence(kde_rv_1,kde_rv_2,1000)
            print(kld)
    dev__kld_calculation_1d_kde()