Example #1
 def __init__(self, number_of_gaussian):
     self.ng = number_of_gaussian
     self.gmm = GMM(number_of_gaussian)
     self.comparator_with_impostor = ComparatorWithImpostor(
         number_of_gaussian)
     self.comparator = Comparator(number_of_gaussian)
     self.cross_validator = CrossValidation(0.3)
Example #2
class trainingClass:
        
    def __init__(self, size, output_wavefile):
        self.Training_info = GMM(size, output_wavefile)
    
    def Training_feature_Mean(self):
    
        Mean_training = self.Training_info.GMM_Model_Mean()
    
        return Mean_training
    
    
    def Training_feature_Weight(self):
    
        Weight_training = self.Training_info.GMM_Model_Weight()
    
        return Weight_training
    
    
    def Training_feature_Covar(self):
    
        Covar_training = self.Training_info.GMM_Model_Covar()
    
        return Covar_training
    
    def adjustFeatures(self, name, mainF):

        self.Training_info.adjustFeatures(name, mainF)
Example #3
def cute_test(length):
    p_array = []
    counter = 0
    for i in range(0, 1000):
        #cause, effect = generate_continue_data(200, random.randint(1,3))  # random.randint(1,5)
        cause = GMM(5, 200)
        effect = GMM(8, 200)
        #effect, test2 = generate_continue_data(200, 3)  # random.randint(1,5)
        cause = change_to_zero_one(cause)
        effect = change_to_zero_one(effect)

        cause2effect = bernoulli2(effect, length) - cbernoulli2(
            effect, cause, length)
        effect2cause = bernoulli2(cause, length) - cbernoulli2(
            cause, effect, length)
        #print 'cause' + ' -> ' + 'effect' + ':' + str(cause2effect)
        #print 'effect' + ' -> ' + 'cause' + ':' + str(effect2cause)
        p = math.pow(2, -(cause2effect - effect2cause))
        p_array.append(p)
        if cause2effect > effect2cause:
            counter += 1
    print
    print counter
    print bh_procedure(p_array, 0.05)
    return counter / 100.0
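
Note: bh_procedure() is called above but not shown in this excerpt. The sketch below is only an illustrative stand-in, assuming it implements the standard Benjamini-Hochberg step-up procedure and returns the indices of the p-values rejected at false-discovery rate q; it is not the original implementation.

def bh_procedure(p_values, q):
    # Benjamini-Hochberg step-up procedure (illustrative sketch)
    m = len(p_values)
    order = sorted(range(m), key=lambda i: p_values[i])
    # largest 1-based rank k with p_(k) <= (k / m) * q
    cutoff = 0
    for rank, idx in enumerate(order, start=1):
        if p_values[idx] <= rank * q / m:
            cutoff = rank
    # indices of the hypotheses rejected at FDR level q
    return sorted(order[:cutoff])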
Example #4
def main():
    pref_path = os.getcwd() + "/classification_data_HWK2/EMGaussian"

    train_data = np.loadtxt(open(pref_path + ".data", "rb"), delimiter=" ")
    test_data = np.loadtxt(open(pref_path + ".test", "rb"), delimiter=" ")

    Xtrain = train_data[:, :2]
    Xtest = test_data[:, :2]

    models = {"GMM": GMM(isotropic=False), "HMM": HMM()}
    K = 4  #number of clusters

    for name in ["GMM", "HMM"]:

        print(name)
        model = models[name]
        model.fit(Xtrain, K, eps=pow(10, -2))

        # visualize clusters and frontiers
        model.plot_clusters(Xtrain, "figs/" + name + " on train", save=True)
        model.plot_clusters(Xtest, "figs/" + name + " on test", save=True)

        print("")

        lik = model.compute_log_likelihood(Xtrain)
        print("mean log-likelihood on training set : ", lik / Xtrain.shape[0])

        lik = model.compute_log_likelihood(Xtest)
        print("mean log-likelihood on test set : ", lik / Xtest.shape[0])

        print("\n------------------------\n")
Example #5
def main():
    trainD, devD, testD = init()
    allD = Data(trainD, devD)
    if sys.argv[1] == "display":
        display(allD)
        exit(0)
    if sys.argv[1] == "train":
        # local settings
        x = trainD.nx()
        y = trainD.ny()
        xx = devD.nx()
    else:
        # submit settings
        x = allD.nx()
        y = allD.ny()
        xx = testD.nx()
    gmm1 = GMM(x[y == 1], round=500, K=4)
    gmm2 = GMM(x[y == 2], round=500, K=4)
    print("GMM1.dist: ", gmm1.pi)
    print("GMM2.dist: ", gmm2.pi)
    r1 = gmm1.predict(xx) * np.sum(y == 2)
    r2 = gmm2.predict(xx) * np.sum(y == 1)
    result = 1 + (r1 < r2) * 1
    if sys.argv[1] == "train":
        # local settings
        print("accuracy: ", sum(result == devD.ny()) / devD.ny().shape[0])
    else:
        # submit settings
        testD.y = list(result)
        testD.output()
def test_cluster_num():
    for i in [2,3,4,5,6]:
        Data = np.load("./Data/cluster_"+str(i)+".npy")
        gmm = GMM(data = Data)
        gmm.aic()
        gmm.bic()
        vbem = VBEM(data = Data)
        vbem.train()
        vbem.show()
def test_sample_sizes():
    for i in [10,20,50,100,200,300]:
        Data = np.load("./Data/data_"+str(i)+".npy")
        gmm = GMM(data = Data)
        gmm.aic()
        gmm.bic()
        vbem = VBEM(data = Data)
        vbem.train()
        vbem.show()
def test_dimension():
    for i in [3,4,5,6,7]:
        Data = np.load("./Data/dimension_"+str(i)+".npy")
        gmm = GMM(data = Data)
        gmm.aic()
        gmm.bic()
        vbem = VBEM(data = Data)
        vbem.train()
        vbem.show()
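
Note: the aic() and bic() methods called above are not shown here. A hedged sketch of what they presumably compute for a GMM with K components, d dimensions, and full covariances, using the standard definitions AIC = 2k - 2·logL and BIC = k·ln(n) - 2·logL:

import math

def gmm_free_params(K, d):
    # K-1 free mixing weights, K mean vectors, K full covariance matrices
    return (K - 1) + K * d + K * d * (d + 1) // 2

def gmm_aic(log_likelihood, K, d):
    return 2 * gmm_free_params(K, d) - 2 * log_likelihood

def gmm_bic(log_likelihood, K, d, n_samples):
    return gmm_free_params(K, d) * math.log(n_samples) - 2 * log_likelihood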
Example #9
def main():
    pref_path = os.getcwd() + "/classification_data_HWK2/EMGaussian"

    train_data = np.loadtxt(open(pref_path + ".data", "rb"), delimiter=" ")
    test_data = np.loadtxt(open(pref_path + ".test", "rb"), delimiter=" ")

    Xtrain = train_data[:, :2]
    Xtest = test_data[:, :2]

    models = {
        "Kmeans": Kmeans(),
        "GMM_general": GMM(isotropic=False),
        "GMM_isotropic": GMM(isotropic=True)
    }
    K = 4  #number of clusters

    for name in ["Kmeans", "GMM_isotropic", "GMM_general"]:

        print(name)
        model = models[name]
        model.fit(Xtrain, 4)

        # visualize clusters and frontiers
        model.plot_clusters(Xtrain, name + " on train", save=False)
        model.plot_clusters(Xtest, name + " on test", save=False)

        if name in ["GMM_general", "GMM_isotropic"]:

            lik = model.compute_log_likelihood(Xtrain)
            print("mean log-likelihood on training set : ",
                  lik / Xtrain.shape[0])

            lik = model.compute_log_likelihood(Xtest)
            print("mean log-likelihood on test set : ", lik / Xtest.shape[0])

        print("")
Example #10
def main():
    # declare variables
    K_value = 3
    max_epoch = 1000
    repeat_num = 8
    repeat_num_gmm = 1
    name = 'GMM_dataset.txt'

    # get the training data
    training_data = data_loading(name)

    if (sys.argv[1] == 'kmean'):
        # run the K-mean program
        program_name = './a.out'
        parameter_line = ' ' + 'training_kmeans ' + str(K_value) + ' ' + str(
            repeat_num) + ' 0'
        print('Running K-mean')
        os.system(program_name + parameter_line)
        print('The Program is done.')

        # read minimum SSE Position
        min_sse_pos = np.loadtxt('./min_sse_pos.csv',
                                 delimiter=',',
                                 skiprows=0)

        # read clusters from K-mean algorithm
        kmean_name = './all_cluster_center' + str(int(min_sse_pos)) + '.csv'
        kmean_clusters = np.loadtxt(kmean_name, delimiter=',', skiprows=0)

        for draw_ind in range(int(kmean_clusters.shape[0] / K_value)):
            # the last index: (int(kmean_clusters.shape[0] / K_value) - 1)
            start_index = draw_ind * K_value
            iter_kmean_clusters = kmean_clusters[start_index:start_index +
                                                 K_value, :]

            # label assignment
            out_label, cov_list = label_assignment(iter_kmean_clusters,
                                                   training_data)

            # save the figures
            plt.rcParams.update({'figure.max_open_warning': 0})
            fig, ax = plt.subplots()
            ax.scatter(training_data[:, 0],
                       training_data[:, 1],
                       c=out_label,
                       alpha=0.5)
            ax.scatter(iter_kmean_clusters[:, 0],
                       iter_kmean_clusters[:, 1],
                       c='b',
                       s=100,
                       alpha=0.5)
            plt.xlabel('Feature: x1')
            plt.ylabel('Feature: x2')
            plt.title('K-mean Clustering')
            fig.savefig('./kmean_result/iter' + str(draw_ind) + '.png')
            #fig.clf()
    elif (sys.argv[1] == 'gmm'):
        # read minimum SSE Position
        min_sse_pos = np.loadtxt('./min_sse_pos.csv',
                                 delimiter=',',
                                 skiprows=0)

        # read clusters from K-mean algorithm
        kmean_name = './all_cluster_center' + str(int(min_sse_pos)) + '.csv'
        kmean_clusters = np.loadtxt(kmean_name, delimiter=',', skiprows=0)
        start_index = (int(kmean_clusters.shape[0] / K_value) - 1) * K_value
        iter_kmean_clusters = kmean_clusters[start_index:start_index +
                                             K_value, :]
        out_label, cov_list = label_assignment(iter_kmean_clusters,
                                               training_data)

        # call GMM class
        gmm = GMM(K_value, repeat_num_gmm, max_epoch, training_data)
        all_likelihood, parameters = gmm.model_training(
            iter_kmean_clusters, out_label, cov_list)
        true_mu, true_covariance = gmm.fit_true_model(training_data)
        # find the mu, covariance, and prior
        best_likelihood, all_mu, all_cov, all_prior = find_the_best(
            all_likelihood, parameters)

        # prediction phase
        prediction = gmm.model_predict(all_mu[-1], all_cov[-1], all_prior[-1],
                                       training_data)
        labels = label_GMM(prediction)

        # drawing gaussian functions
        for ind in range(len(all_mu)):
            out_para = drawing_Gaussian(all_mu[ind], all_cov[ind],
                                        training_data, all_mu[-1], 1, ind)
            if ind == (len(all_mu) - 1):
                # print out parameters of the Gaussian function
                for ind_2 in range(K_value):
                    print('Cluster:', ind_2)
                    print('Mu:')
                    print(out_para[str(ind_2)][0])
                    print('Covariance:')
                    print(out_para[str(ind_2)][1])
                    print('=====================')
        # drawing true gaussian functions
        if K_value == 3:
            out_param_true = drawing_Gaussian(true_mu, true_covariance,
                                              training_data, true_mu, 2, None)
            # print out parameters of the Gaussian function
            for ind_3 in range(K_value):
                print('Cluster:', ind_3)
                print('Actual Mu:')
                print(out_param_true[str(ind_3)][0])
                print('Actual Covariance:')
                print(out_param_true[str(ind_3)][1])
                print('=====================')
        # drawing log-likelihood values
        drawing_Log_likelihood(best_likelihood)

    elif (sys.argv[1] == 'saving'):
        # save the data as a csv file
        save_data_csv_file(training_data)
    else:
        print('Invalid input. Please choose the task again!')
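
Note: label_assignment() used above is not part of this excerpt. A minimal stand-in sketch, assuming numpy is imported as np as in the rest of the example: assign each point to its nearest k-means center and return the labels together with one covariance matrix per cluster (an assumption about its behavior, not the original).

def label_assignment(cluster_centers, data):
    # distance from every point to every cluster center
    dists = np.linalg.norm(data[:, None, :] - cluster_centers[None, :, :], axis=2)
    labels = np.argmin(dists, axis=1)
    # per-cluster covariance matrices, later used to seed the GMM
    cov_list = [np.cov(data[labels == k].T) for k in range(cluster_centers.shape[0])]
    return labels, cov_list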
Example #11
    mask = np.zeros_like(img)

    mask[:, :, 0] = probsReshaped
    mask[:, :, 1] = probsReshaped
    mask[:, :, 2] = probsReshaped

    return mask


if __name__ == '__main__':
    # Loading models
    path = 'models/'
    files = os.listdir(path)

    paths = ['Green_Resized/', 'Orange_Resized/', 'Yellow_Resized/']
    gmms = [GMM(nClusters=3), GMM(nClusters=3), GMM(nClusters=3)]  # TODO

    gParams, gMixture = None, None
    oParams, oMixture = None, None
    yParams, yMixture = None, None

    for file in files:
        filename = os.path.join(path, file)

        pickle_in = open(filename, "rb")
        model = pickle.load(pickle_in)

        for key, info in model.items():
            if key == paths[0]:
                gParams = info[0]
                gMixture = info[1]
Example #12
@brief      TBD
@license    This project is released under the BSD-3-Clause license.
'''

import numpy as np
import cv2
from scipy.stats import multivariate_normal
from GMM import GMM

#image = cv2.imread('training_set/yellowTrainingSet.png')
image = cv2.imread('test_set/buoys.png')

height = image.shape[0]
width = image.shape[1]

yellowGMM = GMM()
yellowGMM.load('yellowGMM.npz')

orangeGMM = GMM()
orangeGMM.load('orangeGMM.npz')

greenGMM = GMM()
greenGMM.load('greenGMM.npz')
'''
yellowErrors = np.zeros((height, width))
orangeErrors = np.zeros((height, width))
greenErrors = np.zeros((height, width))

for i in range(height):
    for j in range(width):
        yellowErrors[i, j] = yellowGMM.getLogLikelihoodError(image[i, j])
Example #13
    # connectivity matrix for structured Ward
    connectivity = kneighbors_graph(X,
                                    n_neighbors=params['n_neighbors'],
                                    include_self=False)
    # make connectivity symmetric
    connectivity = 0.5 * (connectivity + connectivity.T)

    # ============
    # Initialize all clustering algorithms
    # ============
    # Custom K-Means and GMM implementations
    my_kmeans0 = K_Means(n_clusters=params['n_clusters'], fit_method=0)
    my_kmeans1 = K_Means(n_clusters=params['n_clusters'], fit_method=1)
    my_kmeans2 = K_Means(n_clusters=params['n_clusters'], fit_method=2)
    my_kmeans3 = K_Means(n_clusters=params['n_clusters'], fit_method=3)
    my_gmm = GMM(n_clusters=params['n_clusters'], dim=X.shape[1])
    my_spectral_knn_reciprocal_normalized = SpectralClustering(
        n_clusters=params['n_clusters'], nnk=50)
    my_spectral_radius_reciprocal_normalized = SpectralClustering(
        n_clusters=params['n_clusters'], use_radius_nn=True, nnradius=1)
    my_spectral_knn_gauss05_normalized = SpectralClustering(
        n_clusters=params['n_clusters'], nnk=50, use_gauss_dist=True)
    my_spectral_knn_gauss005_normalized = SpectralClustering(
        n_clusters=params['n_clusters'],
        nnk=50,
        use_gauss_dist=True,
        gauss_sigma=5e-2)
    my_spectral_knn_reciprocal_unnormalized = SpectralClustering(
        n_clusters=params['n_clusters'], nnk=50, normalized=False)

    # Built-in algorithms from sklearn
Example #14
File: testGMM.py  Project: wsnpyo/ML
import numpy as np
import matplotlib.pyplot as plt

from GMM import GMM

if __name__ == '__main__':
    group_a = np.random.normal(loc=(20.00, 14.00), scale=(4.0, 4.0), size=(1000, 2))
    group_b = np.random.normal(loc=(15.00, 8.00), scale=(2.0, 2.0), size=(1000, 2))
    group_c = np.random.normal(loc=(30.00, 40.00), scale=(2.0, 2.0), size=(1000, 2))
    group_d = np.random.normal(loc=(25.00, 32.00), scale=(7.0, 7.0), size=(1000, 2))
    group_e = np.random.normal(loc=(10.00, 32.00), scale=(7.0, 7.0), size=(1000, 2))

    DATA = np.concatenate((group_a, group_b, group_c, group_d, group_e))
    S = GMM(5, DATA, 1e-3)
    S.fit()
    S.print_status()

    testdata = np.random.rand(10000, 2)*50
    labels = S.Classify(testdata)

    plt.scatter(testdata[:, 0], testdata[:, 1], c=list(map(lambda i : {0:'b',1:'g',2:'r',3:'y',4:'k'}[i], labels)))
    plt.show()

"""
Chapter 9: EM algorithm for the Gaussian Mixture Model
Using the iris dataset for clustering

"""
import DatasetUtil as DS
from HTMLTable import HTMLTable
import re
from GMM import GMM

if __name__ == "__main__":
    print("\t============ Chap9 EM for GMM ============")

    ds = DS.DATAUtil()
    x_train, y_train = ds.load(True, r".\dataset.dat")
    model = GMM()
    model.train(x_train)

    y_pred = model.predict(x_train)
    y_train = ds.y_int2str(y_train)

    table = HTMLTable(caption='Iris Data Cluster')
    table.append_header_rows((
        ('No.', 'A1', 'A2', 'A3', 'A4', 'Classification', ''),
        ('', '', '', '', '', 'Label-C', 'Predict-C'),
    ))
    table[0][0].attr.rowspan = 2
    table[0][1].attr.rowspan = 2
    table[0][2].attr.rowspan = 2
    table[0][3].attr.rowspan = 2
    table[0][4].attr.rowspan = 2
Example #16
from GMM import GMM
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt

X, y = make_blobs(n_samples=1000, centers=4, n_features=2)

gmm_cls = GMM(initializer='uniform', cov_type='diag')
gmm_cls.fit(X, 4)

colors = []
for l in gmm_cls.kmeans_cls_.predict(X):
    if l == 0:
        colors.append('red')
    if l == 1:
        colors.append('green')
    if l == 2:
        colors.append('orange')
    if l == 3:
        colors.append('blue')

plt.scatter(X[:, 0], X[:, 1], c=colors, alpha=0.1)
plt.scatter(gmm_cls.means_[:, 0], gmm_cls.means_[:, 1], c='k')
plt.show()

plt.scatter(X[:, 0], X[:, 1], c=colors, alpha=0.1)
plt.scatter(gmm_cls.kmeans_cls_.means_[:, 0],
            gmm_cls.kmeans_cls_.means_[:, 1],
            c='k')
plt.show()
Example #17
    def fit3(self, raw_colors):
        self._X = [raw_colors[i] for i in Classifier.FACELET_ORDER]
        np.save('X', self._X)
        X = np.load('X.npy').astype('uint8')
        for i in range(len(X)):
            X[i][0], X[i][2] = X[i][2], X[i][0]

        X_hsv = X[np.newaxis, ...]
        hsv = cv2.cvtColor(X_hsv, cv2.COLOR_RGB2HSV)[0]
        centroid_id = list(range(4, 54, 9))
        feats = hsv[:, ::2]

        ax = pl.subplot(111)
        for i, c in enumerate(feats):
            if i % 9 == 4:
                # print(type(raw_colors[i]))
                pl.scatter(*feats[i], color=X[i] / 255.0, marker='x')
            else:
                pl.scatter(*feats[i], color=X[i] / 255.0, marker='o')
        pl.show()

        alphas = np.empty((6, 1))
        means = np.empty((6, feats.shape[1]))
        covs = np.empty((6, feats.shape[1], feats.shape[1]))
        for i in range(6):
            mean = feats[9 * i + 4]
            cov = np.array([[0.5 * 10, 0.0], [0.0, 2.5 * 10]])
            alphas[i] = 1 / 6.0
            means[i] = mean
            covs[i] = cov
            print('alpha:', alphas[i])
            print('mean:', mean)
            print('cov:', cov)
        self._gmm = GMM(6, feats, mu=means, sigma=covs, alpha=alphas)
        self._gmm.execute()
        print(self._gmm.alpha)
        print(self._gmm.mu)
        print(self._gmm.sigma)

        self._ans = []
        for xi in feats:
            probas = np.array([
                self._gmm.Normal(xi, self._gmm.mu[k], self._gmm.sigma[k],
                                 len(xi)) for k in range(6)
            ])
            self._ans.append(probas.argmax())
        print(np.array(self._ans).reshape(6, 3, 3))

        def eigsorted(cov):
            vals, vecs = np.linalg.eigh(cov)
            order = vals.argsort()[::-1]
            return vals[order], vecs[:, order]

        nstd = 2
        ax = pl.subplot(111)
        for i, c in enumerate(self._ans):
            if i % 9 == 4:
                pl.scatter(*feats[i],
                           color=X[centroid_id[c]] / 255.0,
                           marker='x')
            else:
                pl.scatter(*feats[i],
                           color=X[centroid_id[c]] / 255.0,
                           marker='o')

        for k in range(6):
            cov = self._gmm.sigma[k]
            vals, vecs = eigsorted(cov)
            theta = np.degrees(np.arctan2(*vecs[:, 0][::-1]))
            w, h = 2 * nstd * np.sqrt(vals)
            ell = mpl.patches.Ellipse(xy=self._gmm.mu[k],
                                      width=w,
                                      height=h,
                                      angle=theta,
                                      color=X[centroid_id[k]] / 255.0,
                                      alpha=0.5)
            ell.set_facecolor(X[centroid_id[k]] / 255.0)
            ax.add_artist(ell)
        for k in range(6):
            cov = covs[k]
            vals, vecs = eigsorted(cov)
            theta = np.degrees(np.arctan2(*vecs[:, 0][::-1]))
            w, h = 2 * nstd * np.sqrt(vals)
            ell = mpl.patches.Ellipse(xy=self._gmm.mu[k],
                                      width=w,
                                      height=h,
                                      angle=theta,
                                      color=X[centroid_id[k]] / 255.0,
                                      alpha=0.5)
            # ell.set_facecolor(X[centroid_id[k]]/255.0)
            ax.add_artist(ell)
        pl.show()
Example #18
class Classifier(object):
    """docstring for Classifier."""

    FACELET_ORDER = [
        42,
        39,
        36,
        43,
        40,
        37,
        44,
        41,
        38,  # U
        9,
        10,
        11,
        12,
        13,
        14,
        15,
        16,
        17,  # R
        8,
        7,
        6,
        5,
        4,
        3,
        2,
        1,
        0,  # F
        45,
        46,
        47,
        48,
        49,
        50,
        51,
        52,
        53,  # D
        27,
        28,
        29,
        30,
        31,
        32,
        33,
        34,
        35,  # L
        26,
        25,
        24,
        23,
        22,
        21,
        20,
        19,
        18,  # B
    ]

    def __init__(self, face_order='URFDLB'):
        self._fig = pl.figure()
        self._axis = self._fig.add_subplot(111, projection='3d')
        self._face_order = face_order

    def fit(self, raw_colors):
        self._X = [raw_colors[i] for i in Classifier.FACELET_ORDER]

        np.save('X', self._X)
        for label, color in enumerate(self._X):
            color[0], color[2] = color[2], color[0]
            self._axis.scatter(*color, color=color / 384)
        pl.show()

        self._X_pca = PCA(n_components=2).fit_transform(self._X)
        print(self._X_pca)
        np.save('pca', self._X_pca)
        for i, point in enumerate(self._X_pca):
            pl.scatter(*point, color=self._X[i] / 384)
        pl.show()

        centroids = np.array([self._X_pca[i] for i in range(4, 54, 9)])
        for i, point in enumerate(centroids):
            pl.scatter(*point, color=self._X[9 * i + 4] / 384)
        pl.show()

        print(centroids)
        self._model = KMeans(n_clusters=6, init=centroids)
        # self._model = GaussianMixture(n_components=6, means_init=centroids)
        self._model.fit(self._X_pca)

    def fit2(self, raw_colors):
        from sklearn.mixture import GaussianMixture
        self._X = [raw_colors[i] for i in Classifier.FACELET_ORDER]
        for label, color in enumerate(self._X):
            color[0], color[2] = color[2], color[0]
            self._axis.scatter(*color, color=color / 384)
        pl.show()

        centroids = np.array([self._X[i] for i in range(4, 54, 9)])

        print(centroids)
        self._model = GaussianMixture(n_components=6, means_init=centroids)
        self._model.fit(self._X)
        self._X_pca = self._X

    def fit3(self, raw_colors):
        self._X = [raw_colors[i] for i in Classifier.FACELET_ORDER]
        np.save('X', self._X)
        X = np.load('X.npy').astype('uint8')
        for i in range(len(X)):
            X[i][0], X[i][2] = X[i][2], X[i][0]

        X_hsv = X[np.newaxis, ...]
        hsv = cv2.cvtColor(X_hsv, cv2.COLOR_RGB2HSV)[0]
        centroid_id = list(range(4, 54, 9))
        feats = hsv[:, ::2]

        ax = pl.subplot(111)
        for i, c in enumerate(feats):
            if i % 9 == 4:
                # print(type(raw_colors[i]))
                pl.scatter(*feats[i], color=X[i] / 255.0, marker='x')
            else:
                pl.scatter(*feats[i], color=X[i] / 255.0, marker='o')
        pl.show()

        alphas = np.empty((6, 1))
        means = np.empty((6, feats.shape[1]))
        covs = np.empty((6, feats.shape[1], feats.shape[1]))
        for i in range(6):
            mean = feats[9 * i + 4]
            cov = np.array([[0.5 * 10, 0.0], [0.0, 2.5 * 10]])
            alphas[i] = 1 / 6.0
            means[i] = mean
            covs[i] = cov
            print('alpha:', alphas[i])
            print('mean:', mean)
            print('cov:', cov)
        self._gmm = GMM(6, feats, mu=means, sigma=covs, alpha=alphas)
        self._gmm.execute()
        print(self._gmm.alpha)
        print(self._gmm.mu)
        print(self._gmm.sigma)

        self._ans = []
        for xi in feats:
            probas = np.array([
                self._gmm.Normal(xi, self._gmm.mu[k], self._gmm.sigma[k],
                                 len(xi)) for k in range(6)
            ])
            self._ans.append(probas.argmax())
        print(np.array(self._ans).reshape(6, 3, 3))

        def eigsorted(cov):
            vals, vecs = np.linalg.eigh(cov)
            order = vals.argsort()[::-1]
            return vals[order], vecs[:, order]

        nstd = 2
        ax = pl.subplot(111)
        for i, c in enumerate(self._ans):
            if i % 9 == 4:
                pl.scatter(*feats[i],
                           color=X[centroid_id[c]] / 255.0,
                           marker='x')
            else:
                pl.scatter(*feats[i],
                           color=X[centroid_id[c]] / 255.0,
                           marker='o')

        for k in range(6):
            cov = self._gmm.sigma[k]
            vals, vecs = eigsorted(cov)
            theta = np.degrees(np.arctan2(*vecs[:, 0][::-1]))
            w, h = 2 * nstd * np.sqrt(vals)
            ell = mpl.patches.Ellipse(xy=self._gmm.mu[k],
                                      width=w,
                                      height=h,
                                      angle=theta,
                                      color=X[centroid_id[k]] / 255.0,
                                      alpha=0.5)
            ell.set_facecolor(X[centroid_id[k]] / 255.0)
            ax.add_artist(ell)
        for k in range(6):
            cov = covs[k]
            vals, vecs = eigsorted(cov)
            theta = np.degrees(np.arctan2(*vecs[:, 0][::-1]))
            w, h = 2 * nstd * np.sqrt(vals)
            ell = mpl.patches.Ellipse(xy=self._gmm.mu[k],
                                      width=w,
                                      height=h,
                                      angle=theta,
                                      color=X[centroid_id[k]] / 255.0,
                                      alpha=0.5)
            # ell.set_facecolor(X[centroid_id[k]]/255.0)
            ax.add_artist(ell)
        pl.show()

    def get_state(self):
        # if self._X is None:
        #     raise Error('fit must be called first.')
        # pred2 = self._model.predict(self._X_pca)
        # state = ''.join([self._face_order[i] for i in pred2])
        # print(state)
        # pred2 = pred2.reshape(-1, 3, 3)
        # print(pred2)
        # return state
        FACES = 'URFDLB'
        state = ''.join([FACES[i] for i in np.array(self._ans).reshape(-1)])
        print(state)
        return state
Example #19
class BuoyDetector:
    yellowGMM = GMM()
    orangeGMM = GMM()
    greenGMM = GMM()
    

    def __init__(self, yellowGMMParams, orangeGMMParams, greenGMMParams):
        self.yellowGMM.load(yellowGMMParams)
        self.orangeGMM.load(orangeGMMParams)
        self.greenGMM.load(greenGMMParams)
    

    def detectBuoys(self, frame):
        image = frame.copy()

        height = image.shape[0]
        width = image.shape[1]

        #print('BP1')

        yellowMask = np.zeros((height, width)).astype('uint8')
        orangeMask = np.zeros((height, width)).astype('uint8')
        greenMask = np.zeros((height, width)).astype('uint8')
        #print('BP2')

        yellowErrors = self.yellowGMM.getLogLikelihoodError(np.reshape(image, (height*width, 3)))
        orangeErrors = self.orangeGMM.getLogLikelihoodError(np.reshape(image, (height*width, 3)))
        greenErrors = self.greenGMM.getLogLikelihoodError(np.reshape(image, (height*width, 3)))

        yellowErrors = np.reshape(yellowErrors, (height, width))
        orangeErrors = np.reshape(orangeErrors, (height, width))
        greenErrors = np.reshape(greenErrors, (height, width))

        for i in range(height):
            for j in range(width):
                yellowError = yellowErrors[i,j]
                orangeError = orangeErrors[i,j]
                greenError = greenErrors[i,j]

                if (yellowError > 11 and orangeError > 14 and greenError > 12.5):
                    continue
                elif (yellowError == min(yellowError, orangeError, greenError)):
                    yellowMask[i,j] = 255
                elif (orangeError == min(yellowError, orangeError, greenError)):
                    orangeMask[i,j] = 255
                elif (greenError == min(yellowError, orangeError, greenError)):
                    greenMask[i,j] = 255

        yellowMask = cv2.erode(yellowMask, None, iterations=1)
        yellowMask = cv2.dilate(yellowMask, None, iterations=2)

        orangeMask = cv2.erode(orangeMask, None, iterations=1)
        orangeMask = cv2.dilate(orangeMask, None, iterations=2)

        greenMask = cv2.erode(greenMask, None, iterations=1)
        greenMask = cv2.dilate(greenMask, None, iterations=2)

        yellowContours, hierarchy = cv2.findContours(yellowMask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        orangeContours, hierarchy = cv2.findContours(orangeMask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        greenContours, hierarchy = cv2.findContours(greenMask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        if (len(yellowContours) != 0):
            maxContour = max(yellowContours, key = cv2.contourArea)
            center, radius = cv2.minEnclosingCircle(maxContour)
            cv2.circle(image, (int(center[0]), int(center[1])), int(radius), \
                color=(0, 255, 255), thickness=2)
            cv2.circle(image, (int(center[0]), int(center[1])), 1, \
                color=(0, 0, 255), thickness=1)
            #cv2.drawContours(image, [maxContour], contourIdx=-1, color=(0, 255, 255), thickness=2)

        if (len(orangeContours) != 0):
            maxContour = max(orangeContours, key = cv2.contourArea)
            center, radius = cv2.minEnclosingCircle(maxContour)
            cv2.circle(image, (int(center[0]), int(center[1])), int(radius), \
                color=(0, 125, 255), thickness=2)
            cv2.circle(image, (int(center[0]), int(center[1])), 1, \
                color=(0, 0, 255), thickness=1)
            #cv2.drawContours(image, [maxContour], contourIdx=-1, color=(0, 125, 255), thickness=2)

        if (len(greenContours) != 0):
            maxContour = max(greenContours, key = cv2.contourArea)
            center, radius = cv2.minEnclosingCircle(maxContour)
            cv2.circle(image, (int(center[0]), int(center[1])), int(radius), \
                color=(0, 255, 0), thickness=2)
            cv2.circle(image, (int(center[0]), int(center[1])), 1, \
                color=(0, 0, 255), thickness=1)
            #cv2.drawContours(image, [maxContour], contourIdx=-1, color=(0, 255, 0), thickness=2)
        
        return image
    

    def runApplication(self, videoFile, saveVideo=False):
        # Create video stream object
        videoCapture = cv2.VideoCapture(videoFile)
        
        # Define video codec and output file if video needs to be saved
        if (saveVideo == True):
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            # 720p 30fps video
            out = cv2.VideoWriter('BuoyDetection.mp4', fourcc, 30, (1280, 720))

        # Continue to process frames if the video stream object is open
        while(videoCapture.isOpened()):
            ret, frame = videoCapture.read()

            # Continue processing if a valid frame is received
            if ret == True:
                newFrame = self.detectBuoys(frame)

                # Save video if desired, resizing frame to 720p
                if (saveVideo == True):
                    out.write(cv2.resize(newFrame, (1280, 720)))
                
                # Display frame to the screen in a video preview
                cv2.imshow("Frame", cv2.resize(newFrame, (1280, 720)))

                # Exit if the user presses 'q'
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            
            # If the end of the video is reached, wait for final user keypress and exit
            else:
                cv2.waitKey(0)
                break
        
        # Release video and file object handles
        videoCapture.release()
        if (saveVideo == True):
            out.release()
        
        print('Video and file handles closed')
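
A hypothetical driver for the class above (not part of the original): the .npz parameter files match the names used in Example #12, while the video filename is a placeholder.

if __name__ == '__main__':
    # load the three per-color GMMs and run detection on a sample video
    detector = BuoyDetector('yellowGMM.npz', 'orangeGMM.npz', 'greenGMM.npz')
    detector.runApplication('buoy_video.mp4', saveVideo=False)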
Example #20
kmeans_obj = KMeans(3, x)
kmeans_obj.fit(3, 0.002)

means = kmeans_obj.mean_vec
cov_mat_list = kmeans_obj.CovMatrix()
mixture_coeff = kmeans_obj.MixtureCoeff()

print(cov_mat_list)

"""from sklearn.cluster import KMeans
obj = KMeans(n_clusters = 3, init = 'k-means++', max_iter = 100, n_init = 10, random_state = 0)
y_Kmeans = obj.fit_predict(x)
print(obj.cluster_centers_[:])"""

GMM_obj = GMM(3, x, means, cov_mat_list, mixture_coeff)
GMM_obj.fit(0.0002)

print(GMM_obj.mean_vec)
print(GMM_obj.cov_mat)
print(GMM_obj.mixture_coeff)

y_pred = GMM_obj.ClusterPredict(x)
plt.scatter(GMM_obj.x_train[y_pred == 0, 0], GMM_obj.x_train[y_pred == 0, 1], s = 20, c = 'red', label = 'Cluster 1')
plt.scatter(GMM_obj.x_train[y_pred == 1, 0], GMM_obj.x_train[y_pred == 1, 1], s = 20, c = 'green', label = 'Cluster 2')
plt.scatter(GMM_obj.x_train[y_pred == 2, 0], GMM_obj.x_train[y_pred == 2, 1], s = 20, c = 'blue', label = 'Cluster 3')
plt.scatter(GMM_obj.mean_vec[:, 0], GMM_obj.mean_vec[:, 1], s = 50, c = 'yellow', label = 'Centroids')
plt.show()

plt.scatter(GMM_obj.x_train[:, 0], GMM_obj.x_train[:, 1])
plt.show()
Example #21
    def xai_feature(self, samp_num, option='None'):
        """Extract the important features from the input data.
        Args:
            samp_num: number of sampled perturbations used for the explanation
            option: 'Fixed' keeps the start point fixed in the sampled data
        Returns:
            fea: the extracted (important) features
        """
        print '----------------------------------------------------'
        print "parameters:"
        print "data:",self.data
        print "data shape:", self.data.shape
        print "seq_len:", self.seq_len
        print "start:", self.start
        print "sp:", self.sp
        print "real_sp:", self.real_sp
        print "pred:", self.pred
        print "trunc_len:", self.tl
        print "trunc_data",self.trunc_data
        print "trunc_data_test", self.trunc_data_test
        print '----------------------------------------------------'
        cen = self.seq_len/2
        half_tl = self.tl/2
        sample = np.random.randint(1, self.tl+1, samp_num)
        print "sample:",sample
        features_range = range(self.tl+1)
        data_explain = np.copy(self.trunc_data).reshape(1, self.trunc_data.shape[0])
        data_sampled = np.copy(self.trunc_data_test)
        for i, size in enumerate(sample, start=1):
            inactive = np.random.choice(features_range, size, replace=False)
            #print '\ninactive --->',inactive
            tmp_sampled = np.copy(self.trunc_data)
            tmp_sampled[inactive] = 0
            #tmp_sampled[inactive] = np.random.choice(range(257), size, replace = False)
            #print "trunc_data.shape", self.trunc_data.shape
            tmp_sampled = tmp_sampled.reshape(1, self.trunc_data.shape[0])
            data_explain = np.concatenate((data_explain, tmp_sampled), axis=0)
            #print "data_explain.shape", data_explain.shape
            data_sampled_mutate = np.copy(self.data)
            if self.real_sp < half_tl:
                data_sampled_mutate[0, 0:tmp_sampled.shape[1]] = tmp_sampled
            elif self.real_sp >= self.seq_len - half_tl:
                data_sampled_mutate[0, (self.seq_len - tmp_sampled.shape[1]): self.seq_len] = tmp_sampled
            else:
                data_sampled_mutate[0, (self.real_sp - half_tl):(self.real_sp + half_tl + 1)] = tmp_sampled
            data_sampled = np.concatenate((data_sampled, data_sampled_mutate),axis=0)

        if option == "Fixed":
            print "Fix start points"
            data_sampled[:, self.real_sp] = self.start
        label_sampled = self.model.predict(data_sampled, verbose = 0)[:, self.real_sp, 1]
        label_sampled = label_sampled.reshape(label_sampled.shape[0], 1)
        #X = r.matrix(data_explain, nrow = data_explain.shape[0], ncol = data_explain.shape[1])
        #Y = r.matrix(label_sampled, nrow = label_sampled.shape[0], ncol = label_sampled.shape[1])

        #n = r.nrow(X)
        #print "n:", n
        #p = r.ncol(X)
        #print "p:", p
        #print "np.sqrt(n*np.log(p)):", np.sqrt(n*np.log(p))
        
        #print "X_shape", X.dim
        #print "Y_shape", Y.dim
        
        #Mixture model fitting
        gmm = GMM(label_sampled,n_components=2).fit(data_explain)
        
        print gmm.converged_
        
        means = gmm.means_ 
        covariances = gmm.covariances_
        r_ik = np.zeros((samp_num+1,2))
        k=-1
        for m,c in zip(means,covariances):
            k += 1
            reg_cov = 5e-5*np.identity(self.tl+1)
            c = c + reg_cov
            #print "C:", c
            multi_normal = multivariate_normal(mean=m,cov=c)
            r_ik[:,k] = gmm.weights_[k] * multi_normal.pdf(data_explain)
        
        #mat_norm = np.zeros((501,501))
        #np.fill_diagonal(mat_norm, 1/np.sum(r_ik,axis=1))
        #P = mat_norm.dot(r_ik)
        res = np.argmax(r_ik, axis=1)
        
        
        # find the index for the best component
        best_component_idx = res[0]
        
        # fitting beta according to best component of mixture regression model
        
        # get the data for this component
        idx=np.where(res==best_component_idx)[0]
        X = r.matrix(data_explain[idx], nrow = len(idx), ncol = self.tl+1)
        Y = r.matrix(label_sampled[idx], nrow = len(idx), ncol = 1)

        n = r.nrow(X)
        print "n:", n
        p = r.ncol(X)
        print "p:", p
        print "np.sqrt(n*np.log(p)):", np.sqrt(n*np.log(p))
        
        # solve fused lasso by r library and get the importance score from the results
        print "X_shape", X.dim
        print "Y_shape", Y.dim
        results = r.fusedlasso1d(y=Y, X=X)
        #print "result_i", result_i
        result = np.array(r.coef(results, np.sqrt(n*np.log(p)))[0])[:,-1]
        print "result:", result
        
        
        #results = r.fusedlasso1d(y=Y,X=X)
        #result = np.array(r.coef(results, np.sqrt(n*np.log(p)))[0])[:,-1]
        
        # sorting the importance_score and return the important features
        importance_score = np.argsort(result)[::-1]
        print 'importance_score ...',importance_score 
        self.fea = (importance_score-self.tl/2)+self.real_sp
        self.fea = self.fea[np.where(self.fea<200)]
        self.fea = self.fea[np.where(self.fea>=0)]
        print 'self.fea ...',self.fea
        return self.fea
Example #22
BETA = 1

# set random seed to reproduce
tf.random.set_seed(SEED)
np.random.seed(SEED)

#Get data
log = logger()
label_name = os.path.join("data", "labels")
data_name = os.path.join("data", "data_set")
labels = log.unpickle(label_name)
labels = np.expand_dims(labels, axis=-1)
data_set = log.unpickle(data_name)

#Initialize the models and optimizers
gmm_model = GMM(data_set.shape[1], K=K)
optimizer = tf.keras.optimizers.Adam(
    learning_rate=0.0001)  #Low learning rate needed!


#train function
@tf.function  # compiles this function into a TensorFlow graph for faster execution
def train_models(data, label):
    with tf.GradientTape(persistent=True) as tape:
        sample, prob, mean, logvar = gmm_model(label)
        log_likelihood = gmm_model.log_likelihood(data, prob, mean, logvar)
    grad = tape.gradient(log_likelihood, gmm_model.variables)
    optimizer.apply_gradients(zip(grad, gmm_model.variables))
    del tape
    return log_likelihood
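
A sketch of how the train_models step above could be driven over mini-batches. This is illustrative only; the epoch count and batch size below are assumptions, not values from the original script.

EPOCHS = 100       # assumed epoch count
BATCH_SIZE = 100   # assumed batch size
n_batches = data_set.shape[0] // BATCH_SIZE
for epoch in range(EPOCHS):
    for b in range(n_batches):
        batch = slice(b * BATCH_SIZE, (b + 1) * BATCH_SIZE)
        log_likelihood = train_models(data_set[batch], labels[batch])
    # report the last batch's objective for this epoch
    print("epoch", epoch, "log-likelihood:", float(tf.reduce_mean(log_likelihood)))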
Example #23
plt.plot(x2[:, 0], x2[:, 1], 'o')
plt.plot(np.mean(x2[:, 0]), np.mean(x2[:, 1]), 'x', color='black')
plt.text(np.mean(x2[:, 0]), np.mean(x2[:, 1]), '$\mu_{e}$')

plt.xlabel('x')
plt.ylabel('y')
plt.title('Labelled data and corresponding means')
plt.show()

M = 2
max_iter = 200
tol = 1e-3
diagonal = False

gmm = GMM(X, M)

# run the K-means algorithm first to initialize the means of the GMM algorithm
# mu_0 = random.sample(list(X), M)
# mu_0, D = k_means(X, M, mu_0=mu_0, max_iter=max_iter, tol=tol, interactive=False)

# 1.) EM algorithm for GMM:
# TODO
L = gmm.EM(max_iter=max_iter, tol=tol, interactive=False, diagonal=False)

plt.ioff()
plt.plot(L)
plt.xlabel('Iteration')
plt.ylabel('Value')
plt.title('EM log-likelihood function')
plt.show()
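
For reference, a compact numpy sketch of a single EM iteration for a GMM with full covariances. This only illustrates the kind of update gmm.EM() presumably iterates; it is not the implementation used above.

import numpy as np
from scipy.stats import multivariate_normal

def em_step(X, weights, means, covs):
    N, M = X.shape[0], len(weights)
    # E-step: responsibilities r[n, m] = p(component m | x_n)
    r = np.column_stack([
        weights[m] * multivariate_normal.pdf(X, mean=means[m], cov=covs[m])
        for m in range(M)
    ])
    r /= r.sum(axis=1, keepdims=True)
    # M-step: re-estimate mixing weights, means and covariances
    Nk = r.sum(axis=0)
    new_weights = Nk / N
    new_means = (r.T @ X) / Nk[:, None]
    new_covs = np.array([
        ((X - new_means[m]).T * r[:, m]) @ (X - new_means[m]) / Nk[m]
        for m in range(M)
    ])
    return new_weights, new_means, new_covs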
Example #24
CURL_TRAIN_SIZE = 50 * N_CENTERS
CURL_TEST_SIZE = 1000
BATCH_SIZE = 100

# Model
INPUT_DIM = DIM
HIDDEN_DIM = 20
OUT_DIM = HIDDEN_DIM

# Training
N_EPOCH = 1000
LR = 1e-3

# Data generation
CENTERS = torch.randn(N_CENTERS * DIM).view(N_CENTERS, DIM)
gmm = GMM(DIM, CENTERS, VARIANCE)

X_train, y_train = gmm.sample(TRAIN_SAMPLES)
X_test, y_test = gmm.sample(TEST_SAMPLES)

train_CURL = ContrastiveDataset(
    *build_CURL_dataset(X_train, y_train, CURL_TRAIN_SIZE))
assert len(train_CURL) == CURL_TRAIN_SIZE
test_CURL = ContrastiveDataset(
    *build_CURL_dataset(X_test, y_test, CURL_TEST_SIZE))

train_data = GMMDataset(X_train, y_train)
test_data = GMMDataset(X_test, y_test)

train_loader = DataLoader(train_data, shuffle=True, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_data, shuffle=False, batch_size=BATCH_SIZE)
Example #25
def main():
    Parser = argparse.ArgumentParser()
    Parser.add_argument('datagenModule', type=str)
    Parser.add_argument('algName', type=str)
    #TODO:Parser.add_argument( 'modelName', type=str )

    Parser.add_argument('-K', '--K', type=int, default=3)

    Parser.add_argument('--alpha0', type=float, default=1.0)
    Parser.add_argument('--covar_type', type=str, default='full')
    Parser.add_argument('--min_covar', type=float, default=1e-9)

    # Batch learning args
    Parser.add_argument('--nIter', type=int, default=100)

    # Online learning args
    Parser.add_argument('--batch_size', type=int, default=100)
    Parser.add_argument('--nBatch', type=int, default=50)
    Parser.add_argument('--nRep', type=int, default=1)
    Parser.add_argument('--rhoexp', type=float, default=0.5)
    Parser.add_argument('--rhodelay', type=float, default=1)

    # Generic args
    Parser.add_argument('--jobname', type=str, default='defaultjob')
    Parser.add_argument('--taskid', type=int, default=1)
    Parser.add_argument('--nTask', type=int, default=1)

    Parser.add_argument('--initname', type=str, default='random')
    Parser.add_argument('--seed', type=int, default=8675309)
    Parser.add_argument('-v',
                        '--doVerbose',
                        action='store_true',
                        default=False)
    Parser.add_argument('--printEvery', type=int, default=5)
    Parser.add_argument('--saveEvery', type=int, default=10)
    Parser.add_argument('--doProfile', action='store_true', default=False)
    args = Parser.parse_args()

    modelParams = dict()
    for argName in ['K', 'covar_type', 'min_covar', 'alpha0']:
        modelParams[argName] = args.__getattribute__(argName)

    dataParams = dict()
    for argName in ['nBatch', 'nRep', 'batch_size', 'seed']:
        dataParams[argName] = args.__getattribute__(argName)

    algParams = dict()
    for argName in ['initname', 'nIter', 'rhoexp', 'rhodelay', \
                    'nIter', 'printEvery', 'saveEvery']:
        algParams[argName] = args.__getattribute__(argName)

    # Dynamically load module provided by user as data-generator
    #   this must implement a generator function called "minibatch_generator" or "get_data"
    datagenmod = __import__('GMM.data.' + args.datagenModule,
                            fromlist=['GMM', 'data'])
    if 'print_data_info' in dir(datagenmod):
        datagenmod.print_data_info()

    gmm = GMM.GMM(**modelParams)
    gmm.print_model_info()

    for task in xrange(args.taskid, args.taskid + args.nTask):
        basepath = os.path.join('results', args.algName, args.jobname,
                                str(task))
        mkpath(basepath)
        algParams['savefilename'] = os.path.join(basepath, 'trace')
        seed = hash(args.jobname + str(task)) % np.iinfo(int).max
        algParams['seed'] = seed

        print 'Trial %2d/%d | savefile: %s | seed: %d' % (
            task, args.nTask, algParams['savefilename'], algParams['seed'])

        if args.algName.startswith('o') and args.algName.count('EM') > 0:
            DataGen = datagenmod.minibatch_generator(**dataParams)
            gmm = GMM.GMM(**modelParams)
            em = OEM.OnlineEMLearnerGMM(gmm, **algParams)
            em.fit(DataGen, seed)
        elif args.algName.count('sklearnEM') > 0:
            sklgmm = sklearn.mixture.GMM(  n_components=args.K, random_state=seed, covariance_type=args.covar_type, \
                                           min_covar=args.min_covar, n_init=1, n_iter=args.nIter, init_params='' )
            X = datagenmod.get_data(**dataParams)

            gmm = GMM.GMM(**modelParams)
            em = EM.EMLearnerGMM(gmm, **algParams)
            em.init_params(X, seed=seed)

            sklgmm.weights_ = gmm.w
            sklgmm.means_ = gmm.mu
            sklgmm.covars_ = gmm.Sigma

            sklgmm.fit(X)
        elif args.algName.count('EM') > 0:
            Data = datagenmod.get_data(**dataParams)
            gmm = GMM.GMM(**modelParams)
            em = EM.EMLearnerGMM(gmm, **algParams)
            em.fit(Data, seed)
        elif args.algName.count('DPVB') > 0:
            Data = datagenmod.get_data(**dataParams)
            D = Data.shape[1]
            gw = GaussWishDistr.GaussWishDistr(D=D)
            qdp = QDPGMM.QDPGMM(gw, **modelParams)
            em = VB.VBLearnerGMM(qdp, **algParams)
            em.fit(Data, seed)
        elif args.algName.count('VB') > 0:
            Data = datagenmod.get_data(**dataParams)
            D = Data.shape[1]
            dF = D + 1
            invW = np.eye(D)
            gw = GaussWishDistr.GaussWishDistr(D=D)
            qgmm = QGMM.QGMM(gw, **modelParams)
            em = VB.VBLearnerGMM(qgmm, **algParams)
            em.fit(Data, seed)
Example #26
from GMM import GMM
from sklearn import mixture

# generate the dataset
X, Y = make_classification(n_samples=1000,
                           n_features=2,
                           n_redundant=0,
                           n_informative=2,
                           n_clusters_per_class=2)

X = preprocessing.scale(X)

num_clusters = 3
num_epochs = 50

gmm_model = GMM()
phi, pi_dist, mean, covariance = gmm_model.fit(X,
                                               num_clusters=num_clusters,
                                               num_epochs=num_epochs)

gmm_sklearn = mixture.GaussianMixture(n_components=2)
gmm_sklearn.fit(X)
plt.figure(figsize=(8, 8))
plt.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.9)

plt.subplot(211)
plt.title('Plot for the unclustered data', fontsize='small')
plt.scatter(X[:, 0], X[:, 1], s=25, c=None)

plt.subplot(212)
plt.title('Plot for the clustered data', fontsize='small')
Example #27
i = np.random.randint(0, nSamples-1, nComp)
means = samples[i]

covars = np.empty((nComp, 3), np.float32)
covars[:] = 10

gmm_cpu = mixture.GMM(nComp)
gmm_cpu.dtype = np.float32
gmm_cpu.init_params = ''
gmm_cpu.means_ = means
gmm_cpu.weights_ = weights
gmm_cpu.covars_ = covars
gmm_cpu.fit(samples)

gmm = GMM(context, nIter, nComp, nSamples)

a = calcA_cpu(weights, means, covars)
cl.enqueue_copy(queue, gmm.dA, a).wait()

gmm.has_preset_wmc = True
w,m,c = gmm.fit(dSamples, nSamples, retParams=True)
print 'converged: {0}'.format(gmm.has_converged)

print gmm_cpu.weights_
print w
print
print gmm_cpu.means_
print m
print
print gmm_cpu.covars_
Example #28
# 2. Re-classification
#=======================================================================

from GMM import GMM
import numpy as np
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from util import *

# Build the clustering data: X holds the features, Y the corresponding labels; make_moons generates half-moon shaped clusters
X, Y = make_moons(n_samples=1000, noise=0.04, random_state=0)
# Split the data: one part for training the clustering model, one part for classification
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

model = GMM(X_train, K=10)
# Get each sample's probability of belonging to every cluster
result = model.fit()
print('Per-sample probabilities of belonging to each cluster: ', result)

# Get the cluster each training sample belongs to
label_train = np.argmax(result, axis=1)
print(label_train)

# Get the cluster probabilities for the test data
result_test = model.predict(X_test)
# Get the cluster of each test sample
label_test = np.argmax(result_test, axis=1)

# Show the original data distribution and its labels
ax1 = plt.subplot(211)
Example #29
File: test_GMM.py  Project: FrenkT/GMM
import numpy as np
import matplotlib.pyplot as plt

from GMM import GMM

if __name__ == '__main__':
    group_a = np.random.normal(loc=(20.00, 14.00), scale=(4.0, 4.0), size=(1000, 2))
    group_b = np.random.normal(loc=(15.00, 8.00), scale=(2.0, 2.0), size=(1000, 2))
    group_c = np.random.normal(loc=(30.00, 40.00), scale=(2.0, 2.0), size=(1000, 2))
    group_d = np.random.normal(loc=(25.00, 32.00), scale=(7.0, 7.0), size=(1000, 2))

    data = np.concatenate((group_a, group_b, group_c, group_d))

    g = GMM(n_components=4)
    eval_train = g.train(data)
    for c in g.components:
        print '*****'
        print c.mean
        print c.cov

    plt.plot(eval_train)
    plt.show()
    # estimate bandwidth for mean shift
    bandwidth = cluster.estimate_bandwidth(X, quantile=params['quantile'])

    # connectivity matrix for structured Ward
    connectivity = kneighbors_graph(X,
                                    n_neighbors=params['n_neighbors'],
                                    include_self=False)
    # make connectivity symmetric
    connectivity = 0.5 * (connectivity + connectivity.T)

    # ============
    # Initialize all clustering algorithms
    # ============
    # Custom K-Means and GMM implementations
    my_kmeans = K_Means(n_clusters=params['n_clusters'])
    my_gmm = GMM(n_clusters=params['n_clusters'])
    my_spec = Spectral()
    # Built-in algorithms from sklearn
    ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
    two_means = cluster.MiniBatchKMeans(n_clusters=params['n_clusters'])
    ward = cluster.AgglomerativeClustering(n_clusters=params['n_clusters'],
                                           linkage='ward',
                                           connectivity=connectivity)
    spectral = cluster.SpectralClustering(n_clusters=params['n_clusters'],
                                          eigen_solver='arpack',
                                          affinity="nearest_neighbors")
    dbscan = cluster.DBSCAN(eps=params['eps'])
    optics = cluster.OPTICS(min_samples=params['min_samples'],
                            xi=params['xi'],
                            min_cluster_size=params['min_cluster_size'])
    affinity_propagation = cluster.AffinityPropagation(
Example #31
class QuickBrush(Brush):
    lWorksize = (16, 16)

    def __init__(self, context, devices, d_img, d_labels):
        Brush.__init__(self, context, devices, d_labels)

        self.context = context
        self.queue = cl.CommandQueue(context,
            properties=cl.command_queue_properties.PROFILING_ENABLE)

        nComponentsFg = 4
        nComponentsBg = 4
        self.nDim = 3

        self.dim = d_img.dim

        filename = os.path.join(os.path.dirname(__file__), 'quick.cl')
        program = createProgram(context, context.devices, [], filename)
        #        self.kernSampleBg = cl.Kernel(program, 'sampleBg')
        self.kern_get_samples = cl.Kernel(program, 'get_samples')

        self.lWorksize = (16, 16)
        self.gWorksize = roundUp(self.dim, self.lWorksize)

        nSamples = 4 * (self.gWorksize[0] / self.lWorksize[0]) * (
            self.gWorksize[1] / self.lWorksize[1])

        #		self.gmmFg_cpu = mixture.GMM(4)

        self.gmmFg = GMM(context, 65, nComponentsFg, 10240)
        self.gmmBg = GMM(context, 65, nComponentsBg, nSamples)

        self.hScore = np.empty(self.dim, np.float32)
        self.hSampleFg = np.empty((10240, ), np.uint32)
        self.hSampleBg = np.empty((12000, ), np.uint32)
        self.hA = np.empty((max(nComponentsFg, nComponentsBg), 8), np.float32)

        self.d_img = d_img

        cm = cl.mem_flags
        self.dSampleFg = cl.Buffer(context, cm.READ_WRITE, size=4 * 10240)
        self.dSampleBg = cl.Buffer(context, cm.READ_WRITE, size=4 * 12000)
        self.dA = cl.Buffer(context, cm.READ_ONLY | cm.COPY_HOST_PTR, hostbuf=self.hA)
        self.dScoreFg = Buffer2D(context, cm.READ_WRITE, self.dim, np.float32)
        self.dScoreBg = Buffer2D(context, cm.READ_WRITE, self.dim, np.float32)

        #self.points = Set()

        self.capPoints = 200 * 200 * 300 #brush radius 200, stroke length 300
        self.points = np.empty((self.capPoints), np.uint32)

        #		self.colorize = Colorize.Colorize(clContext, clContext.devices)

        #        self.hTriFlat = self.hTri.reshape(-1)

        #        self.probBg(1200)

        self.h_img = np.empty(self.dim, np.uint32)
        self.h_img = self.h_img.ravel()
        cl.enqueue_copy(self.queue, self.h_img, self.d_img, origin=(0, 0), region=self.dim).wait()

        self.samples_bg_idx = np.random.randint(0, self.dim[0] * self.dim[1], 12000)
        self.hSampleBg = self.h_img[self.samples_bg_idx]

        cl.enqueue_copy(self.queue, self.dSampleBg, self.hSampleBg).wait()

        w,m,c = self.gmmBg.fit(self.dSampleBg, 300, retParams=True)

        print w
        print m
        print c

        self.gmmBg.score(self.d_img, self.dScoreBg)

        pass

    def draw(self, p0, p1):
        Brush.draw(self, p0, p1)
        #self.probFg(x1-20, x1+20, y1-20, y1+20)
        #return
        """color = self.colorTri[self.type]

        #self.argsScore[5] = np.int32(self.nComponentsFg)

        #seed = []
        hasSeeds = False
        redoBg = False

        minX = sys.maxint
        maxX = -sys.maxint
        minY = sys.maxint
        maxY = -sys.maxint

        for point in self.points[0:nPoints]:
            #if self.hTriFlat[point] != color:
                self.hTriFlat[point] = color
                #seed += point
                hasSeeds = True

                minX = min(minX, point%self.width)

                maxX = max(maxX, point%self.width)
                minY = min(minY, point/self.width)
                maxY = max(maxY, point/self.width)

                #if (point[1]*self.width + point[0]) in self.randIdx:

                #	redoBg = True
        #if redoBg:
        #	self.probBg(0)

        #if len(seed) == 0:
        if not hasSeeds:
            return

        minX = max(0, minX-DILATE)
        maxX = min(self.width-1, maxX + DILATE)
        minY = max(0, minY-DILATE)
        maxY = min(self.height-1, maxY + DILATE)
        """

        args = [
            np.int32(self.n_points),
            self.d_points,
            cl.Sampler(self.context, False, cl.addressing_mode.NONE,
                cl.filter_mode.NEAREST),
            self.d_img,
            self.dSampleFg
        ]

        gWorksize = roundUp((self.n_points, ), (256, ))

        self.kern_get_samples(self.queue, gWorksize, (256,), *args).wait()

        cl.enqueue_copy(self.queue, self.hSampleFg, self.dSampleFg)
#        print self.hSampleFg.view(np.uint8).reshape(10240, 4)[0:self.n_points, :]

#        print self.n_points
        self.gmmFg.fit(self.dSampleFg, self.n_points)
#        print w
#        print m
#        print c

        self.gmmFg.score(self.d_img, self.dScoreFg)

        #        self.argsSampleBg = [
        #            self.d_labels,
        #            np.int32(self.label),
        #            cl.Sampler(self.context, False, cl.addressing_mode.NONE,
        #                cl.filter_mode.NEAREST),
        #            self.d_img,
        #            self.dSampleFg
        #        ]
        #
        #        gWorksize = roundUp(self.dim, (16, 16))
        #
        #        self.kernSampleBg(self.queue, gWorksize, (16, 16),
        #            *(self.argsSampleBg)).wait()
        #        cl.enqueue_copy(self.queue, self.hSampleBg, self.dSampleBg).wait()

        pass

    def probFg(self, d_samples, n_points):
    #		if True:
    #			tri = self.hTri[minY:maxY, minX:maxX]
    #			b = (tri == self.colorTri[self.type])
    #
    #			samplesFg = self.hSrc[minY:maxY, minX:maxX]
    #			samplesFg = samplesFg[b]
    #		else:
    #			DILATE = 5
    #			samplesFg = self.hSrc[minY:maxY, minX:maxX].ravel()

        #gpu = False
        #self.prob(self.gmmFG, samplesFg, self.dScoreFg, gpu)

        #self.gmmFg_cpu.fit(samplesFg)
        #print 'cpu', self.gmmFg_cpu.weights_
        #a = calcA_cpu(self.gmmFg_cpu.weights_.astype(np.float32), self.gmmFg_cpu.means_.astype(np.float32), self.gmmFg_cpu.covars_.astype(np.float32))
        #cl.enqueue_copy(self.queue, self.gmmFg.dA, a).wait()

        #weights, means, covars = self.gmmFg.fit(samplesFg, retParams=True)
        #a = calcA_cpu(weights, means[:, 0:3], covars[:, 0:3])
        #cl.enqueue_copy(self.queue, self.gmmFg.dA, a).wait()


        w,m,c = self.gmmFg.fit(d_samples, n_points, retParams=True)
        print w
        print m
        print c
        #print 'gpu', weights

        self.gmmFg.score(self.d_img, self.dScoreFg)

    #score returns float64, not float32 -> convert with astype
    #self.hScore = -self.gmmFG.score(self.rgb.reshape(-1, 3)).astype(np.float32)
    """
        def drawCircle(self, xc, yc, points=None):
            r = self.radius

            for y in xrange(-r, r):
                for x in xrange(-r, r):
                    if points != None:
                        points.add((xc+x, yc+y))
        """

    def probBg(self, nSamples):
        #self.kernSampleBg(self.queue, self.gWorksize, self.lWorksize, *(self.argsSampleBg)).wait()
        #cl.enqueue_copy(self.queue, self.hSampleBg, self.dSampleBg).wait()

        self.bgIdx = np.where(self.hTri.ravel() != self.colorTri[self.type])[0]
        self.randIdx = self.bgIdx[np.random.randint(0, len(self.bgIdx), 2000)]
        self.bgIdx = np.setdiff1d(self.bgIdx, self.randIdx)

        self.hSampleBg[0:len(self.randIdx)] = self.hSrc.view(np.uint32).ravel()[
                                              self.randIdx]
        cl.enqueue_copy(self.queue, self.dSampleBg, self.hSampleBg).wait()

        #print self.gmmBg.fit(self.hSrc.view(np.uint32).ravel()[self.randIdx], retParams=True)
        self.gmmBg.fit(self.hSrc.view(np.uint32).ravel()[self.randIdx])
        #self.gmmBg.fit(self.dSampleBg, nSamples=len(self.randIdx))
        self.gmmBg.score(self.dSrc, self.dScoreBg)
예제 #32
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 21 02:43:24 2019

@author: maachou
"""

from sklearn.datasets import make_blobs  # samples_generator was removed from recent scikit-learn
import matplotlib.pyplot as plt
from GMM import GMM

mix = GMM(K=6)
X, Y = make_blobs(cluster_std=0.5, random_state=20, n_samples=100, centers=6)
plt.scatter(X[:, 0], X[:, 1])
print(X.shape)
mix.fit(X)
mix.Means()
Y = mix.predict(X)  # overwrite the true labels with the predicted cluster assignments
plt.scatter(X[:, 0], X[:, 1], c=Y)
plt.show()
예제 #33
0
    def __init__(self, context, devices, d_img, d_labels):
        Brush.__init__(self, context, devices, d_labels)

        self.context = context
        self.queue = cl.CommandQueue(context,
            properties=cl.command_queue_properties.PROFILING_ENABLE)

        nComponentsFg = 4
        nComponentsBg = 4
        self.nDim = 3

        self.dim = d_img.dim

        filename = os.path.join(os.path.dirname(__file__), 'quick.cl')
        program = createProgram(context, context.devices, [], filename)
        #        self.kernSampleBg = cl.Kernel(program, 'sampleBg')
        self.kern_get_samples = cl.Kernel(program, 'get_samples')

        self.lWorksize = (16, 16)
        self.gWorksize = roundUp(self.dim, self.lWorksize)

        nSamples = 4 * (self.gWorksize[0] / self.lWorksize[0]) * (
            self.gWorksize[1] / self.lWorksize[1])

        #		self.gmmFg_cpu = mixture.GMM(4)

        self.gmmFg = GMM(context, 65, nComponentsFg, 10240)
        self.gmmBg = GMM(context, 65, nComponentsBg, nSamples)

        self.hScore = np.empty(self.dim, np.float32)
        self.hSampleFg = np.empty((10240, ), np.uint32)
        self.hSampleBg = np.empty((12000, ), np.uint32)
        self.hA = np.empty((max(nComponentsFg, nComponentsBg), 8), np.float32)

        self.d_img = d_img

        cm = cl.mem_flags
        self.dSampleFg = cl.Buffer(context, cm.READ_WRITE, size=4 * 10240)
        self.dSampleBg = cl.Buffer(context, cm.READ_WRITE, size=4 * 12000)
        self.dA = cl.Buffer(context, cm.READ_ONLY | cm.COPY_HOST_PTR, hostbuf=self.hA)
        self.dScoreFg = Buffer2D(context, cm.READ_WRITE, self.dim, np.float32)
        self.dScoreBg = Buffer2D(context, cm.READ_WRITE, self.dim, np.float32)

        #self.points = Set()

        self.capPoints = 200 * 200 * 300 #brush radius 200, stroke length 300
        self.points = np.empty((self.capPoints), np.uint32)

        #		self.colorize = Colorize.Colorize(clContext, clContext.devices)

        #        self.hTriFlat = self.hTri.reshape(-1)

        #        self.probBg(1200)

        self.h_img = np.empty(self.dim, np.uint32)
        self.h_img = self.h_img.ravel()
        cl.enqueue_copy(self.queue, self.h_img, self.d_img, origin=(0, 0), region=self.dim).wait()

        self.samples_bg_idx = np.random.randint(0, self.dim[0] * self.dim[1], 12000)
        self.hSampleBg = self.h_img[self.samples_bg_idx]

        cl.enqueue_copy(self.queue, self.dSampleBg, self.hSampleBg).wait()

        w, m, c = self.gmmBg.fit(self.dSampleBg, 300, retParams=True)

        print w  # fitted background GMM weights
        print m  # fitted background GMM means
        print c  # fitted background GMM covariances

        self.gmmBg.score(self.d_img, self.dScoreBg)

        pass
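The constructor above relies on a roundUp helper (imported elsewhere in this project) to pad OpenCL global work sizes to a multiple of the work-group size. A minimal sketch of the assumed behaviour, for reference only:

def roundUp(global_size, local_size):
    # round each global dimension up to the nearest multiple of the local work-group size
    return tuple(((g + l - 1) // l) * l
                 for g, l in zip(global_size, local_size))

# e.g. roundUp((200, 150), (16, 16)) -> (208, 160)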
예제 #34
0
    def fit(self, X, K, eps=pow(10, -2)):
        """Fit the HMM parameters with the EM algorithm.

        X   : sequence of observations, array of shape (T, D)
        K   : number of hidden states
        eps : tolerance on the log-likelihood gain between two iterations (EM stopping criterion)
        """

        self.K = K
        T, D = X.shape

        # initialization of means and covariances with GMM
        print(
            "Initialization of Gaussians parameters (means and covariances) with GMM : "
        )
        gmm_model = GMM(isotropic=False)
        gmm_model.fit(X, K, eps=eps)
        self.mus = gmm_model.mus
        self.Sigmas2 = gmm_model.Sigmas2

        print("\nFit of HMM : ")
        # initialization of pis and A at random
        self.pis = np.random.rand(self.K)
        self.pis /= np.sum(self.pis)
        self.A = np.random.rand(self.K, self.K)
        self.A /= np.sum(self.A, axis=1)[:, None]

        lik = self.compute_log_likelihood(X)
        print("Initial log-likelihood : ", lik)

        delta_lik = 1
        cpt_iter = 1

        while (delta_lik > eps):

            # Expectation step
            pi = self.compute_proba_Zt_cond_X(
                X)  # array (T,K) (t,i) -> p(z_t = i|X; θ)
            pij = self.compute_proba_Zt_and_Znext_cond_X(
                X)  # tensor (T-1,K,K) (t,i,j) -> p(z_(t+1) = j, z t = i|X; θ)

            # Maximization step

            self.pis = pi[0, :]
            pi_repeated = pi[:, :, np.newaxis]  # shape (T, K, 1), broadcasts against X over D
            self.mus = np.sum(pi_repeated * X[:, np.newaxis, :],
                              axis=0) / np.sum(pi_repeated, axis=0)

            self.Sigmas2 = []
            for k in range(self.K):
                Xc = X - self.mus[k]
                Sigmas2k = 0
                for t in range(T):
                    xt = Xc[t, :][:, None]  # size (d,1)
                    Sigmas2k += np.dot(xt, xt.T) * pi[t, k]
                Sigmas2k /= np.sum(pi[:, k])
                self.Sigmas2.append(Sigmas2k)
            self.Sigmas2 = np.array(self.Sigmas2)

            self.A = np.sum(pij, axis=0) / np.sum(pi[:-1], axis=0)[:, None]

            # Computing new likelihood, and deciding if we should stop
            old_lik = lik  # storing old_likelihood to compute delta_lik
            lik = self.compute_log_likelihood(X)  # storing new likelihood
            delta_lik = lik - old_lik  # measure to decide if we should stop or iterate again
            print("Iter " + str(cpt_iter) + " ; log_likelihood : " + str(lik))
            cpt_iter += 1

        print("EM algorithm converged.")

        print("initial distribution found (rounded, 2 decimals) : ",
              np.round(self.pis, 2))
        print("transition matrix found (rounded, 2 decimals) : ",
              np.round(self.A, 2))
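For reference, a minimal usage sketch of this fit method on synthetic data, assuming the surrounding HMM class (with compute_log_likelihood and the E-step helpers used above) is available in scope:

import numpy as np

# toy observation sequence: K Gaussian regimes visited in equal-length blocks
T, D, K = 400, 2, 4
rng = np.random.RandomState(0)
means = 5 * rng.randn(K, D)
states = np.repeat(np.arange(K), T // K)
X = means[states] + rng.randn(T, D)

hmm = HMM()                      # HMM class defined in this example
hmm.fit(X, K, eps=pow(10, -2))   # EM runs until the log-likelihood gain drops below eps
print(np.round(hmm.pis, 2))      # learned initial distribution
print(np.round(hmm.A, 2))        # learned transition matrix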
예제 #35
0
def test_data(length, array_length):
    #txtName = "causal_continue_noise_0.4_normal_sample_1000_length_200.txt"
    #f = file(txtName, "a+")
    counter11 = 0
    counter10 = 0
    counter01 = 0
    counter00 = 0
    counter11_01 = 0
    counter10_01 = 0
    counter01_01 = 0
    counter00_01 = 0
    counter_undecided = 0
    counter_true = 0
    counter_false = 0
    counter_undecided2 = 0
    counter_true2 = 0
    counter_false2 = 0
    counter_error_1 = 0
    counter_error_2 = 0
    p_array_granger1 = []
    p_array_granger2 = []
    p_array_CUTE1 = []
    p_array_CUTE2 = []
    p_array_improve_CUTE1 = []
    p_array_improve_CUTE2 = []
    p_array1 = []
    p_array2 = []
    p_array_granger = []
    for i in range(0, 1000):
        write_str = ""
        p = random.randint(1, 3)
        #effect, test1 = generate_continue_data(200, p)

        #cause, effect = generate_continue_data(150, p)
        #cause_tmp = list(cause)
        #effect_tmp = list(effect)
        #cause = zero_change(cause)
        #effect = zero_change(effect)

        #cause,effect = generate_continue_data_with_change_lag(350,10)
        cause = GMM(3, array_length)
        effect = GMM(5, array_length)

        cause_tmp = list(cause)
        effect_tmp = list(effect)

        #effect = forward_shift_continue_data(cause,p)
        #noise = np.random.normal(0, 0.1, 200)
        #for j in range(0, 200):
        #    effect[j] = effect[j] + noise[j]

        #for i in range(0,len(cause)):
        #cause[i]=math.tanh(cause[i])
        #cause[i] = math.pow(math.e,cause[i])
        #effect[i] = math.pow(math.e,effect[i])
        #cause[i] = math.pow(cause[i],3)/10
        #effect[i] = math.pow(effect[i],3)/10
        #effect[i]=math.tanh(effect[i])
        #effect[i] = math.pow(effect[i],3)
        #effect = GMM(8,200)

        #plt.plot(cause)
        #plt.plot(effect)
        #plt.show()
        #cause = normalize(cause)
        #effect = normalize(effect)

        #cause = normalize_data(cause)
        #effect = normalize_data(effect)

        #cause = zero_change(cause)
        #effect = zero_change(effect)
        from scipy.special import expit
        #for i in range(0,len(effect)):
        #effect[i]=expit(effect[i])
        #effect[i] = 1.0/effect[i]

        for ii in range(0, len(cause)):
            write_str = write_str + " " + str(cause[ii])
        for jj in range(0, len(effect)):
            write_str = write_str + " " + str(effect[jj])
        #print "cause:" + str(cause)
        #print "effect:" + str(effect)
        # effect, test2 = ge_normal_data(p,200)

        print "Continuous data, Granger causality test"
        print "cause->effect"
        p_value_cause_to_effect1 = []
        flag1 = False
        #ce1 = grangercausalitytests([[effect[i], cause[i]] for i in range(0, len(cause))], p)
        ce_p = granger(cause, effect, -1)
        #for key in ce1:
        #    p_value_cause_to_effect1.append(ce1[key][0]["params_ftest"][1])
        #   if ce1[key][0]["params_ftest"][1] < 0.05:
        #        flag1 = True
        if ce_p < 0.05:
            flag1 = True
        print "effect->cause"
        p_value_effect_to_cause2 = []
        flag2 = False
        #ce2 = grangercausalitytests([[cause[i], effect[i]] for i in range(0, len(cause))], p)
        ce2_p = granger(effect, cause, -1)
        #for key in ce2:
        #    p_value_effect_to_cause2.append(ce2[key][0]["params_ftest"][1])
        #    if ce2[key][0]["params_ftest"][1] < 0.05:
        #        flag2 = True
        if ce2_p < 0.05:
            flag2 = True
        if ce_p < 0.05:
            p_array_granger1.append(ce_p)
        elif ce2_p < 0.05:
            p_array_granger2.append(ce2_p)
        if flag1 and flag2:
            print "Continuous data, Granger two-way cause and effect"
            write_str = write_str + " " + "continuous data, Granger two-way causality"
            counter11 += 1
        elif flag1 and not flag2:
            print "Continuous data, Granger correct cause and effect"
            write_str = write_str + " " + "continuous data, Granger correct causality"
            counter10 += 1
            p_array_granger.append(ce_p)
        elif not flag1 and flag2:
            print "Continuous data, Granger wrong cause and effect"
            write_str = write_str + " " + "continuous data, Granger wrong causality"
            counter01 += 1
        elif not flag1 and not flag2:
            print "Continuous data, Granger no cause and effect"
            write_str = write_str + " " + "continuous data, Granger no causality"
            #break
            counter00 += 1
        #write_str = write_str + " " + str(min(p_value_cause_to_effect1)) + " " + str(min(p_value_effect_to_cause2))
        cause2 = get_type_array(cause, length)
        effect2 = get_type_array(effect, length)
        print "01 data, Granger causality test"
        print "cause->effect"
        p_value_cause_to_effect3 = []
        flag3 = False
        #ce3 = grangercausalitytests([[effect2[i], cause2[i]] for i in range(0, len(cause2))], p)
        ce3_p = granger(cause2, effect2, -1)
        #for key in ce3:
        #    p_value_cause_to_effect3.append(ce3[key][0]["params_ftest"][1])
        #    if ce3[key][0]["params_ftest"][1] < 0.05:
        #        flag3 = True
        if ce3_p < 0.05:
            flag3 = True
        print "effect->cause"
        p_value_effect_to_cause4 = []
        flag4 = False
        #ce4 = grangercausalitytests([[cause2[i], effect2[i]] for i in range(0, len(cause2))], p)
        ce4_p = granger(effect2, cause2, -1)
        #for key in ce4:
        #    p_value_effect_to_cause4.append(ce4[key][0]["params_ftest"][1])
        #    if ce4[key][0]["params_ftest"][1] < 0.05:
        #        flag4 = True
        if ce4_p < 0.05:
            flag4 = True
        if flag3 and flag4:
            print "01 data, Granger two-way cause and effect"
            write_str = write_str + " " + "discrete data, Granger two-way causality"
            counter11_01 += 1
        elif flag3 and not flag4:
            print "01 data, Granger correct cause and effect"
            write_str = write_str + " " + "discrete data, Granger correct causality"
            counter10_01 += 1
        elif not flag3 and flag4:
            print "01 data, Granger wrong cause and effect"
            write_str = write_str + " " + "discrete data, Granger wrong causality"
            counter01_01 += 1
        elif not flag3 and not flag4:
            print "01 data, Granger no cause and effect"
            write_str = write_str + " " + "discrete data, Granger no causality"
            counter00_01 += 1
        #write_str = write_str + " " + str(min(p_value_cause_to_effect3)) + " " + str(min(p_value_effect_to_cause4))
        print

        delta_ce = calculate_difference3(cause, effect, length)
        delta_ec = calculate_difference3(effect, cause, length)
        print 'cause' + ' -> ' + 'effect' + ':' + str(delta_ce)
        print 'effect' + ' -> ' + 'cause' + ':' + str(delta_ec)
        # decide a direction only if the compression gain difference exceeds
        # -log2(0.05) bits, i.e. the implied p-value 2^-(delta_ce - delta_ec) is below 0.05
        if delta_ce > delta_ec and delta_ce - delta_ec >= -math.log(0.05, 2):
            print "CUTE, correct cause and effect"
            write_str = write_str + " " + "CUTE, correct causality"
            counter_true += 1
        elif delta_ec > delta_ce and delta_ec - delta_ce >= -math.log(0.05, 2):
            print "CUTE, wrong cause and effect"
            write_str = write_str + " " + "CUTE, wrong causality"
            counter_false += 1
        else:
            print "CUTE, undecided"
            write_str = write_str + " " + "CUTE, undecided"
            counter_undecided += 1

        write_str = write_str + " " + str(pow(2, -abs(delta_ce - delta_ec)))
        p = math.pow(2, -(delta_ce - delta_ec))
        if p < 1:
            p_array1.append(p)
        else:
            p_array2.append(math.pow(2, -(delta_ec - delta_ce)))
        #f.write(write_str)
        #f.write("\n")
        cause = change_to_zero_one(cause_tmp)
        effect = change_to_zero_one(effect_tmp)
        cause2effect = bernoulli2(effect, length) - cbernoulli2(
            effect, cause, length)
        effect2cause = bernoulli2(cause, length) - cbernoulli2(
            cause, effect, length)
        # print 'cause' + ' -> ' + 'effect' + ':' + str(cause2effect)
        # print 'effect' + ' -> ' + 'cause' + ':' + str(effect2cause)
        p = math.pow(2, -(cause2effect - effect2cause))
        if p < 1:
            p_array_improve_CUTE1.append(p)
        else:
            p_array_improve_CUTE2.append(
                math.pow(2, -(effect2cause - cause2effect)))

        cause2effect = bernoulli(effect) - cbernoulli(effect, cause)
        effect2cause = bernoulli(cause) - cbernoulli(cause, effect)
        p = math.pow(2, -(cause2effect - effect2cause))  # recompute p for the plain CUTE scores
        if p < 1:
            p_array_CUTE1.append(p)
        else:
            p_array_CUTE2.append(math.pow(2, -(effect2cause - cause2effect)))

        print
        print "*****************************cut line*****************************"
        print
    #f.close()
    print "连续数据,格兰杰因果关系检验:"
    print "双向因果:" + str(counter11)
    print "正确因果:" + str(counter10)
    print "错误因果:" + str(counter01)
    print "没有因果" + str(counter00)
    print "-----------------"
    print "离散数据,格兰杰因果关系检验:"
    print "双向因果:" + str(counter11_01)
    print "正确因果:" + str(counter10_01)
    print "错误因果:" + str(counter01_01)
    print "没有因果" + str(counter00_01)
    print "-----------------"
    print "discret  data,snml causality test:"
    print "correct cause and effect:" + str(counter_true)
    print "wrong cause and effect:" + str(counter_false)
    print "no cause and effect:" + str(counter_undecided)
    print "-----------------"
    print "01 data,CUTE causality test:"
    granger_test = (bh_procedure(p_array_granger1, 0.05) +
                    bh_procedure(p_array_granger2, 0.05)) / 1000.0
    ourmodel = (bh_procedure(p_array1, 0.05) +
                bh_procedure(p_array2, 0.05)) / 1000.0
    cute = (bh_procedure(p_array_CUTE1, 0.05) +
            bh_procedure(p_array_CUTE2, 0.05)) / 1000.0
    improve_cute = (bh_procedure(p_array_improve_CUTE1, 0.05) +
                    bh_procedure(p_array_improve_CUTE2, 0.05)) / 1000.0
    print granger_test
    print improve_cute
    print ourmodel
    return granger_test, ourmodel, cute, improve_cute
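bh_procedure is not defined in this example; given that its return value is summed over the two p-value lists and divided by the number of trials above, it is assumed here to return the number of p-values rejected by the Benjamini-Hochberg procedure at level alpha. A minimal sketch under that assumption:

def bh_procedure(p_values, alpha):
    # Benjamini-Hochberg step-up: find the largest rank k with p_(k) <= (k / m) * alpha
    # and report how many hypotheses (the k smallest p-values) are rejected
    m = len(p_values)
    if m == 0:
        return 0
    n_reject = 0
    for k, p in enumerate(sorted(p_values), start=1):
        if p <= float(k) / m * alpha:
            n_reject = k
    return n_reject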