def init_params(X, K, theta_init=None, parameters=None):
    # Here we will initialize the theta parameters of the mixture model
    # THETA PARAMETERS OF THE K COMPONENTS
    # Give random values to the theta parameters (parameters of the distribution)
    # In this case the parameters are theta = (mu, kappa)

    # We need at least the number of clusters K and the dimensionality of the
    # distribution D.
    N, D = X.shape
    if (theta_init is None):  # If not given an initialization
        mus = np.random.randn(D, K)
        mus = gf.normalize_module(mus.T).T
        #        print mus

        Kappa_min = parameters["Kappa_min_init"]
        Kappa_max = parameters["Kappa_max_init"]

        kappas = np.random.uniform(Kappa_min, Kappa_max, K)
        kappas = kappas.reshape(1, K)
        ####### Put theta in the right format ###########
        theta = []
        for k in range(K):
            theta.append([mus[:, [k]], kappas[:, [k]]])
    else:
        return theta_init

    return theta
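
# Usage sketch (not from the original source): assumes numpy is imported as np and
# that gf.normalize_module is the project's row-wise unit-norm helper used above.
X_toy = gf.normalize_module(np.random.randn(100, 3))  # 100 hypothetical points on the unit sphere
toy_parameters = {"Kappa_min_init": 1.0, "Kappa_max_init": 20.0}  # the only keys read by init_params
theta_toy = init_params(X_toy, K=2, parameters=toy_parameters)
# theta_toy is a list of K entries, each [mu of shape (D, 1), kappa of shape (1, 1)]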
def plot_trials_for_same_instance(X_data_trials,
                                  X_data_labels,
                                  X_train,
                                  y_train,
                                  colors=["r", "k"],
                                  time_show=100,
                                  normalize=True):

    # For a fixed time instant, the plotting of points from several trials of both classes
    gl.scatter_3D(0, 0, 0, nf=1, na=0)
    for i in range(len(X_data_trials)):
        class_i = X_data_labels[i]
        if (normalize):
            caca = gf.normalize_module(X_data_trials[i][[time_show], :])
        else:
            caca = X_data_trials[i][[time_show], :]

        caca = caca.flatten()
        gl.scatter_3D(
            caca[0],
            caca[1],
            caca[2],
            nf=0,
            na=0,
            color=colors[class_i],
            labels=[
                "Trials for the same time instant for different classes", "D1",
                "D2", "D3"
            ])
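
# Usage sketch (hypothetical, not from the original source): X_data_trials and
# X_data_labels come from preprocess_data_set further below; note that X_train and
# y_train are accepted by this function but never used inside it.
# plot_trials_for_same_instance(X_data_trials, X_data_labels, X_train, y_train, time_show=100)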
def init_EM_params(D, K, pi_init=None, theta_init=None, Kappa_max=20):
    # Here we will initialize the parameters of the mixture model, that is, the
    # theta vectors of the K components and the mixing coefficients.

    # MIXING COEFFICIENTS
    # We set the mixing coefficients to a uniform discrete distribution, so that
    # the a priori probability of any vector belonging to any component is
    # the same.
    
    if (pi_init is None):  # If not given an initialization
        pimix = np.ones((1, K))
        pimix = pimix * (1 / float(K))
    else:
        pimix = np.array(pi_init).reshape(1, K)
    # THETA PARAMETERS OF THE K COMPONENTS
    # Give random values to the theta parameters (parameters of the distribution)
    # In this case the parameters are theta = (mu, kappa)
    
    if (theta_init is None):  # If not given an initialization
        mus = np.random.randn(D, K)
        mus = gf.normalize_module(mus.T).T
        kappas = np.random.uniform(-1, 1, K) * Kappa_max
        kappas = kappas.reshape(1, K)
    else:
        mus = np.array(theta_init[0]).reshape((D, K))
        kappas = np.array(theta_init[1]).reshape((1, K))
    
    theta = [mus, kappas]
    
    return pimix, theta
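
# Usage sketch (not from the original source): initialize a K=2 mixture in D=3.
pimix_toy, theta_toy = init_EM_params(D=3, K=2, Kappa_max=20)
# pimix_toy has shape (1, K) and sums to 1; theta_toy = [mus of shape (D, K), kappas of shape (1, K)]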
Example #5
def get_X_trials_EM(X_All_labels,
                    label_classes,
                    max_trials=100,
                    channel_sel=[1, 4, 10]):

    # max_trials: maximum number of trials preprocessed per class, to reduce computation

    Nclasses = len(label_classes)

    X_data_ave_EM_plus = []  # List of trials :)
    X_data_ave_EM_minus = []
    # Every label will have a set of clusters;
    # these are the cluster parameters [K][[pimix], [thetas]]

    Ninit = 5
    K = 2
    verbose = 0
    T = 20
    ############# Preprocess by class ? ###################
    for i in range(Nclasses):
        Ntrials, Nsamples, Ndim = X_All_labels[i].shape
        Ntrials = np.min([Ntrials, max_trials])
        X_ave_EM_class_plus = []
        X_ave_EM_class_minus = []
        for i_trial in range(Ntrials):
            print "%i / %i " % (i_trial, Ntrials)
            # TODO no entiendo por que pelotas tengo que transformar aqui
            X_trial_samples = X_All_labels[i][i_trial, :, channel_sel].T
            #            print X_trial_samples.shape
            #            print channel_sel
            X_trial_samples = gf.normalize_module(X_trial_samples)
            #            print X_trial_samples.shape
            logl, theta_list, pimix_list = EMl.run_several_EM(X_trial_samples,
                                                              K=K,
                                                              delta=0.1,
                                                              T=T,
                                                              Ninit=Ninit,
                                                              verbose=verbose)

            good_cluster_indx_plus = np.argmax(theta_list[-1][1])
            good_cluster_indx_minus = np.argmin(theta_list[-1][1])

            mu_plus = theta_list[-1][0][:, [good_cluster_indx_plus]]
            mu_minus = theta_list[-1][0][:, [good_cluster_indx_minus]]
            #            print mu.shape
            X_ave_EM_class_plus.append(mu_plus.T)
            X_ave_EM_class_minus.append(mu_minus.T)

        X_ave_EM_class_plus = np.concatenate(X_ave_EM_class_plus, axis=0)
        X_ave_EM_class_minus = np.concatenate(X_ave_EM_class_minus, axis=0)
        #        print X_ave_EM_class.shape
        X_data_ave_EM_plus.append(X_ave_EM_class_plus)
        X_data_ave_EM_minus.append(X_ave_EM_class_minus)

    return X_data_ave_EM_plus, X_data_ave_EM_minus
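
# Usage sketch (hypothetical values, not from the original source): run the per-trial
# EM on three selected channels; each returned array stacks one estimated mean
# direction per trial and class.
# X_plus, X_minus = get_X_trials_EM(X_All_labels, label_classes, max_trials=20, channel_sel=[1, 4, 10])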
def init_HMM_params(D,
                    I,
                    pi_init=None,
                    A_init=None,
                    B_init=None,
                    Kappa_max=20):
    # Here we will initialize the parameters of the HMM, that is, the initial
    # state probabilities "pi", the transition probabilities "A" and the
    # parameters of the emission probability functions "B"

    # Initial probabilities
    # We set the initial probabilities to a uniform discrete distribution, so
    # that the a priori probability of starting in any state is the same.
    if (pi_init is None):  # If not given an initialization
        pi = np.ones((1, I))
        pi = pi * (1 / float(I))
    else:
        pi = np.array(pi_init).reshape(1, I)

    # Transition probabilities "A"
    # We set the transition probabilities to a uniform discrete distribution, so
    # that the a priori probability of going from state i to state j is the same
    # for every j.

    if (A_init is None):  # If not given an initialization
        A = np.ones((I, I))
        #A(i,j) = aij = P(st = j | st-1 = i)  sum(A(i,:)) = 1
        for i in range(I):
            A[i, :] = A[i, :] * (1 / float(I))
    else:
        A = A_init
    # Parameters of the emission probability functions "B"
    # Give random values to the emission parameters B = (mu, kappa): each mean
    # direction mu is drawn at random and normalized onto the unit sphere, and
    # each concentration kappa is drawn uniformly in (-Kappa_max, Kappa_max).

    if (B_init is None):  # If not given an initialization

        mus = np.random.randn(D, I)
        mus = gf.normalize_module(mus.T).T
        kappas = np.random.uniform(-1, 1, I) * Kappa_max
        kappas = kappas.reshape(1, I)

    else:
        mus = np.array(B_init[0]).reshape((D, I))
        kappas = np.array(B_init[1]).reshape((1, I))
    # We store the parameters of the clusters in B
    B = copy.deepcopy([mus, kappas])

    return pi, A, B
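
# Usage sketch (not from the original source): uniform initial and transition
# probabilities plus random (mu, kappa) emission parameters for I=4 states in D=3.
pi_toy, A_toy, B_toy = init_HMM_params(D=3, I=4)
# pi_toy: shape (1, I), uniform; A_toy: shape (I, I), every row sums to 1
# B_toy = [mus of shape (D, I), kappas of shape (1, I)]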
Example #9
def preprocess_data_set(X_All_labels,
                        label_classes,
                        max_trials=100,
                        channel_sel=None,
                        normalize=True):
    # Subselect channels and trials and then normalize modulus

    # max_trials: maximum number of trials preprocessed per class, to reduce computation
    # channel_sel: subset of selected channels (None means use all channels)

    Nclasses = len(label_classes)
    label_numbers = range(Nclasses)

    X_data_trials = []  # List of trials :)
    X_data_labels = []  # Labels of the trials :)

    # Every label will have a set of clusters;
    # these are the cluster parameters [K][[pimix], [thetas]]

    ############# Preprocess by class ? ###################
    for i in range(Nclasses):
        Ntrials, Nsamples, Ndim = X_All_labels[i].shape
        # Limit the number of trials processed by labels
        Ntrials = np.min([Ntrials, max_trials])
        if (channel_sel is None):
            channel_sel = range(Ndim)

        for nt in range(Ntrials):
            ################################################################
            ######## Preprocessing ! ##############################
            ###############################################################
            X_trial = X_All_labels[i][nt, :, :]
            X_trial = X_trial[:, channel_sel]
            #            X_trial = X_trial - np.sum(X_trial, axis = 1).reshape(X_trial.shape[0],1)
            #        scaler = preprocessing.StandardScaler().fit(X_trial)
            #        X_trial = scaler.transform(X_trial)
            if (normalize):
                X_trial = gf.normalize_module(X_trial)

            X_data_trials.append(X_trial)
            X_data_labels.append(i)

    # Now we have a normal machine learning problem :)
    # X_data_trials,  X_data_labels

    return X_data_trials, X_data_labels
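
# Usage sketch (hypothetical data, not from the original source; assumes
# gf.normalize_module normalizes each row to unit length): two classes, each with
# 5 trials of 200 samples over 8 channels.
X_All_toy = [np.random.randn(5, 200, 8) for _ in range(2)]
trials_toy, labels_toy = preprocess_data_set(X_All_toy, label_classes=["plus", "minus"],
                                             max_trials=3, channel_sel=[0, 1, 2])
# trials_toy[j] is a (200, 3) unit-norm trial; labels_toy[j] is its class index (0 or 1)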
for i in range(Nsa):
    probs.append([])
    for j in range(Nsa):
        XdataSample = [np.sin(Xthetta[i])*np.cos(Xfi[j]),
                       np.sin(Xthetta[i])*np.sin(Xfi[j]), 
                       np.cos(Xthetta[i])]
        probs[i].append(Wad.Watson_pdf(XdataSample,mu,kappa ))

probs = np.array(probs).T

## Plotting
gl.set_subplots(1,3)
## Plot it in terms of (angle, prob)
gl.plot_3D(Xthetta,Xfi, np.array(probs))
gl.plot_3D(Xthetta,Xfi, np.array(probs), project = "spher")


mu = np.random.randn(5, 1)
mu = gf.normalize_module(mu.T).flatten()
## Generate samples
RandWatson = Was.randWatson(Nsampling, mu, kappa)
gl.scatter_3D(RandWatson[:,0],RandWatson[:,1], RandWatson[:,2])

mu_est = Wae.get_MLmean(RandWatson)
kappa_est = Wae.get_MLkappa(mu_est, RandWatson)


mu_est2, kappa_est2 = Wae.get_Watson_muKappa_ML(RandWatson)
print "Real: ", mu, kappa
print "Estimate: ", mu_est2, kappa_est2
Example #12
            Xdata = np.concatenate((Xdata, copy.deepcopy(Xdata_chain)), axis=0)
        k = 1

################################################################
######## Perform the EM !! ###############
###############################################################
perform_EM = 1
if (perform_EM):
    K = 3
    D = Xdata.shape[1]

    pi_init = np.ones((1, K))
    pi_init = pi_init * (1 / float(K))

    mus_init = np.random.randn(D, K)
    mus_init = gf.normalize_module(mus_init.T).T

    kappas_init = np.random.uniform(-1, 1, K) * 10
    kappas_init = kappas_init.reshape(1, K)
    theta_init = [mus_init, kappas_init]

    ## RUN only one !!
    logl, theta_list, pimix_list = EMl.EM(Xdata,
                                          K=K,
                                          delta=0.1,
                                          T=100,
                                          pi_init=pi_init,
                                          theta_init=theta_init)

    ## RUN several !!
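    # The snippet is cut off here; a hedged sketch of the "RUN several" step,
    # mirroring the EMl.run_several_EM call shown in Example #5 (Ninit and verbose
    # are illustrative values, not from the original source):
    # logl, theta_list, pimix_list = EMl.run_several_EM(Xdata, K=K, delta=0.1,
    #                                                   T=100, Ninit=5, verbose=0)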
Example #13
def normalize_trialList(tL):
    # Normalize all the trials in a list
    tL_norm = []
    for i in range(len(tL)):  # Put the data in the sphere.
        tL_norm.append(gf.normalize_module(tL[i]))
    return tL_norm
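
# Usage sketch (hypothetical data, not from the original source; assumes
# gf.normalize_module normalizes each row to unit length):
trials_demo = [np.random.randn(50, 3), np.random.randn(80, 3)]
trials_demo_norm = normalize_trialList(trials_demo)  # every row of every trial now has unit norm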
Example #14
import general_func as gf
plt.close("all")

################################################################
######## Load and combine 3 sets ##############################
###############################################################

folder = "./HMM_data/"
HMM_list = pkl.load_pickle(folder + "HMM_datapoints.pkl", 1)
HMM_list2 = pkl.load_pickle(folder + "HMM2_datapoints.pkl", 1)

for i in range(len(HMM_list)):
    chain = HMM_list[i]
    Nsamples, Ndim = HMM_list[i].shape
    chain_noise = np.random.rand(Nsamples, Ndim) / 10
    HMM_list[i] = gf.normalize_module(chain + chain_noise)

for i in range(len(HMM_list2)):
    chain = HMM_list2[i]
    Nsamples, Ndim = HMM_list2[i].shape
    chain_noise = np.random.rand(Nsamples, Ndim) / 10
    HMM_list2[i] = gf.normalize_module(chain + chain_noise)

D = HMM_list[0].shape[1]

## Example: Cross-validation for the HMM
CV_HMM = 0
if (CV_HMM):

    ## Run 1
    States = [1, 2, 3, 4, 5, 6, 7]
Example #15
###############################################################
max_trials = 100  # Number of trials per class
Ntrials, NtimeSamples, Ndim = X_All_labels[0].shape
possible_channels = range(Ndim)
Nchannels_chosen = 5  # We select this number of channels

channel_sel = np.random.permutation(possible_channels)[0:Nchannels_chosen].flatten().tolist()  # Randomly chosen subset of channel indices
#channel_sel = [20, 21, 22]

X_data_trials, X_data_labels = dp.preprocess_data_set (
                                X_All_labels, label_classes, 
                                max_trials = max_trials, channel_sel= channel_sel)

X_data_ave = dp.get_timeSeries_average_by_label(X_All_labels, channel_sel = channel_sel)
for i in range (len(X_data_ave)):  # Put the data in the sphere.
    X_data_ave[i] = gf.normalize_module(X_data_ave[i])


################# Separate into train and validation ############
X_train, X_test, y_train, y_test = train_test_split(X_data_trials, X_data_labels, test_size=0.50, random_state = 0, stratify = X_data_labels)

####################################################### 
######################### EM ########################### 
####################################################### 

EM_flag = 1

if (EM_flag):
    Ninit = 10
    K = 6
    verbose = 0