Example 1
def decomposition_interact_re_button(self, foo):
    # select the EEM slices between the two chosen dataset labels (inclusive)
    dataset = self.EEMstack_cw[self.datlist_cw.index(self.range1.value):
                               self.datlist_cw.index(self.range2.value) + 1]
    if self.decomposition_method_list.value == 'parafac':
        factors = parafac(dataset, rank=self.rank_display.value)
    elif self.decomposition_method_list.value == 'non_negative_parafac':
        factors = non_negative_parafac(dataset,
                                       rank=self.rank_display.value)
    elif self.decomposition_method_list.value == 'test_function':
        factors = non_negative_parafac(dataset,
                                       rank=self.rank_display.value,
                                       fixed_modes=[0, 1],
                                       init="random")
    # factors[1] holds one factor matrix per tensor mode
    I_0 = factors[1][0]
    J_0 = factors[1][1]
    K_0 = factors[1][2]
    decomposition_reconstruction_interact(
        I_0,
        J_0,
        K_0,
        self.EEMstack_cw[self.datlist_cw.index(self.data_to_view.value)],
        self.Em_range_cw,
        self.Ex_range_cw,
        self.datlist_cw[self.datlist_cw.index(self.range1.value):
                        self.datlist_cw.index(self.range2.value) + 1],
        self.data_to_view.value,
        crange=self.crange_cw.value)
Example 2
    def factorize(self, n_factors, new_mz_len=None, gauss_params=None):

        factors = []
        # if several datasets were concatenated, factorize the concatenated grid directly
        if self.n_concatenated != 1:
            nnp = non_negative_parafac(self.concatenated_grid,
                                       n_factors,
                                       init='random')
            for i in range(n_factors):
                factors.append(
                    Factor(tensor_idx=self.tensor_idx,
                           charge_state=self.charge_state,
                           rts=nnp[1][0].T[i],      # component i along mode 0 (retention times)
                           dts=nnp[1][1].T[i],      # component i along mode 1 (drift times)
                           mz_data=nnp[1][2].T[i],  # component i along mode 2 (m/z)
                           factor_idx=i,
                           n_factors=n_factors,
                           lows=self.lows,
                           highs=self.highs,
                           abs_mz_low=self.mz_bin_low,
                           n_concatenated=self.n_concatenated))
            return factors

        if new_mz_len is not None:
            if gauss_params is not None:
                grid, lows, highs = interpolate(self.full_grid_out, new_mz_len,
                                                gauss_params[0],
                                                gauss_params[1])
            else:
                grid, lows, highs = interpolate(self.full_grid_out, new_mz_len)
        else:
            lows, highs = self.lows, self.highs
            if gauss_params is not None:
                grid = self.gauss(self.full_grid_out, gauss_params[0],
                                  gauss_params[1])
            else:
                grid = self.full_grid_out

        # factorize the (possibly interpolated or smoothed) grid once
        nnp = non_negative_parafac(grid, n_factors, init='random')
        for i in range(n_factors):
            factors.append(
                Factor(tensor_idx=self.tensor_idx,
                       charge_state=self.charge_state,
                       rts=nnp[1][0].T[i],
                       dts=nnp[1][1].T[i],
                       mz_data=nnp[1][2].T[i],
                       factor_idx=i,
                       n_factors=n_factors,
                       lows=lows,
                       highs=highs,
                       abs_mz_low=self.mz_bin_low,
                       n_concatenated=self.n_concatenated))
        return factors
Example 3
def perform_decomposition(tensor, r, weightFactor=2):
    """ Perform PARAFAC decomposition. """
    fac = non_negative_parafac(tensor, r, tol=1.0e-10, n_iter_max=6000)
    fac = tl.cp_normalize(fac)
    fac.factors[weightFactor] *= fac.weights[
        np.newaxis, :]  # Put weighting in designated factor
    return fac.factors
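# Hedged usage sketch (illustrative only, not from the original source; it
# assumes numpy as np and tensorly as tl are imported, as the function above
# already requires): cp_normalize gives every factor unit-norm columns, so
# folding the weights into factors[weightFactor] keeps the reconstruction scale.
demo_tensor = tl.tensor(np.random.rand(10, 8, 6))
demo_factors = perform_decomposition(demo_tensor, r=3)
print([f.shape for f in demo_factors])  # [(10, 3), (8, 3), (6, 3)]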
def tensDecomp(tens, rnk):
    # the rank must be strictly smaller than the feature dimension, since the
    # goal is to reduce the number of dimensions, not increase it
    if rnk >= len(tens[0][0]):
        print("Error in data: rank is not smaller than the number of features")
        return None
    factors = non_negative_parafac(tens, rnk)
    return factors
Example 5
def factorTensor(tensor, numComps):
    """ Takes a tensor and a component count, and returns its factorized form. """
    # fit a masked non-negative CP model so that NaN entries are ignored
    tfac = non_negative_parafac(np.nan_to_num(tensor),
                                rank=numComps,
                                mask=np.isfinite(tensor),
                                n_iter_max=5000,
                                tol=1e-9)
    # impute the missing entries from the masked model, then refit with HALS
    tensor = tensor.copy()
    tensor[np.isnan(tensor)] = tl.cp_to_tensor(tfac)[np.isnan(tensor)]
    return non_negative_parafac_hals(tensor, numComps, n_iter_max=5000)
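# Hedged usage sketch (illustrative, not from the original source; assumes
# numpy as np, tensorly as tl and the tensorly decomposition functions are
# imported): NaN entries are excluded from the first fit via the mask,
# imputed from the masked CP model, and the completed tensor is refit with HALS.
data = np.random.rand(6, 5, 4)
data[0, 0, 0] = np.nan                    # simulate a missing measurement
tfac_hals = factorTensor(data, numComps=2)
print(tl.cp_to_tensor(tfac_hals).shape)   # (6, 5, 4)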
Example 6
def separate_kernel(kernel, max_rank='sqrt'):
    # resolve max_rank: None -> smallest kernel dimension, 'sqrt' -> ceil of the
    # square root of the largest dimension, anything else -> a third of the
    # smallest dimension
    if max_rank is None:
        max_rank = min(*kernel.shape)
    elif max_rank == 'sqrt':
        max_rank = int(ceil(sqrt(max(*kernel.shape))))
    else:
        max_rank = min(*kernel.shape) // 3

    if kernel.ndim == 1:
        # a 1-D kernel is already separable
        return kernel
    else:
        P = non_negative_parafac(tensor(kernel), rank=max_rank)
        return [D.T for D in P]


def show_proj(factors, matlab_data, ex1_em0):
    # plot the weighted projection of each of the three components along the
    # emission axis (ex1_em0 == 1) or the excitation axis (ex1_em0 == 0)
    color = ["r", "g", "b"]
    for i in range(3):
        mat1 = np.array(factors[1][1]).transpose()[i]
        w = tl.norm(factors[1][0][i])

        sq_mat = np.array(mat1).reshape(61, 201)
        m = np.max(sq_mat)
        ind = tl.where(sq_mat == m)
        if ex1_em0 == 1:
            x = matlab_data["EmAx"][0]
            y = w * tl.transpose(sq_mat[ind[0]])
        else:
            x = matlab_data["ExAx"][0]
            y = w * np.transpose(tl.transpose(sq_mat)[ind[1]])
        plt.grid()
        plt.plot(x, y, color[i])
    plt.show()


if __name__ == "__main__":
    mat = scipy.io.loadmat('C:/Users/Tatiana/Desktop/amino.mat')

    ex, em = transform_data(mat)
    factors = non_negative_parafac(mat["X"], rank=3, n_iter_max=2000)
    show_proj(factors, mat, 0)
    show_proj(factors, mat, 1)
Example 8
    entry = row.split(sep=',')
    for i in range(441):
        column = i * 4
        Box1List.append(float(entry[column]))
        Box2List.append(float(entry[column + 1]))
        Box3List.append(float(entry[column + 2]))
        Box4List.append(float(entry[column + 3]))

# Creating AllBoxes, which is a huge list of all SpectraMatrix data for easy tensor creation
AllBoxes = Box1List + Box2List + Box3List + Box4List

# The tensor, in the proper shape, that we hope to do CPT on
theTensor = np.array(AllBoxes).reshape((4, 200, 441))

theCPT = non_negative_parafac(theTensor, rank=4)  # The CPT decomposition

matlab5factor1 = pd.read_csv('data files/factor1rank5Matlab.csv', header=None)
matlab5factor2 = pd.read_csv('data files/factor2rank5Matlab.csv', header=None)
matlab5factor3 = pd.read_csv('data files/factor3rank5Matlab.csv', header=None)
matlab8factor1 = pd.read_csv('data files/factor1rank8Matlab.csv', header=None)
matlab8factor2 = pd.read_csv('data files/factor2rank8Matlab.csv', header=None)
matlab8factor3 = pd.read_csv('data files/factor3rank8Matlab.csv', header=None)

matlab5factor2 = th.attachXAxis(matlab5factor2)

# ------- Cool subplots of the MATLAB CPT -------

plt.figure(figsize=(15, 9))
plt.suptitle('Factor 2 After CP Tensor Decomposition', fontsize=22)
    print(filename)
    if os.path.isfile(filename):
        svd = np.load(filename)
        u = svd['u'][:, :rank]
        s = svd['s'][:rank]
        vt = svd['vt'][:rank, :]
        # rebuild the rank-truncated matrix from its stored SVD factors
        T[graph_idx, :, :] = np.dot(np.dot(u, np.diag(s)), vt)
        del svd
        graph_idx += 1
    else:
        continue

factors = non_negative_parafac(T,
                               rank=comm,
                               n_iter_max=100,
                               verbose=1,
                               init='random',
                               tol=1e-8)
del T

save = True
load = False

fileName = ('/media/garner1/hdd1/gpseq' + '/info_10000G' +
            '/nnparafac_WOintraChrom' + '_rank' + str(rank) +
            '_sample' + str(ind) + '_size' + str(samples) + '.pkl')

if save:
    with open(fileName, 'wb') as fileObject:
        pkl.dump(factors, fileObject)
Example 10
def analyze_with_tensor_decomposition(mat, x, y, z):
    print("Tensor Decomposition starts...")
    tensor = tl.tensor(mat)
    print("The given tensor:")
    print(tensor)

    start_analysis_fignmbr = 0

    # `rank` is expected to be defined at module scope
    weights, factors = non_negative_parafac(tensor, rank=rank)

    print("Factors: ")
    print(factors)

    print("Slice: ")
    print(factors[0])
    print(list(factors[0][:, 0]))
    user = [i for i in range(0, x)]
    print(user)
    thread = [i for i in range(0, y)]
    time = [i for i in range(0, z)]

    userdataset = []
    threaddataset = []
    timedataset = []

    print("Tensor figures drawing..")
    plt.figure(1)
    for i in range(start_analysis_fignmbr,rank*3):
        fignmbr=(i%3)+1
        col=int(i/3)
        row=int(i%3)
        
        print("row: ",row," col: ",col," fignmbr: ",fignmbr)
        y=list(factors[row][:,col])
        #print(y)
        
        ax=plt.subplot(1,3,fignmbr)
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        if i%3==0:
            plt.plot(user,y,'r.')
            plt.xlabel("User")
            userdataset.append(y)
            
            
        if i%3==1:
            plt.plot(thread,y,'c.')
            plt.xlabel("Thread")
            threaddataset.append(y)
            
            
        if i%3==2:
            plt.plot(time,y,'g.')
            plt.xlabel("Monthly TIme Bin")
            timedataset.append(y)
            
        if fignmbr%3==0:
            
            directory='figures/component ' + str(int((i+1)/3))
            plt.savefig(directory)
           
            plt.close()
    

    # these values are only placeholders; the thresholds are recalculated inside the called function
    u_t = 0.1
    th_t = 0.05
    t_t = 0.005

    print("Finding and drawing scree plots in figure anamalous...")
    map_component_userlist,map_component_user_value=find_anomalous_component(userdataset,threaddataset,"Number of User","Number of Thread",u_t,th_t,1)
    map_component_timelist,map_component_time_value=find_anomalous_component(timedataset,userdataset,"Number of Time Bin","Number of User",t_t,u_t,3)
    map_component_threadlist,map_component_thread_value=find_anomalous_component(threaddataset,timedataset,"Number of Thread","Number of Time Bin",th_t,t_t,2)
    print("checking")
    print(map_component_userlist.keys())
    print(map_component_userlist[0])
    print(map_component_user_value[0])
    return map_component_userlist,map_component_user_value,map_component_timelist,map_component_time_value,map_component_threadlist,map_component_thread_value
    filename = os.path.join(path, dirname + '/coords.csv_sparse_graph.npz')
    if os.path.isfile(filename):
        # load_npz opens the file itself; no separate file handle is needed
        mat = scipy.sparse.load_npz(filename).astype(np.float32).todense()
        # sub_threshold_indices = mat < 0.5
        # mat[sub_threshold_indices] = 0
        T[count, :, :] = mat
        del mat
        count += 1
    else:
        continue

factors = non_negative_parafac(T,
                               rank=rank,
                               verbose=1,
                               init='random',
                               tol=1e-4)
# print(factors[0])
# print([f.shape for f in factors[1]])
# print([tl.norm(factors[1][0][:,ind],2)*tl.norm(factors[1][1][:,ind],2)*tl.norm(factors[1][2][:,ind],2) for ind in range(rank)])
del T

import pickle as pkl
save = True
load = False

fileName = path + '/NNparafac' + '_rank' + str(rank) + '_sample' + str(
    ind) + '_size' + str(samples) + '.pkl'
fileObject = open(fileName, 'wb')
Example 12
def seq_parafac(input_tensor,
                max_rank,
                nb_trial,
                pred_df,
                tol=1e-07,
                mode='non-negative'):
    """Sequential TCA for model selection
	
	This method computes TCA with a number of components ranging from 1 to the maximum rank indicated,
	and stores reconstruction error, similarity, sparsity for each model obtained. It fits also random
	forest classifiers to predict reward and odor for each trial using trial factors, and it stores
	the associated prediction accuracy.
	
	Arguments:
		input_tensor {array} -- 3-dimensional activity array
		max_rank {scalar} -- maximum rank for TCA
		nb_trial {scalar} -- number of replication of same TCA
		pred_df {pandas dataframe} -- reward and odor information for each trial
	
	Keyword Arguments:
		tol {scalar} -- tolerance for optimization convergence
		mode {string} -- version of TCA to compute, classic or non-negative (default: {'non-negative'})
	
	Returns:
		pandas err_df -- reconstruction error for each TCA run
		pandas sim_df -- similarity against best fit for each TCA run
		pandas spa_df -- sparsity for each TCA run
		pandas odor_df -- odor prediction accuracy for each TCA run
		pandas rew_df -- reward prediction accuracy for each TCA run
	"""

    # lists used for output dataframes
    rank_err = []
    rank_sim = []
    err_list = []
    sim_list = []
    spa_list = []

    odor_acc = []
    odor_std = []
    rew_acc = []
    rew_std = []

    for rank in np.arange(1, max_rank + 1):

        # in this list we store factors extracted with TCA for each run
        pred_fac = []
        # minimal error, initialized at its maximum of 1; used to identify the best-fit model
        min_err = 1
        # index of the best-fit model in the factors list
        min_idx = 0

        # we iterate over nb_trial, the number of replicates of TCA to run;
        # replicates make stability checks possible (e.g. model similarity)
        for trial in range(nb_trial):

            # verbose to know which replicate is running
            print('Trial', trial)

            # build a list useful for err_df
            rank_err.append(rank)

            if mode == 'non-negative':
                # where TCA is actually computed, here in its non-negative version
                pred_fac.append(
                    non_negative_parafac(input_tensor,
                                         rank=rank,
                                         n_iter_max=1000,
                                         init='svd',
                                         tol=tol,
                                         verbose=1))
            else:
                # where TCA is actually computed, in its classic version
                pred_fac.append(
                    parafac(input_tensor,
                            rank=rank,
                            n_iter_max=5000,
                            init='random',
                            tol=tol))
            # we store all factors in a list, to be able to compute model similarity in the end

            # transform pred_fac from kruskal form (list of factors) to full-tensor form
            pred_tensor = tl.kruskal_to_tensor(pred_fac[trial])
            # compute reconstruction error, L2 distance from predicted to original tensor
            err = rec_err(input_tensor, pred_tensor)
            err_list.append(err)

            # here we compute sparsity, the proportion of almost-zero elements
            nb_nonzero = 0
            tot_size = 0

            for i in range(len(pred_fac[trial])):
                nb_nonzero += np.count_nonzero(np.round(pred_fac[trial][i], 2))
                tot_size += pred_fac[trial][i].size

            spa = 1 - nb_nonzero / tot_size
            spa_list.append(spa)

            # we shuffle the samples matrix (here, the trial factors) and the
            # labels (odor and reward) using the same permutation
            X, y_odor, y_rew = shuffle(pred_fac[trial][2],
                                       pred_df['Odor'].tolist(),
                                       pred_df['Reward'].tolist())

            # initialize random forest classifier
            clf = RandomForestClassifier(n_estimators=50,
                                         max_depth=None,
                                         min_samples_split=2,
                                         max_features='sqrt')

            # scale the data before fitting
            X = StandardScaler().fit_transform(X)

            # compute cross-validated prediction accuracy for odor and reward,
            # running cross_val_score once per target and reusing the scores
            odor_scores = cross_val_score(clf, X, y_odor, cv=8)
            rew_scores = cross_val_score(clf, X, y_rew, cv=8)
            odor_acc.append(odor_scores.mean())
            odor_std.append(odor_scores.std())
            rew_acc.append(rew_scores.mean())
            rew_std.append(rew_scores.std())

            # we keep track of the model having lowest reconstruction error
            # we will use this model as a reference to compute model similarity
            if err < min_err:
                min_err = err
                min_idx = trial

        # we iterate again over all computed models to calculate similarity
        # versus best fit model
        for trial in range(nb_trial):

            # if the model is the best fit, do nothing
            if trial == min_idx:
                continue

            # build a list useful for sim_df
            rank_sim.append(rank)

            # align factors to compute similarity
            sim_list.append(
                tt.kruskal_align(tt.tensors.KTensor(pred_fac[min_idx]),
                                 tt.tensors.KTensor(pred_fac[trial]),
                                 permute_U=True,
                                 permute_V=True))

    # build dataframes to store results
    err_df = pd.DataFrame(data=np.transpose([rank_err, err_list]),
                          columns=['Rank', 'Reconstruction Error'])
    sim_df = pd.DataFrame(data=np.transpose([rank_sim, sim_list]),
                          columns=['Rank', 'Similarity'])
    spa_df = pd.DataFrame(data=np.transpose([rank_err, spa_list]),
                          columns=['Rank', 'Sparsity'])
    odor_df = pd.DataFrame(data=np.transpose([rank_err, odor_acc, odor_std]),
                           columns=[
                               'Rank', 'Accuracy - Odor Prediction',
                               'Std - Odor Prediction'
                           ])
    rew_df = pd.DataFrame(data=np.transpose([rank_err, rew_acc, rew_std]),
                          columns=[
                              'Rank', 'Accuracy - Reward Prediction',
                              'Std - Reward Prediction'
                          ])

    return err_df, sim_df, spa_df, odor_df, rew_df
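# Hedged usage sketch (illustrative only; it assumes the module-level imports
# the original file relies on: numpy, pandas, tensorly, tensortools as tt, the
# sklearn helpers and the rec_err utility). A small random activity tensor and
# a matching trial dataframe exercise the selection loop up to rank 3:
acts = np.random.rand(20, 50, 40)            # neurons x time x trials
meta = pd.DataFrame({'Odor': np.random.randint(0, 2, 40),
                     'Reward': np.random.randint(0, 2, 40)})
err_df, sim_df, spa_df, odor_df, rew_df = seq_parafac(acts,
                                                      max_rank=3,
                                                      nb_trial=2,
                                                      pred_df=meta)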
Example 13
                break

final_tensor = tl.tensor(final_tensor, dtype='float64')

# Training Tucker and PARAFAC tensor decomposition models (the PARAFAC factors
# computed second are the ones used for the reconstruction below)
core, factors = non_negative_tucker(final_tensor,
                                    ranks=[120, 20, 220],
                                    n_iter_max=100,
                                    init='random',
                                    tol=0.00001,
                                    random_state=None,
                                    verbose=True)
factors = non_negative_parafac(final_tensor,
                               rank=70,
                               n_iter_max=100,
                               init='random',
                               tol=0.00001,
                               random_state=None,
                               verbose=True)

reconstructed_tensor = tl.kruskal_to_tensor(factors)

nonzero_mat = np.nonzero(final_tensor)

# Computing RMSE over the observed (non-zero) entries: accumulate squared
# residuals here; the square root is taken after the loop (truncated below)
error = 0
for i in range(len(nonzero_mat[0])):
    idx = (nonzero_mat[0][i], nonzero_mat[1][i], nonzero_mat[2][i])
    error += (final_tensor[idx] - reconstructed_tensor[idx])**2
Example 14
# We initialize the factors of the NCP at random, and transform these factors
# (and the factor weights) into an instance of the CPTensor class:

weights_init, factors_init = initialize_nn_cp(tensor, init='random', rank=10)

cp_init = CPTensor((weights_init, factors_init))

##############################################################################
# Non-negative Parafac
# -----------------------
# From now on, we can use the same ``cp_init`` tensor as the initial tensor when
# we use decomposition functions. Now let us first use the algorithm based on
# Multiplicative Update, which can be called as follows:

tic = time.time()
tensor_mu, errors_mu = non_negative_parafac(tensor, rank=10, init=deepcopy(cp_init), return_errors=True)
cp_reconstruction_mu = tl.cp_to_tensor(tensor_mu)
time_mu = time.time()-tic

##############################################################################
# Here, we also compute the output tensor from the decomposed factors by using
# the cp_to_tensor function. The tensor cp_reconstruction_mu is therefore a
# low-rank non-negative approximation of the input tensor; looking at the
# first few values of both tensors shows that this is indeed
# the case but the approximation is quite coarse.

print('reconstructed tensor\n', cp_reconstruction_mu[10:12, 10:12, 10:12], '\n')
print('input data tensor\n', tensor[10:12, 10:12, 10:12])

##############################################################################
# Non-negative Parafac with HALS
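# -----------------------
# The excerpt is truncated here. As a hedged sketch (not part of the original
# snippet), the HALS variant can be run the same way as the MU version above,
# reusing the same cp_init, assuming non_negative_parafac_hals is imported
# from tensorly.decomposition alongside non_negative_parafac:

tic = time.time()
tensor_hals, errors_hals = non_negative_parafac_hals(tensor, rank=10, init=deepcopy(cp_init), return_errors=True)
cp_reconstruction_hals = tl.cp_to_tensor(tensor_hals)
time_hals = time.time() - tic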
delt1 = 10

for i in range(len(names)):
    x, y, z = read_file(default_way + names[i] + '.txt')
    z = erase_first_line(x, y, z)
    z = erase_second_line(x, y, z)
    #fig = plt.figure(figsize=(5, 5))
    #show_data(x, y, z, fig)
    tensor_data.append(z)

#plt.show()
# extract the factors
for rank in range(15, 16, 1):
    factors = decomp.non_negative_parafac(np.array(tensor_data),
                                          rank,
                                          n_iter_max=2000,
                                          tol=1e-6)
    fig = plt.figure(figsize=(5, 5))
    print(factors[0])
    plt.subplot(311)
    for i in range(rank):
        z_f1 = np.transpose(factors[1][0])[i]
        plt.plot(np.linspace(1, len(names), len(names), endpoint=True), z_f1)

    plt.subplot(312)
    for i in range(rank):
        z_f1 = np.transpose(factors[1][1])[i]
        plt.plot(x, z_f1)

    plt.subplot(313)
    for i in range(rank):
Example 16
            mat = scipy.sparse.load_npz(filename).astype(np.float32).todense()
            # sub_threshold_indices = mat < 0.5
            # mat[sub_threshold_indices] = 0
            # 3 eigenvectors should be enough because the data is x, y, z times beads
            u, s, vh = tl.partial_svd(mat, n_eigenvecs=3)
            T[count, :, :] = np.dot(u, np.dot(np.diag(s), vh))
            del mat
        count += 1
    else:
        continue

factors = non_negative_parafac(T,
                               rank=rank,
                               n_iter_max=10000,
                               verbose=1,
                               init='svd',
                               tol=1e-10)
# print(factors[0])
# print([f.shape for f in factors[1]])
# print([tl.norm(factors[1][0][:,ind],2)*tl.norm(factors[1][1][:,ind],2)*tl.norm(factors[1][2][:,ind],2) for ind in range(rank)])
del T

import pickle as pkl
save = True
load = False

fileName = path + '/NNparafac' + '_rank' + str(rank) + '_sample' + str(
    ind) + '_size' + str(samples) + '.pkl'
fileObject = open(fileName, 'wb')