コード例 #1
0
ファイル: helpers.py プロジェクト: simonamtoft/tfde-tfp
def compute_fair(K_tt, M, even=False):
    """ Computes fair parameters.
    Computes the number of components for the GMM and CP models such that
    each has at least as many free parameters as the Tensor Train model,
    so that the three models can be compared fairly.

    The GMM search starts at K_tt and the CP search starts at the K found
    for the GMM; both step upwards until the parameter count reaches that
    of the Tensor Train model.

    Input
      K_tt  (int)   : The value of K for the Tensor Train model.
      M     (int)   : The number of dimensions of the data, 
                      which the models are trained on
      even  (bool)  : Whether the returned Ks are required to be even.

    Return
      K_gmm (int) : The value of K for the GMM model.
      K_cp  (int) : The value of K for the CP model.
    """

    # Step size of the search; a step of 2 preserves parity
    addi = 2 if even else 1

    # number of free parameters for TT
    n_tt = m.TensorTrainGaussian(K_tt, M).n_parameters()

    # number of free parameters for GMM
    K_gmm = K_tt
    if even and K_gmm % 2:
        # An odd start combined with a step of 2 could never become even,
        # so round up to the next even value before searching.
        K_gmm += 1
    n_gmm = m.GMM(K_gmm, M).n_parameters()
    while n_gmm < n_tt:
        K_gmm += addi
        n_gmm = m.GMM(K_gmm, M).n_parameters()

    # number of free parameters for CP (search continues from K_gmm)
    K_cp = K_gmm
    n_cp = m.CPGaussian(K_cp, M).n_parameters()
    while n_cp < n_tt:
        K_cp += addi
        n_cp = m.CPGaussian(K_cp, M).n_parameters()

    return K_gmm, K_cp
コード例 #2
0
ファイル: CIFAR10.py プロジェクト: simonamtoft/tfde-tfp
# Validation curve and cosmetics for the second subplot
val_axis = ax[1]
val_axis.plot(Ks, error_val, 'r.-', markersize=10)
val_axis.set_ylabel('Negative log-likelihood')
val_axis.set_xlabel('K')
val_axis.set_title('Selecting K for ' + model_name + ' model')
val_axis.legend(['Train', 'Validation'])
val_axis.grid('on')
f.suptitle(name)
plt.show()

#%% Fit new model      (Set optimal K either directly or from cross-validation)
# Pick the K with the lowest validation error
idx = np.argmin(error_val)
K_opt = Ks[idx]
# K_opt = 30

# 'CP' selects the CP model; any other name falls back to Tensor Train
model_cls = m.CPGaussian if model_name == 'CP' else m.TensorTrainGaussian
model = model_cls(K_opt, M)

epochs = 1000

# Wrap the full and the reduced train/validation arrays as batched datasets
ds_train = d.to_tf_dataset(X_train, batch_size=batch_size)
ds_train_small = d.to_tf_dataset(X_train_small, batch_size=batch_size)
ds_val = d.to_tf_dataset(X_val, batch_size=batch_size)
ds_val_small = d.to_tf_dataset(X_val_small, batch_size=batch_size)

# Train and plot
losses_train, losses_val = model.fit_val(ds_train,
                                         ds_val,
                                         epochs,
コード例 #3
0
ファイル: CV_functions.py プロジェクト: simonamtoft/tfde-tfp
def CV_1_fold(data, Ks=np.arange(4, 8, 2), model_name='TT', 
              CV_splits=5, epochs=200, optimizer=None, batch_size=100):
    """ K-fold Cross Validation over a range of K values
    
    For every fold and every K a fresh model is fitted on the training part
    and evaluated on the held-out part; the errors are then averaged over
    the folds.
    
    Input
        data        :   The data to fit and test on. The method will split this 
                        into a training and testing set itself.
        Ks          :   Array or int of K values for the model
        model_name  :   Name of model to test ('TT', 'CP', 'GMM')
        CV_splits   :   Number of cross-validation folds
        epochs      :   How many epochs to use for fitting of the model
        optimizer   :   A tf.keras.optimizers to use for fitting the model
                        (defaults to Adam with learning rate 1e-3)
        batch_size  :   The desired batch size for the training data
    
    Return
        err_tr      :   Error on the training set, averaged over the folds
                        (one value per K)
        err_tst     :   Error on the testing set, averaged over the folds
                        (one value per K)

    Raises
        ValueError  :   If model_name is not one of 'TT', 'CP', 'GMM'
    """ 

    if optimizer is None:
        optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

    if np.isscalar(Ks): # Wrap a single K so it can be iterated over
        Ks = (Ks,)

    M = data.shape[1] # Dimension of data
    
    # Split data and shuffle
    CV = KFold(n_splits=CV_splits, shuffle=True)
    
    # Initialize error arrays, one row per fold and one column per K
    error_train = np.zeros((CV_splits, len(Ks)))
    error_test = np.zeros((CV_splits, len(Ks)))
    
    for i, (train_index, test_index) in enumerate(CV.split(data)):
        print(f'Cross-validation fold {i+1}/{CV_splits}')
        
        # split and normalize data
        X_train, X_test = data_split(data, train_index, test_index, batch_size)
        
        # create TF training dataset 
        ds_train = d.to_tf_dataset(X_train, batch_size=batch_size)
        
        for j, K in enumerate(Ks):
            # Fit model to training data
            if model_name == 'TT':
                model = m.TensorTrainGaussian(K, M)
                train_loss = model.fit(ds_train, epochs, optimizer, mute=True)
                test_loss = model(X_test)
            elif model_name == 'CP':
                model = m.CPGaussian(K, M)
                train_loss = model.fit(ds_train, epochs, optimizer, mute=True, mu_init='random')
                test_loss = model(X_test)
            elif model_name == 'GMM':
                model = GaussianMixture(n_components=K, covariance_type='full', n_init=5, init_params='random')
                model.fit(X_train)
                # score() is negated and wrapped in a list so that the
                # train_loss[-1] lookup below works for every model type
                train_loss = [-model.score(X_train)]
                test_loss = model.score_samples(X_test)
            else:
                raise ValueError('Provided model_name not valid')
            
            # Last training loss and negated mean of the per-sample test
            # values (presumably log-likelihoods for TT/CP -- confirm)
            error_train[i, j] = train_loss[-1]
            error_test[i, j] = -tf.reduce_mean(test_loss).numpy()
    
    # Get average error across splits (computed once, after all folds)
    err_tr = np.mean(error_train, axis=0) # mean training error over the CV folds
    err_tst = np.mean(error_test, axis=0) # mean test error over the CV folds
    return err_tr, err_tst
コード例 #4
0
ファイル: CV_functions.py プロジェクト: simonamtoft/tfde-tfp
def CV_holdout(X_train,X_val, Ks=np.arange(4, 8, 2), model_name='TT', 
              epochs=200, optimizer=None, batch_size=100, N_init = 5):
    """ Holdout Cross Validation to find optimal K
    
    Input
        X_train     :   The data to fit the models on
        X_val       :   The held-out data to validate the models on
        Ks          :   Array or int of K values for the model
        model_name  :   Name of model to test ('TT', 'CP')
        epochs      :   How many epochs to use for fitting of the model
        optimizer   :   A tf.keras.optimizers to use for fitting the model
                        (defaults to Adam with learning rate 1e-3)
        batch_size  :   The desired batch size for the training and
                        validation data
        N_init      :   How many initializations the model should do
    
    Return CV_dict with values
        error_train            :   Final training loss for each K
        error_val              :   Final validation loss for each K
        train_learning_curves  :   Training loss curve for each K
        val_learning_curves    :   Validation loss curve for each K

    Raises
        ValueError  :   If model_name is not one of 'TT', 'CP'
    """ 

    if optimizer is None:
        optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

    if np.isscalar(Ks): # Wrap a single K so it can be iterated over
        Ks = (Ks,)

    M = X_train.shape[1] # Dimension of data
    
    # create TF training and validation datasets
    ds_train = d.to_tf_dataset(X_train, batch_size=batch_size)
    ds_val = d.to_tf_dataset(X_val, batch_size=batch_size)
    
    # Initialize error arrays, one entry per K
    error_train = np.zeros((len(Ks)))
    error_val = np.zeros((len(Ks)))
    train_learning_curves = []
    val_learning_curves = []
    
    for i,K in tqdm(enumerate(Ks),desc='Fitting for K',total=len(Ks),position=0,leave=True):
        # Build the requested model; GMM is not supported by this routine
        if model_name == 'TT':
            model = m.TensorTrainGaussian(K, M)
        elif model_name == 'CP':
            model = m.CPGaussian(K, M)
        else:
            raise ValueError('Provided model_name not valid')

        # Fit on the training data while tracking the validation loss
        train_loss, val_loss = model.fit_val(ds_train, ds_val, epochs,
                                             optimizer, mute=True, N_init=N_init)
        
        train_learning_curves.append(train_loss)
        val_learning_curves.append(val_loss)
        # The last entry of each curve is taken as the error for this K
        error_train[i] = train_loss[-1]
        error_val[i] = val_loss[-1]
        
    CV_dict = {
        'error_train' : error_train,
        'error_val' : error_val,
        'train_learning_curves' : train_learning_curves,
        'val_learning_curves' : val_learning_curves
        }

    return CV_dict
コード例 #5
0
ファイル: CP_gmm.py プロジェクト: simonamtoft/tfde-tfp
# Inspect the data: scatter the first two columns against each other
f, ax = plt.subplots(figsize=(5, 5))
ax.plot(data[:, 0], data[:, 1], '.')
ax.axis('equal')  # same scale on both axes
ax.set_title(name + f' with {N} points')
plt.show()

# Split into batches
batch_size = 200
dataset = d.to_tf_dataset(data, batch_size=batch_size)

#%% Define model and training parameters
K = 8  # Number of components
M = data.shape[1]  # Number of dimensions in data
model = m.CPGaussian(K, M)
# Alternative model, kept for switching by hand:
# model = m.GMM(K,M)

EPOCHS = 200
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

#%% Train model
# NOTE(review): 'kmeans' is passed positionally; presumably it selects the
# initialisation of the means (cf. mu_init elsewhere) -- confirm against fit()
losses = model.fit(dataset, EPOCHS, optimizer, 'kmeans')
# Call used with the alternative GMM model above:
# losses = model.fit(data,10,'kmeans')

# Plot the values returned by fit() against their index
f, ax = plt.subplots()
ax.plot(range(len(losses)), np.array(losses))
ax.set_title('Training loss')
ax.set_xlabel('iteration')
plt.show()