Example no. 1
def withPCA(dimensions):
    '''
    Finds the principal components of fmri_train and keeps the number of
    components given in dimensions. It then runs lasso on every semantic
    feature for a list of lambdas from 80 to 120 and keeps the w with the
    lowest RMSE on the validation data. It returns the accuracy on the test
    data, the best weights, and the PCA fit. It also saves w to a file.
    :param dimensions: number of dimensions for the principal components
    :return: accuracy, bestw, the PCA fit
    '''
    pca = PCA(n_components=dimensions)
    pca.fit(fmri_train)  # fit PCA on the training fMRI data only
    xtrainpcaed = pca.transform(fmri_train)
    xtrainPCA = sparse.csc_matrix(xtrainpcaed)  # sparse column format for the lasso solver
    xtest = pca.transform(fmri_test)  # project the test data onto the same components
    num_features = ytrain.shape[1]
    d = xtrainPCA.shape[1]
    ntotdata = xtrainPCA.shape[0]
    ntrain = 250  # number of data to be trained on, rest are used as cross validation
    bestw = np.zeros([num_features, d])
    accuracy = np.zeros(d)
    lasso = lassoSolver.LassoClass()
    lambda_list = list(range(80, 120))  # list of lambdas to use
    for i in range(num_features):  # pick the best w for each semantic feature by validation RMSE
        print('looking at feature ', i)
        bestw[i, :] = lasso.descendingLambda(
            ytrain[0:ntrain, i].reshape(ntrain, 1), xtrainPCA[0:ntrain, :],
            ytrain[ntrain:, i].reshape(ntotdata - ntrain, 1),
            xtrainPCA[ntrain:, :], lambda_list).reshape(d)
    wfile = "allwallfeatures_pca300_lambda80_120.mtx"  # name of w file to save as
    io.mmwrite(wfile, bestw)
    test_suite.main(wfile, wordid_train, wordid_test, wordfeature_std, xtest)
    return [accuracy, bestw, pca]
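
For orientation, here is a minimal, self-contained sketch of the same pattern (PCA, a fixed train/validation split, and per-feature selection of the regularization strength by validation RMSE). It uses scikit-learn's Lasso instead of the repository's lassoSolver.LassoClass, random stand-in data, and an illustrative alpha grid, so it is an assumption-laden sketch rather than the code above.

import numpy as np
from sklearn.decomposition import PCA
from sklearn.linear_model import Lasso

rng = np.random.default_rng(0)
X = rng.normal(size=(300, 1000))   # stand-in for fmri_train (assumed shape)
Y = rng.normal(size=(300, 218))    # stand-in for ytrain (assumed shape)

pca = PCA(n_components=50).fit(X)  # fit PCA on the training data only
Xp = pca.transform(X)
ntrain = 250                       # remaining rows act as validation data

best_w = np.zeros((Y.shape[1], Xp.shape[1]))
for i in range(Y.shape[1]):
    best_rmse = np.inf
    for alpha in (0.1, 0.5, 1.0):  # illustrative grid, not the 80..120 lambdas above
        model = Lasso(alpha=alpha).fit(Xp[:ntrain], Y[:ntrain, i])
        pred = model.predict(Xp[ntrain:])
        rmse = np.sqrt(np.mean((pred - Y[ntrain:, i]) ** 2))
        if rmse < best_rmse:
            best_rmse, best_w[i, :] = rmse, model.coef_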
Example no. 2
def nonlinearFeatures(dimensions):
    '''
    Applies PCA for the given number of dimensions. It then multiplies each
    principal component with every other principal component to create
    nonlinear features and finds the least squares solution.
    :param dimensions: number of principal components to keep
    :return: [accuracy, rmsetrain, rmsetest, rmsetestwrong]
    '''
    [xtrainPCA, xtestPCA] = pcaData(dimensions, fmri_train, fmri_test)  # fit on train, project train and test
    n = xtrainPCA.shape[0]
    dimPCA = xtrainPCA.shape[1]
    # original columns plus one column per pair (i, j) with i < j
    xNonlinear = np.zeros((n, dimPCA + dimPCA * (dimPCA - 1) // 2))
    xtestNonlinear = np.zeros(
        (xtestPCA.shape[0], dimPCA + dimPCA * (dimPCA - 1) // 2))
    counter = dimPCA
    xNonlinear[:, 0:dimPCA] = xtrainPCA
    xtestNonlinear[:, 0:dimPCA] = xtestPCA
    for i in range(dimPCA):
        for j in range(i + 1, dimPCA):
            xNonlinear[:, counter] = xtrainPCA[:, i] * xtrainPCA[:, j]
            xtestNonlinear[:, counter] = xtestPCA[:, i] * xtestPCA[:, j]
            counter += 1
    num_features = ytrain.shape[1]
    d = xNonlinear.shape[1]
    ntotdata = xNonlinear.shape[0]
    bestw = np.zeros([num_features, d])
    bestw0 = np.zeros(num_features)
    accuracy = np.zeros(d)
    for i in range(num_features):
        y = ytrain[:, i].reshape(ntotdata, 1)
        x = xNonlinear[:, :]
        w = least_squares(x, y)
        bestw[i, :] = w.reshape(d)
    wfile = "w_lse_nonlinear.mtx"
    io.mmwrite(wfile, bestw)
    [accuracy] = test_suite.main(bestw, bestw0, wordid_test, wordfeature_std,
                                 xtestNonlinear)
    print(accuracy)
    [ytest, ywrong] = test_suite.prepareData(wordid_test, wordfeature_std)
    rmsetest = rmse_per_semantic_feature(xtestNonlinear, ytest, bestw)
    rmsetrain = rmse_per_semantic_feature(xNonlinear, ytrain, bestw)
    rmsetestwrong = rmse_per_semantic_feature(xtestNonlinear, ywrong, bestw)
    return [accuracy, rmsetrain, rmsetest, rmsetestwrong]
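
The double loop above builds the original principal components followed by every pairwise product of two distinct components. A small sanity check of that expansion, on random stand-in data, is sketched below; scikit-learn's PolynomialFeatures with interaction_only=True should produce the same columns in the same order.

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

rng = np.random.default_rng(0)
Xp = rng.normal(size=(10, 5))  # stand-in for xtrainPCA
dimPCA = Xp.shape[1]

# original columns plus one column per pair (i, j) with i < j
expanded = np.zeros((Xp.shape[0], dimPCA + dimPCA * (dimPCA - 1) // 2))
expanded[:, :dimPCA] = Xp
counter = dimPCA
for i in range(dimPCA):
    for j in range(i + 1, dimPCA):
        expanded[:, counter] = Xp[:, i] * Xp[:, j]
        counter += 1

poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
assert np.allclose(expanded, poly.fit_transform(Xp))  # same features, same order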
Example no. 3
def findw_PCA_LSE(dimensions,
                  train_data,
                  test_data,
                  wordid_test,
                  ytrain,
                  wfile="w_lse_dim299.mtx"):
    '''
    Computes the given number of principal components of the data and uses
    them to solve least squares, obtaining the weights for each semantic
    feature. Using those weights it predicts the semantic features on the
    test data set and returns the accuracy on a guess out of 2 words. It also
    returns the RMSE on the test and training data, and on the wrong column
    of the test data.
    :param dimensions: number of PCA components
    :param train_data: training data
    :param test_data: test data
    :param wordid_test: ids of the words in the test data set
    :param ytrain: semantic feature values for the training data
    :param wfile: name of the file where the weights will be written
    :return: [accuracy, rmsetrain, rmsetest, rmsetestwrong]
    '''
    [xtrainPCA, xtestPCA] = pcaData(dimensions, train_data, test_data)
    num_features = ytrain.shape[1]
    d = xtrainPCA.shape[1]
    ntotdata = xtrainPCA.shape[0]
    bestw = np.zeros([num_features, d])
    bestw0 = np.zeros(num_features)
    accuracy = np.zeros(d)
    for i in range(num_features):
        print('looking at feature ', i)
        y = ytrain[:, i].reshape(ntotdata, 1)
        x = xtrainPCA[:, :]
        w = least_squares(x, y)
        bestw[i, :] = w.reshape(d)
    io.mmwrite(wfile, bestw)
    [accuracy] = test_suite.main(bestw, bestw0, wordid_test, wordfeature_std,
                                 xtestPCA)
    print(accuracy)
    [ytest, ywrong] = test_suite.prepareData(wordid_test, wordfeature_std)
    rmsetest = rmse_per_semantic_feature(xtestPCA, ytest, bestw)
    rmsetrain = rmse_per_semantic_feature(xtrainPCA, ytrain, bestw)
    rmsetestwrong = rmse_per_semantic_feature(xtestPCA, ywrong, bestw)
    return [accuracy, rmsetrain, rmsetest, rmsetestwrong]
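
The examples above call pcaData, least_squares and rmse_per_semantic_feature, which are defined elsewhere in the repository. The sketches below are inferred from how they are called here and are only plausible stand-ins, not the actual definitions.

import numpy as np
from sklearn.decomposition import PCA

def pcaData(dimensions, train_data, test_data):
    # Fit PCA on the training data and project both sets onto its components.
    pca = PCA(n_components=dimensions).fit(train_data)
    return [pca.transform(train_data), pca.transform(test_data)]

def least_squares(x, y):
    # Ordinary least squares weights for y ~ x @ w.
    w, *_ = np.linalg.lstsq(x, y, rcond=None)
    return w

def rmse_per_semantic_feature(x, y, w):
    # RMSE of x @ w.T against y, one value per semantic feature (column of y).
    pred = x @ w.T
    return np.sqrt(np.mean((pred - y) ** 2, axis=0))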