Example #1
    def getCentVec(self, contextVecs):

        sample, rank, dim = contextVecs.shape
        contexts = np.reshape(contextVecs, (sample * rank, dim))
        pca = PCA(n_components=1)
        pca.fit(contexts)
        return pca.components_[0]
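A quick usage sketch (assumed, not from the source): contextVecs is a 3-D array of shape (samples, rank, dim); the method flattens it to (samples * rank, dim) and returns the first principal direction as the central vector. The instance name ctx below is hypothetical.

import numpy as np

contextVecs = np.random.rand(8, 4, 300)   # 8 samples, rank 4, 300-dim vectors
center = ctx.getCentVec(contextVecs)      # ctx: an instance of the owning class
print(center.shape)                       # (300,)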
Example #2
    def compress(self, X):
        n = X.shape[0]

        # Compute Euclidean distances
        D = utils.euclidean_dist_squared(X, X)
        D = np.sqrt(D)

        # Replace Euclidean distances with graph (geodesic) distances
        D = self.construct_dist_graph(X, D)

        # If two points are disconnected (distance is Inf)
        # then set their distance to the maximum
        # distance in the graph, to encourage them to be far apart.
        D[np.isinf(D)] = D[~np.isinf(D)].max()


        # Initialize low-dimensional representation with PCA
        pca = PCA(self.k)
        pca.fit(X)
        Z = pca.compress(X)

        # Solve for the minimizer
        z,f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
        Z = z.reshape(n, self.k)
        return Z
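construct_dist_graph is called above but not shown. Judging from the other ISOMAP examples in this listing, it presumably builds a weighted k-nearest-neighbour graph from the pairwise distances and returns the shortest-path (geodesic) distances through it; a sketch under that assumption, reusing utils.dijkstra as the other examples do:

    def construct_dist_graph(self, X, D):
        n = X.shape[0]
        G = np.zeros((n, n))
        for i in range(n):
            # keep edges to the self.nn nearest neighbours (index 0 is the point itself)
            for j in np.argsort(D[i])[:self.nn + 1]:
                G[i, j] = D[i, j]
                G[j, i] = D[j, i]
        # all-pairs shortest-path distances through the weighted graph
        return utils.dijkstra(G)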
Example #3
    def compress(self, X):
        n = X.shape[0]

        # Compute Euclidean distances
        D = utils.euclidean_dist_squared(X, X)
        D = np.sqrt(D)

        sorted_indices = np.argsort(D)
        G = np.zeros((n, n))

        for i in range(D.shape[0]):
            for j in range(self.nn + 1):
                G[i, sorted_indices[i, j]] = D[i, sorted_indices[i, j]]
                G[sorted_indices[i, j], i] = D[sorted_indices[i, j], i]

        dist = utils.dijkstra(G)

        dist[np.isinf(dist)] = dist[~np.isinf(dist)].max()

        # Initialize low-dimensional representation with PCA
        pca = PCA(self.k)
        pca.fit(X)
        Z = pca.compress(X)

        # Solve for the minimizer
        z, f = findMin(self._fun_obj_z, Z.flatten(), 500, dist)
        Z = z.reshape(n, self.k)
        return Z
Example #4
    def compress(self, X):
        n = X.shape[0]

        # Compute Euclidean distances
        D = utils.euclidean_dist_squared(X, X)
        D = np.sqrt(D)
        # D is a symmetric matrix

        geoD = np.zeros((n, n))

        # find nn-neighbours
        for i in range(n):
            sort = np.argsort(D[:, i])
            neigh = np.setdiff1d(sort[0:self.nn + 1], i)
            # find the nn+1 smallest indexes that are not i
            for j in range(len(neigh)):
                t = neigh[j]
                geoD[i, t] = D[i, t]
                geoD[t, i] = D[t, i]

        D = utils.dijkstra(geoD)
        # for disconnected vertices (distance is Inf)
        # set their dist = max_dist(graph)
        # to encourage they are far away from each other
        D[np.isinf(D)] = D[~np.isinf(D)].max()

        # Initialize low-dimensional representation with PCA
        pca = PCA(self.k)
        pca.fit(X)
        Z = pca.compress(X)

        # Solve for the minimizer
        z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
        Z = z.reshape(n, self.k)
        return Z
Example #5
def getCxtSubspace(wl, dim, var_threshold=0.45):
    emb = []
    for word in wl:
        if word not in vecDict:
            print("non-exist:", word)
            continue
        wordEmbed = vecDict[word]
        emb.append(wordEmbed)
    emb = np.array(emb)

    pca = PCA()
    pca.fit(emb)
    varList = pca.explained_variance_ratio_
    cand = 0
    varSum = 0
    for var in varList:
        varSum += var
        cand += 1
        if (varSum >= var_threshold):
            break

    pca = PCA(n_components=cand)
    pca.fit(emb)
    top_embed = pca.components_
    print "dim:", len(top_embed.tolist()), cand
    return top_embed.tolist()
Example #6
    def compress(self, X):
        n = X.shape[0]

        # Compute Euclidean distances
        D = utils.euclidean_dist_squared(X, X)
        D = np.sqrt(D)

        # Construct nearest neighbour graph
        G = np.zeros([n, n])
        for i in range(n):
            neighbours = np.argsort(D[i])[:self.nn + 1]
            for j in neighbours:
                G[i, j] = D[i, j]
                G[j, i] = D[j, i]

        # Compute ISOMAP distances
        D = utils.dijkstra(G)

        # If two points are disconnected (distance is Inf)
        # then set their distance to the maximum
        # distance in the graph, to encourage them to be far apart.
        D[np.isinf(D)] = D[~np.isinf(D)].max()

        # Initialize low-dimensional representation with PCA
        pca = PCA(self.k)
        pca.fit(X)
        Z = pca.compress(X)

        # Solve for the minimizer
        z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
        Z = z.reshape(n, self.k)
        return Z
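utils.dijkstra is not shown in any of these snippets. Assuming it returns all-pairs shortest-path distances over the weighted neighbour graph (with np.inf for disconnected pairs), a SciPy-based stand-in could look like the sketch below; the zero-means-no-edge convention is an assumption:

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import shortest_path

def dijkstra(G):
    # G: n x n matrix of edge weights, where 0 means "no edge"
    # returns geodesic distances; disconnected pairs come back as np.inf
    return shortest_path(csr_matrix(G), method='D', directed=False)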
Example #7
    def test_pca(self):
        X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
        pca = PCA(n_comp=2)
        pca.fit(X)
        self.assertTrue(
            np.allclose(pca.explained_variance,
                        np.array([0.9924, 0.0075]),
                        atol=1e-3))
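The PCA class exercised by this test is not shown. A minimal from-scratch implementation that would satisfy it is sketched here; the n_comp argument and the explained_variance attribute holding variance ratios are assumptions read off the test itself:

import numpy as np

class PCA:
    def __init__(self, n_comp):
        self.n_comp = n_comp

    def fit(self, X):
        Xc = X - X.mean(axis=0)                 # centre the data
        cov = np.cov(Xc, rowvar=False)          # feature covariance matrix
        eigvals, eigvecs = np.linalg.eigh(cov)  # eigenvalues in ascending order
        order = np.argsort(eigvals)[::-1][:self.n_comp]
        self.components_ = eigvecs[:, order].T  # rows are principal axes
        self.explained_variance = eigvals[order] / eigvals.sum()
        return self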
Example #8
def cross_validation(X, Y, folds=5, split_value=0.3, name="lda"):

    scores = []
    for fold in range(folds):
        train_x, test_x, train_y, test_y = train_test_split(
            X, Y, shuffle=True, test_size=split_value)

        if name == "lda":
            lda = LDA()
            lda.fit(train_x, train_y)

            lda_train_x = lda.transform(train_x)
            lda_test_x = lda.transform(test_x)
        else:
            pca = PCA()
            pca.fit(train_x)

            pca_train_x = pca.transform(train_x)
            pca_test_x = pca.transform(test_x)
        '''classifier'''
        lr = LogisticRegression(solver='saga', n_jobs=4)
        lr.fit(train_x, train_y)
        score = lr.score(test_x, test_y)
        scores.append(score)
        print("accuracy on  fold ", fold, " : ", score)

    mean = np.mean(scores)
    std = np.std(scores)
    print("mean accuracy : ", mean)
    print("standard deviation : ", std)

    return mean, std, scores
Example #9
def pcaSenEmb(sent_vecs, var_threshold=0.6):
    """
    output: basis of context space
    """
    pca = PCA()
    pca.fit(sent_vecs)
    var_list = pca.explained_variance_ratio_
    cand = 0
    var_sum = 0
    for var in var_list:
        var_sum += var
        cand += 1
        if (var_sum >= var_threshold):
            break
    basis = pca.components_[:cand]  # keep only enough components to reach the variance threshold
    return basis
Example #10
    def compress(self, X):
        n = X.shape[0]
        k = self.k

        # Compute Euclidean distances
        D = utils.euclidean_dist_squared(X, X)
        D = np.sqrt(D)

        # Initialize low-dimensional representation with PCA
        pca = PCA(k)
        pca.fit(X)
        Z = pca.compress(X)

        # Solve for the minimizer
        z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
        Z = z.reshape(n, k)
        return Z
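findMin and _fun_obj_z are not included in these examples. As an assumed sketch, findMin is a gradient-based minimizer called as findMin(fun_obj, w0, maxIter, *args), and _fun_obj_z returns the multidimensional-scaling stress and its gradient with respect to the flattened coordinates z (np is NumPy, as in the surrounding examples). One common formulation is:

    def _fun_obj_z(self, z, D):
        n = D.shape[0]
        Z = z.reshape(n, self.k)
        f = 0.0
        g = np.zeros((n, self.k))
        for i in range(n):
            for j in range(i + 1, n):
                diff = Z[i] - Z[j]
                dz = np.sqrt(diff.dot(diff))       # embedded distance
                f += (dz - D[i, j]) ** 2           # squared-error stress term
                if dz > 0:
                    df = 2 * (dz - D[i, j]) * diff / dz
                    g[i] += df                     # gradient w.r.t. Z[i]
                    g[j] -= df                     # gradient w.r.t. Z[j]
        return f, g.flatten()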
Example #11
    def pcaContexts(self, idxList, idx=-1, contextMatrix=None):
        '''
        input: context indices
        output: pca vectors
        '''

        vecs = self.vecMatrix[np.array(idxList)]
        # randIdx = np.random.randint(0, self.vocabSize, size=(1,), dtype='i')
        # vecs = self.vecMatrixNorm[randIdx]
        pca = PCA(n_components=self.pcaRank)
        pca.fit(vecs)
        contextVecs = pca.components_[0:self.pcaRank]

        if idx >= 0:
            contextMatrix[idx] = contextVecs

        del vecs

        return contextVecs, sum(pca.explained_variance_ratio_)
Example #12
def pca_subspace(elements, embedding_matrix, vector_dim, mean_centering,
                 numComponents, debugInfo):
    ferr = open("errors_pca_representation", "a+")
    flog = open("logs_pca_representation", "a+")

    if embedding_matrix.ndim == 1:  # only one word in the sentence, do nothing (no PCA), the vector-space of the word itself is the subspace
        ferr.write("[No PCA]: Only a single element from " +
                   " ".join(elements) +
                   " found in supplied embeddings for the document" +
                   "_".join(debugInfo) + "\n")
        subspace = embedding_matrix
        singularValues = np.array([1.0])
        energyRetained = 1.0
    else:
        flog.write("Original NumComponents: " + str(numComponents) +
                   " NumElements: " + str(embedding_matrix.shape[0]) + "\t")
        numComponents = min(embedding_matrix.shape[0],
                            embedding_matrix.shape[1], numComponents)
        flog.write("New NumComponents: " + str(numComponents) + "\n")

        pca = PCA(n_components=numComponents, mean_centering=mean_centering)
        try:
            pca.fit(embedding_matrix)
            subspace = pca.components_
            if numComponents == 1:  # convert matrix to vector when numComponents = 1
                subspace = subspace.T.reshape(-1)
            energyRetained = np.sum(pca.explained_variance_ratio_)
            singularValues = pca.singular_values_
        except (
                np.linalg.LinAlgError, ZeroDivisionError
        ) as e:  # Fails (svd doesn't converge) for some reason. Use the word-vector average in this case!
            ferr.write("[SVD Error]: No subspace constructed for " +
                       " ".join(elements) + " in the document: " +
                       "_".join(debugInfo) + "\n")
            subspace = np.mean(embedding_matrix, axis=0)
            singularValues = np.array([1.0])
            energyRetained = 1.0
    ferr.close()
    flog.close()
    return subspace, singularValues, energyRetained
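A hedged usage sketch for pca_subspace; the words and the random embedding matrix below are made up for illustration, and the mean_centering keyword reflects the custom PCA used in this example rather than scikit-learn's API:

import numpy as np

words = ["cat", "dog", "fish"]
emb = np.random.rand(len(words), 50)   # one 50-dim vector per word
subspace, singular_values, energy = pca_subspace(
    words, emb, vector_dim=50, mean_centering=True,
    numComponents=2, debugInfo=["demo", "doc1"])
print(subspace.shape, energy)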
Example #13
    def compress(self, X):
        n = X.shape[0]

        # Compute Euclidean distances
        D = utils.euclidean_dist_squared(X, X)
        D = np.sqrt(D)

        # exclude self-distances when picking neighbours
        np.fill_diagonal(D, np.inf)
        ########
        # find the nearest neighbours of each point
        G = np.zeros((n, n))
        for i in range(n):
            neighbours = np.argsort(D[:, i])
            # want only the self.nn nearest (self is already excluded by the inf diagonal)
            for j in neighbours[:self.nn]:
                G[i, j] = D[i, j]
                G[j, i] = D[j, i]

        # weighted shortest path between points (Dijkstra's)
        D = np.zeros((n, n))
        for i in range(n):
            for j in range(i + 1, n):
                D[i, j] = utils.dijkstra(G, i, j)
                D[j, i] = D[i, j]  # keep the distance matrix symmetric

        ########

        # If two points are disconnected (distance is Inf)
        # then set their distance to the maximum
        # distance in the graph, to encourage them to be far apart.
        D[np.isinf(D)] = D[~np.isinf(D)].max()

        # Initialize low-dimensional representation with PCA
        pca = PCA(self.k)
        pca.fit(X)
        Z = pca.compress(X)

        # Solve for the minimizer
        z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
        Z = z.reshape(n, self.k)
        return Z
Example #14
    def compress(self, X):
        n = X.shape[0]

        # Compute Euclidean distances
        D = utils.euclidean_dist_squared(X, X)
        D = np.sqrt(D)

        ########
        # Construct the nearest-neighbour graph: keep an edge only when the
        # distance is among the self.nn + 1 smallest in that row
        G = np.full((n, n), np.inf)
        for i in range(n):
            temp = sorted(D[i])
            for j in range(n):
                if D[i][j] in temp[:(self.nn + 1)]:
                    G[i][j] = D[i][j]

        # Geodesic distances: shortest path through the graph for every pair
        for i in range(n):
            for j in range(n):
                D[i][j] = utils.dijkstra(G, i, j)

        ########

        # If two points are disconnected (distance is Inf)
        # then set their distance to the maximum
        # distance in the graph, to encourage them to be far apart.
        D[np.isinf(D)] = D[~np.isinf(D)].max()

        # Initialize low-dimensional representation with PCA
        pca = PCA(self.k)
        pca.fit(X)
        Z = pca.compress(X)

        # Solve for the minimizer
        z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
        Z = z.reshape(n, self.k)
        return Z
Example #15
def Bonus2():
    '''
    Visualization of the first 10 eigenvectors.
    '''
    # raw = genfromtxt('digits-raw.csv', delimiter=',')
    raw = genfromtxt('digits-raw-small.csv', delimiter=',')
    X = raw[:, 2:]
    pca = PCA(10)
    eigvec = pca.fit(X)
    eigimg = eigvec.reshape(10, 28, 28)
    for r in range(2):
        for c in range(5):
            i = r * 5 + c
            subplot(2, 5, i + 1)
            imshow(eigimg[i], cmap='gray')
            title(str(i))
    show()
Example #16
from sklearn import datasets
import matplotlib.pyplot as plt
import numpy as np
from pca import PCA

#data = datasets.load_digits()
data = datasets.load_iris()
X = data.data
y = data.target

# Project the data onto the 2 primary principal components
pca = PCA(2)
pca.fit(X)
X_projected = pca.transform(X)

print('Shape of X:', X.shape)
print('Shape of transformed X:', X_projected.shape)

x1 = X_projected[:, 0]
x2 = X_projected[:, 1]

plt.scatter(x1,
            x2,
            c=y,
            edgecolor='none',
            alpha=0.8,
            cmap=plt.cm.get_cmap('viridis', 3))

plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.colorbar()
plt.show()
Example #17
if __name__ == '__main__':
    struct_log = "./data/HDFS/HDFS_100k.log_structured.csv"

    ## 1. Load the log file and extract feature vectors
    x_train, _ = load_HDFS(struct_log)
    feature_extractor = FeatureExtractor()
    x_train = feature_extractor.fit_transform(x_train,
                                              term_weighting='tf-idf',
                                              normalization='zero-mean')

    ## 2. Train an unsupervised model
    print('Train phase:')
    # Initialize PCA, or another unsupervised model such as LogClustering or InvariantsMiner
    model = PCA()
    # Model hyper-parameters may be sensitive to the log data; here we use the defaults for the demo
    model.fit(x_train)
    # Make predictions and manually check for correctness (details may require going back to the raw logs)
    y_train = model.predict(x_train)
    y_train = model.predict(x_train)
    print(f"y_train: {y_train}")

    ## 3. Use the trained model for online anomaly detection
    print('Test phase:')
    # Load another new log file. Here we use struct_log for demo only
    x_test, _ = load_HDFS(struct_log)
    # Apply the same feature extraction as in training, but using transform() instead
    x_test = feature_extractor.transform(x_test)
    # Finally, make predictions and alert on anomaly cases
    y_test = model.predict(x_test)
    print(f"y_test: {y_test}")
Example #18
        coloredlogs.install(level='DEBUG', logger=logger)
    else:
        coloredlogs.install(level='WARNING', logger=logger)

    logger.info('Fetching data...')
    data = fetch_data(ratio=0.8)

    X_train, y_train = data['train']

    D, N = X_train.shape

    pca = PCA(n_comps=M, standard=standard, logger=logger)
    logger.info('Applying PCA with M=%d' % M)

    # normalise data
    W_train = pca.fit(X_train)
    logger.debug('W_train.shape=%s' % (W_train.shape,))

    X_test, y_test = data['test']
    I, K = X_test.shape
    assert I == D, logger.error(
        'Number of features of test and train data do not match, %d != %d' % (D, I))

    W_test = pca.transform(X_test)
    logger.debug('W_test.shape=%s' % (W_test.shape,))

    classes = set(y_train.ravel())

    C = len(classes)

    combs = list(itertools.combinations(classes, 2))
Example #19
# -*- coding: utf-8 -*-
"""
Created on Fri Feb  7 23:53:40 2020

@author: ABOLI
"""
import numpy as np
from pca import PCA

X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
pca_model = PCA(2)
pca_model.fit(X)
print(pca_model.variance_ratio)
print(pca_model.transform(X))
Example #20
data = raw_data[[
    'Gender', 'Married', 'Education', 'ApplicantIncome', 'LoanAmount',
    'Credit_History'
]]

x = data.to_numpy()
# x = ss.fit_transform(x)

print("[INFO] Standardizing input vectors ... ")
for i in range(x.shape[0]):
    # standardize each vector
    x[i] = standardize(x[i])

print("[INFO] Implementing principal components analysis ... ")
pca = PCA()
x = pca.fit(x)
y = raw_data['outcome'].to_numpy()

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

model = KernelSVM()
model.fit(x_train, y_train, alpha=0.01, iterations=100)

predictions = model.predict(x_test)
accuracy = accuracy_score(predictions, y_test)

print("[INFO] Home made recipe : " + str(accuracy))

model = SVC(kernel='rbf')
model.fit(x_train, y_train)
Example #21
y = data.target

# Min-max normalization
X_min_max = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))

fig, axes = plt.subplots(1, 2)
axes[0].scatter(X[:, 0], X[:, 1], c=y)
axes[0].set_title("Original Data")
axes[1].scatter(X_min_max[:, 0], X_min_max[:, 1], c=y)
axes[1].set_title("Min-Max Normalized Data")
plt.show()

# Plot the data using the first two principal components
from pca import PCA
pca = PCA(2)
pca.fit(X_min_max)
X_projected = pca.transform(X_min_max)

print('Min-max normalized X:', X_min_max.shape)  # (150, 4)
print('X after PCA:', X_projected.shape)  # (150, 2)

x1 = X_projected[:, 0]
x2 = X_projected[:, 1]

plt.scatter(x1,
            x2,
            c=y,
            edgecolor='none',
            alpha=0.8,
            cmap=plt.cm.get_cmap('viridis', 3))
Example #22
#
# bar.finish()
# end_time = time.time()
# print("Accuracy for Simple Nearest Neighbour @rank 1 : ", "{:.4%}".format(rank_one_score / len(query_labels)))
# print("Accuracy for Simple Nearest Neighbour @rank 5 : ", "{:.4%}".format(rank_five_score / len(query_labels)))
# print("Accuracy for Simple Nearest Neighbour @rank 10 : ", "{:.4%}".format(rank_ten_score / len(query_labels)))
#
# print("Computation Time: %s seconds" % (end_time - start_time))

# PCA-MMC
print("-----PCA_MMC-----")
pca = PCA(original_train_features,
          original_train_labels,
          M=500,
          low_dimension=False)
pca.fit()
mmc = MMC_Supervised(max_iter=20, convergence_threshold=1e-5)
mmc_metric = mmc.fit(pca.train_sample_projection, original_train_labels)
transformed_features = mmc_metric.transform(features)
transformed_query_features = transformed_features[query_idxs - 1]

n = 10
start_time = time.time()
rank_one_score = 0
rank_five_score = 0
rank_ten_score = 0
bar.start()
for k in range(len(query_features)):
    bar.update(k + 1)
    feature_vector = transformed_query_features[k]
    gallery_vectors = transformed_features[gallery_data_idx[k] - 1]
Example #23
import numpy as np
from pca import PCA, PcaType

data = np.array([[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1],
                 [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9]])

pca = PCA()
pca.fit(data=data, pcaType=PcaType.Two)
print(pca.explained_variance_ratio())
print(pca.get_covariance())
print(pca.singular_values())
print(pca.transform(data=data, n_components=2))
Example #24
import numpy
from pca import PCA

# we'll create a random dataset of 5 variables and 100 samples
random_dataset = numpy.random.rand(100, 5)

# define a pca object and specify a number of components
pca_ = PCA(n_components=2)

# fit the model using dataset
pca_.fit(dataset=random_dataset)

# transform the dataset
new_dataset = pca_.transform(dataset=random_dataset)

# print the new and old data shapes
print("Original shape:{}, new shape: {}".format(random_dataset.shape,
                                                new_dataset.shape))
Example #25
train_y = np.asarray(train_y)
test_y = np.asarray(test_y)
print(test_y.shape)
print(np.unique(Y))

type = int(input("pca or lda or both ?"))

if type == 1:
    if d == 2:
        split = float(1 / 6)
    else:
        split = 0.3

    s = "pca"
    pca = PCA()
    pca.fit(train_x)
    joblib.dump(pca.eigen_vectors, "pca_prjection_" + str(d) + ".pkl")

    pca_train_x = pca.transform(train_x)
    pca_test_x = pca.transform(test_x)
    # del pca
    print("pca done")

    lr = LogisticRegression(solver='saga', n_jobs=4)
    lr.fit(pca_train_x, train_y)
    print("accuracy on test data : ", lr.score(pca_test_x, test_y))
    pr = lr.predict_proba(pca_test_x)
    pt = lr.predict(pca_test_x)

    tt = [np.argmax(i) for i in pr[:10]]
    print(tt)
Example #26
def train(settings):
    Xtrain, ytrain, Xval, yval, Xtest, ytest = cross_val(
        path=settings[PATH], k=settings[FOLDS], emotions=settings[EMOTIONS])
    # Each fold will have a new model that is used on the test data one time. The results are stored here
    test_loss, test_acc = [], []

    # Save all the models, this way we can access their loss and accuracy stats
    models = []

    # List of confusion matrix for later task
    cms = []

    # For every fold, fit a new PCA, train new model, do validation, save the best model
    for k in range(settings[FOLDS]):
        Xtrain_k, ytrain_k, Xval_k, yval_k, Xtest_k, ytest_k = Xtrain[
            k], ytrain[k], Xval[k], yval[k], Xtest[k], ytest[k]

        # Shuffle so there is no pattern
        Xtrain_k, ytrain_k = shuffle_data(Xtrain_k, ytrain_k)
        Xval_k, yval_k = shuffle_data(Xval_k, yval_k)
        Xtest_k, ytest_k = shuffle_data(Xtest_k, ytest_k)

        logging.info("Started fold number: {}".format(k))

        # Convert to numpy arrays
        Xtrain_k, Xval_k, Xtest_k = np.array(Xtrain_k), np.array(
            Xval_k), np.array(Xtest_k)

        # Depending on the model, different loss and accuracy functions need to be set
        if settings[MODEL] == SoftmaxRegression:
            ytrain_k, yval_k, ytest_k = one_hot_encode(
                ytrain_k), one_hot_encode(yval_k), one_hot_encode(ytest_k)
            loss_function = softmax_loss_function
            accuracy = softmax_accuracy
        else:
            ytrain_k, yval_k, ytest_k = np.reshape(
                ytrain_k,
                (-1, 1)), np.reshape(yval_k,
                                     (-1, 1)), np.reshape(ytest_k, (-1, 1))
            loss_function = logistic_loss_function
            accuracy = logistic_accuracy
        # Fit the PCA using only the training data
        pca = PCA(settings[NUM_COMPONENTS])
        pca.fit(Xtrain_k)

        # Project Xtrain, Xval, Xtest onto the principal components
        Xtrain_k, Xval_k, Xtest_k = pca.transform(Xtrain_k), pca.transform(
            Xval_k), pca.transform(Xtest_k)

        # Make new model for this fold
        model = settings[MODEL](settings)

        best_weights, min_loss = model.weights, np.inf
        for epoch in range(1, settings[EPOCHS] + 1):
            # Select method for updating weights
            if settings[BATCH]:
                model.batch_gradient_descent(Xtrain_k, ytrain_k)
            else:
                model.stochastic_gradient_descent(Xtrain_k, ytrain_k)

            # Using an objective function to calculate the loss, and calculate the accuracy
            train_loss = loss_function(model, Xtrain_k, ytrain_k)
            val_loss = loss_function(model, Xval_k, yval_k)
            train_acc = accuracy(model, Xtrain_k, ytrain_k)
            val_acc = accuracy(model, Xval_k, yval_k)

            # Save the result for later graphs
            model.train_loss.append(train_loss)
            model.val_loss.append(val_loss)
            model.train_acc.append(train_acc)
            model.val_acc.append(val_acc)

            # Check if this is the lowest loss so far; if so, save the weights as the best model
            if val_loss < min_loss:
                best_weights = np.copy(model.weights)
                min_loss = val_loss

            # Status update on how the training goes
            if epoch % 10 == 0:
                logging.info(
                    "Epoch: {}, Train_loss: {} , Val_loss: {}, Train_acc: {}, Val_acc: {}"
                    .format(epoch, train_loss, val_loss, train_acc, val_acc))

        # Now update the weights in the model to the best weights
        model.weights = best_weights

        # Use this model on the test data, and save loss & accuracy
        test_loss.append(loss_function(model, Xtest_k, ytest_k))
        test_acc.append(accuracy(model, Xtest_k, ytest_k))

        if settings[MODEL] == SoftmaxRegression:
            cf_matrix = confusion_matrix(model, Xtest_k, ytest_k)
            cms.append(cf_matrix)

        # Model finished, add it to list of models
        models.append(model)

    # Calculate the average test_loss and test_acc
    avg_test_loss, avg_test_acc = np.mean(test_loss), np.mean(test_acc)
    std_test_acc = np.std(test_acc)

    logging.info("Average Test Loss Overall Folds: {}".format(avg_test_loss))
    logging.info(
        "Average Test Accuracy Overall Folds: {}".format(avg_test_acc))
    logging.info("Std Test Accuracy Overall Folds: {}".format(std_test_acc))
    logging.info("Generating plots")

    train_losses = [model.train_loss for model in models]
    val_losses = [model.val_loss for model in models]
    train_acces = [model.train_acc for model in models]
    val_acces = [model.val_acc for model in models]

    graph_loss(train_losses, val_losses, settings)
    graph_acc(train_acces, val_acces, settings)
    pca.display_pc(settings)

    # Visualize the cf matrix and weights for each emotion
    if settings[MODEL] == SoftmaxRegression:
        avg_cf_matrix = np.mean(cms,
                                axis=0)  # Take the average of all matrices
        graph_cm(avg_cf_matrix, settings)
        visualize_weights(models, pca, settings)

    return train_losses
Example #27
        plt.ylabel("$x_{%d}$" % f2)
        for i in range(n):
            plt.annotate(animals[i], (X[i, f1], X[i, f2]))
        utils.savefig('two_random_features.png')

    elif question == '2.2':
        dataset = load_dataset('animals.pkl')
        X = dataset['X'].astype(float)
        animals = dataset['animals']
        n, d = X.shape

        # standardize columns
        X = utils.standardize_cols(X)

        model = PCA(k=2)
        model.fit(X)
        Z = model.compress(X)
        fig, ax = plt.subplots()
        plt.ylabel('z2')
        plt.xlabel('z1')
        ax.scatter(Z[:, 0], Z[:, 1])
        for i in range(n):
            ax.annotate(animals[i], (Z[i, 0], Z[i, 1]))

        utils.savefig('q2_2_PCA_animals.png')

    elif question == '3.1':
        X = load_dataset('highway.pkl')['X'].astype(float) / 255
        n, d = X.shape
        print(n, d)
        h, w = 64, 64  # height and width of each image
Example #28
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA as sklearn_pca_model

iris_dataset = datasets.load_iris()
orginal_data = iris_dataset.data
target = iris_dataset.target

#print(orginal_data[:20])
#print(target[:20])
#print(orginal_data.shape)
#print(target.shape)

print("PCA FROM SCRATCH:")
pca_from_scratch = PCA(n_components=2)
pca_from_scratch.fit(orginal_data)
transformed_data = pca_from_scratch.transform(orginal_data)
#transformed_data = pca_from_scratch.fit_transform(orginal_data)

pca_from_scratch.plot_cov_matrix()
pca_from_scratch.plot_cumulative_explained_variance_ratio()

print(pca_from_scratch.components)
print(pca_from_scratch.explained_variance())
print(pca_from_scratch.explained_variance_ratio())

print()
print("PCA SCIKIT-LEARN:")
sklearn_pca = sklearn_pca_model(n_components=2)
scaler = StandardScaler()
scaler.fit(orginal_data)
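The snippet is cut off after fitting the scaler. A plausible continuation of the scikit-learn comparison (an assumption, not part of the original) would be:

# assumed continuation: scale the data, then fit and inspect scikit-learn's PCA
scaled_data = scaler.transform(orginal_data)
sklearn_transformed = sklearn_pca.fit_transform(scaled_data)
print(sklearn_pca.components_)
print(sklearn_pca.explained_variance_)
print(sklearn_pca.explained_variance_ratio_)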