def build_classifier(train_data_x_in, train_data_y, classifier_in="svc_basic"):
    print "Attempting to build classifier."
    train_data_x = train_data_x_in
    transformer = ""
    # classifier = grid_search.GridSearchCV(svm.SVC(), parameters).fit(train_data_x, train_data_y)
    if classifier_in == "svc_basic":
        classifier = svm.SVC()
        print "Selection was basic svm.SVC."
    elif classifier_in == "svc_extensive":
        classifier = svm.SVC(kernel="linear", C=0.025, gamma=0.01)
        print "Selection was extensive svm.SVC, with linear kernel, C==0.025 and gamma==0.01."
    elif classifier_in == "kneighbors_basic":
        transformer = RandomizedPCA(n_components=2000)
        train_data_x = transformer.fit_transform(train_data_x)
        classifier = KNeighborsClassifier()
        print "Selection was KNeighbors basic, using RandomizedPCA to transform data first. n_components==2000."
    elif classifier_in == "bagging_basic":
        classifier = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5)
        print "Selection was Bagging basic, with max_samples==0.5 and max_features==0.5."
    elif classifier_in == "spectral_basic":
        transformer = SpectralEmbedding(n_components=2000)
        train_data_x = transformer.fit_transform(train_data_x)
        classifier = KNeighborsClassifier()
        print "Selection was Spectral basic, using svm.SVC with Spectral data fitting. n_components==2000."
    # default to SVC in case of any sort of parsing error.
    else:
        print "Error in selecting classifier class. Reverting to SVC."
        classifier = svm.SVC()
    classifier.fit(train_data_x, train_data_y)
    print "Doing classifier estimation."
    return classifier, train_data_x, transformer
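
Note: the snippets collected here target older scikit-learn releases in which RandomizedPCA was a standalone class; it was deprecated in scikit-learn 0.18 and removed in 0.20. A minimal sketch of the modern equivalent, assuming scikit-learn 0.18 or newer (the helper name is illustrative):

from sklearn.decomposition import PCA

def make_randomized_pca(n_components, whiten=False, random_state=0):
    # PCA with svd_solver='randomized' plays the role of the old RandomizedPCA class.
    return PCA(n_components=n_components, svd_solver='randomized',
               whiten=whiten, random_state=random_state)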
Example #2
def reduce_features(features,
                    var_explained=0.9,
                    n_components=0,
                    verbose=False):
    """
	Performs feature reduction using PCA. Automatically selects nr. components
	for explaining min_var_explained variance.
	:param features: Features.
	:param var_explained: Minimal variance explained.
	:param n_components: Nr. of components.
	:param exclude_columns: Columns to exclude.
	:param verbose: Verbosity.
	:return: Reduced feature set.
	"""
    if n_components == 0:
        # Run full PCA to estimate nr. components for explaining given
        # percentage of variance.
        estimator = RandomizedPCA()
        estimator.fit_transform(features)
        variance = 0.0
        for i in range(len(estimator.explained_variance_ratio_)):
            variance += estimator.explained_variance_ratio_[i]
            if variance > var_explained:
                n_components = i + 1
                if verbose:
                    print('{:.1%} of variance explained using {} components'.
                          format(variance, n_components))
                break
    # Re-run PCA with only estimated nr. components
    estimator = RandomizedPCA(n_components=n_components)
    features = estimator.fit_transform(features)
    return features
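
A quick usage sketch for reduce_features; the synthetic data below is illustrative and assumes RandomizedPCA has been imported as in the snippet above:

import numpy as np

rng = np.random.RandomState(0)
# 200 samples with 50 correlated features (illustrative data only).
features = np.dot(rng.randn(200, 50), rng.randn(50, 50))
reduced = reduce_features(features, var_explained=0.9, verbose=True)
print(reduced.shape)  # (200, k), with k chosen so that >90% of the variance is explained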
Example #3
def test_feature_union_weights():
    # test feature union with transformer weights
    iris = load_iris()
    X = iris.data
    y = iris.target
    pca = RandomizedPCA(n_components=2, random_state=0)
    select = SelectKBest(k=1)
    # test using fit followed by transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    # test using fit_transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    X_fit_transformed = fs.fit_transform(X, y)
    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", TransfT()), ("pca", pca), ("select", select)],
                      transformer_weights={"mock": 10})
    X_fit_transformed_wo_method = fs.fit_transform(X, y)
    # check against expected result

    # We use a different pca object to control the random_state stream
    assert_array_almost_equal(X_transformed[:, :-1], 10 * pca.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_array_almost_equal(X_fit_transformed[:, :-1],
                              10 * pca.fit_transform(X))
    assert_array_equal(X_fit_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_equal(X_fit_transformed_wo_method.shape, (X.shape[0], 7))
def pca(imageData=[]):
    labels = ["shoe", "shirt"]
    is_train = np.random.uniform(0, 1, len(imageData)) <= 0.7
    y = np.where(np.array(labels) == "shirt", 1, 0)

    train_x, train_y = imageData[is_train], imageData[is_train]
    test_x, test_y = imageData[is_train == False], y[is_train == False]
    pca = RandomizedPCA(n_components=2)
    X = pca.fit_transform(imageData)
    df = pd.DataFrame({
        "x": X[:, 0],
        "y": X[:, 1],
        "label": np.where(y == 1, "shoe", "shirt")
    })
    colors = ["red", "yellow"]
    for label, color in zip(df['label'].unique(), colors):
        mask = df['label'] == label
        pl.scatter(df[mask]['x'], df[mask]['y'], c=color, label=label)
    pl.legend()
    pl.show()

    pca2 = RandomizedPCA(n_components=5)
    train_x = pca2.fit_transform(train_x)
    test_x = pca2.transform(test_x)

    print train_x[:5]
    knn = KNeighborsClassifier()
    knn.fit(train_x, train_y)
    return 0
Example #7
def pcaAndPlot(X, x_to_centroids, centroids, no_dims = 2):
    pca = RandomizedPCA(n_components=no_dims)
    x_trans = pca.fit_transform(X)
    x_sizes = np.full((x_trans.shape[0]), 30, dtype=np.int)
    plt.scatter(x_trans[:, 0], x_trans[:, 1], s=x_sizes, c=x_to_centroids)
    # Project the centroids with the PCA already fitted on X so both share the same axes.
    centroids_trans = pca.transform(centroids)
    centroids_col = np.arange(centroids.shape[0])
    centroids_sizes = np.full((centroids.shape[0]), 70, dtype=np.int)
    plt.scatter(centroids_trans[:, 0], centroids_trans[:, 1], s=centroids_sizes, c=centroids_col)
    plt.show()
Example #8
def principal_component_analysis(x):
    sizes = np.shape(x)
    cols = sizes[1]

    # Obtain the Principal Components, which are ordered by eigenvalues
    principal_components = RandomizedPCA(n_components=cols)
    principal_components.fit_transform(x)
    eigenvalues = principal_components.explained_variance_

    # Maximum eigenvalues reflect importance of each feature
    feature_order = np.argsort(eigenvalues)[::-1][:cols]
    return feature_order
Example #9
def read_data_sets():
	class DataSets(object):
		pass

	NUM_CLASSES = 7
	start = time.time()
	data_sets = DataSets()

	# Load the training data
	mat_contents = sio.loadmat('labeled_images.mat')
	train_labels = mat_contents['tr_labels']
	train_identities = mat_contents['tr_identity']
	train_images = mat_contents['tr_images']

	# Load the test data
	mat_contents = sio.loadmat('public_test_images.mat')
	test_images = mat_contents['public_test_images']
	test_set_length = len(test_images[0][0])

	# Flatten images
	test_images = flattenImages(test_images)
	train_images = flattenImages(train_images)

	# Split train into validation set of size ~ test_set_length
	train_images, train_labels, validation_images, validation_labels = splitSet(
		train_images,
		train_labels,
		train_identities,
		test_set_length)

	# Convert labels to one hot vectors
	train_labels = convertToOneHot(train_labels, NUM_CLASSES)
	validation_labels = convertToOneHot(validation_labels, NUM_CLASSES)

	# Normalize the images
	sd = np.sqrt(np.var(train_images) + 0.01)
	train_images = (train_images - np.mean(train_images)) / sd
	sd = np.sqrt(np.var(validation_images) + 0.01)
	validation_images = (validation_images - np.mean(validation_images)) / sd

	pca = RandomizedPCA(n_components=15)
	train_images = pca.fit_transform(train_images)
	# Project the validation set with the PCA fitted on the training set.
	validation_images = pca.transform(validation_images)

	# Set up the matrices into an accessible data set class
	data_sets.train_set = DataSet(train_images, train_labels)
	data_sets.validation_set = DataSet(validation_images, validation_labels)
	data_sets.test_set = DataSet(test_images, np.zeros((len(test_images), NUM_CLASSES)))


	print('Finished setting up data! Took {} seconds'.format(time.time() - start))

	return data_sets
Example #10
def get_features_from_images_PCA(img_dir,data_set):
    
    """
    Takes in a directory and gets all the images from
    it and extracts the pixel values, flattens the matrix
    into an array and performs principal component analysis
    to get representative subset of features from the pixel
    values of the image.
    """
    
    print "\nExtracting features from given images..."
    img_names = [f for f in os.listdir(img_dir)]
    images = [img_dir+ f for f in os.listdir(img_dir)]
    #print images
    
    print "\nConverting images to vectors"
    data = []
    for image in images:
#        print image
        img = img_to_matrix(image)
        img = flatten_image(img)
        data.append(img)
    
    print "Converting image data to numpy array"
    
    time.sleep(5)
    data = np.array(data)
    print "Finished Conversion"
    time.sleep(5)
    
    print "\nPerforming PCA to get reqd features"
    features = []
    pca = RandomizedPCA(n_components=14)
    for i in xrange(len(data)/100):
        split = data[100*i:100*(i+1)]
        if len(features) == 0:
            # Fit the PCA on the first chunk of 100 images only...
            features = pca.fit_transform(split)
        else:
            # ...then reuse that fit so features from every chunk share the same projection.
            features = np.concatenate((features, pca.transform(split)), axis=0)
    
    print "Writing feature data to file"
    f = open(data_set+"_extracted_features.txt","w")  
    for i in xrange(len(img_names)):
        s = str(img_names[i])
        for value in features[i]:
            s += " "+str(value)
        s += "\n"
        f.write(s)
    
    f.close()
    print "Write completed"
Example #12
def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image", required = True, help = "Path to the image")
    args = vars(ap.parse_args())

    image = cv2.imread(args["image"])
    rects, img = detect(image)

    cropped = []

    for idx, (x1, y1, x2, y2) in enumerate(rects):
        crop_img = image[y1:y1 + (y2 - y1), x1:x1 + (x2 - x1)]
        crop_img = cv2.resize(crop_img, (100,100), interpolation = cv2.INTER_AREA)
        cv2.imshow("image" + str(idx), crop_img)
        new_img = crop_img.reshape(crop_img.shape[0] * crop_img.shape[1], 3)
        cropped.append(new_img.flatten())

    # reduce feature size
    cropped_pca = []
    pca = RandomizedPCA(n_components=100)
    cropped_pca = pca.fit_transform(cropped)

    # training (hardcoded for now)
    clf   = SVC(probability=True)
    train = cropped_pca[:7]
    test  = cropped_pca[7:13]
    # clf.fit([[0,0],[1,1]], [1, 2])
    clf.fit(train, [1,2,2,1,2,1,1])

    for item in test:
        print clf.predict_proba(item)
        print clf.predict(item)

    cv2.waitKey(0)
Example #13
    def _prepare_pca(self, data, max_n_components):
        """ Helper Function """
        from sklearn.decomposition import RandomizedPCA

        # sklearn < 0.11 does not support random_state argument
        kwargs = {'n_components': max_n_components, 'whiten': False}

        aspec = inspect.getargspec(RandomizedPCA.__init__)
        if 'random_state' not in aspec.args:
            warnings.warn('RandomizedPCA does not support the random_state '
                          'argument. Update scikit-learn to version 0.11 '
                          'or newer to get reproducible results.')
        else:
            kwargs['random_state'] = 0

        pca = RandomizedPCA(**kwargs)
        pca_data = pca.fit_transform(data.T)

        if self._explained_var > 1.0:
            if self.n_components is not None:  # normal n case
                self._comp_idx = np.arange(self.n_components)
                to_ica = pca_data[:, self._comp_idx]
            else:  # None case
                to_ica = pca_data
                self.n_components = pca_data.shape[1]
                self._comp_idx = np.arange(self.n_components)
        else:  # float case
            expl_var = pca.explained_variance_ratio_
            self._comp_idx = (np.where(expl_var.cumsum() <
                                      self._explained_var)[0])
            to_ica = pca_data[:, self._comp_idx]
            self.n_components = len(self._comp_idx)

        return to_ica, pca
Example #14
    def detect(self, imageURLs, params):

        array = []
        for param in params:
            img = self.img_to_matrix(param['imageURL'])
            data = self.flatten_image(img)
            array.append(data)
        array = np.array(array)

        pca = RandomizedPCA(n_components=5)
        n_data = pca.fit_transform(array)

        clf = joblib.load('src/resource/models/model.pkl')
        result = clf.predict(n_data).tolist()

        for param, r in zip(params, result):
            raw_img = urllib2.urlopen(param['imageURL']).read()
            if r == 1:
                cntr = len([i for i in os.listdir("test/images/rain/") if 'rain' in i]) + 1
                path = "static/images/rain_" + str(cntr) + '.jpg'
                f = open(path, 'wb')
                f.write(raw_img)
                f.close()
                # Build the event record
                when = {'type': 'timestamp', 'time':param['time']}
                where = { "type": "Point", "coordinates": [param['longitude'], param['latitude']]}
                what = {'topic': {'value': u'雨'}, 'tweet': param['value']}  # u'雨' means "rain"
                who = [{"type": "url", "value": param['imageURL']},
                       {"value": "evwh <*****@*****.**>", "type": "author"}]
                event = {'observation':{'what': what, 'when': when, 'where': where, 'who': who}}
                self.connection['event']['TwitterImageRainSensor'].insert(event)
def rpca(numpy_file='../data/Paintings/two_class/Paintings_train.csv'):
    """ Performs randomized PCA on given numpy file.

    Given a numpy file of n-rows and n-cols, where the last column is
    the label and the rest are features; n-rows are the samples.

    :type numpy_file: string
    :param numpy_file: The file name of numpy file to be analyzed.
    """
    import numpy as np
    import matplotlib.pyplot as pl
    import pandas as pd
    from sklearn.decomposition import RandomizedPCA

    all_data = np.loadtxt(numpy_file,delimiter=',')
    data = all_data[:,:-1]
    y = all_data[:,-1]
    pca = RandomizedPCA(n_components=2)
    X = pca.fit_transform(data)
    df = pd.DataFrame({"x": X[:, 0], "y": X[:, 1],\
                    "label":np.where(y==1, "realism", "abstract")})
    colors = ["red", "yellow"]
    for label, color in zip(df['label'].unique(), colors):
        mask = df['label']==label
        pl.scatter(df[mask]['x'], df[mask]['y'], c=color, label=label)
    pl.legend()
    pl.title('Randomized PCA analysis')
    pl.show()
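
A small usage sketch for rpca, writing a synthetic CSV in the layout the docstring describes (features in all columns but the last, a 0/1 label in the last column); the file name is illustrative:

import numpy as np

rng = np.random.RandomState(0)
features = rng.randn(60, 20)
labels = (rng.rand(60) > 0.5).astype(float)
np.savetxt('paintings_demo.csv', np.column_stack([features, labels]), delimiter=',')
rpca('paintings_demo.csv')  # scatter plot of the first two randomized-PCA components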
Example #16
def main():
    img_dir = 'images/'
    images = [img_dir + f for f in os.listdir(img_dir)]
    labels = [f.split('/')[-1].split('_')[0] for f in images]
    label2ids = {v: i for i, v in enumerate(sorted(set(labels),
                                                   key=labels.index))}
    y = np.array([label2ids[l] for l in labels])

    data = []
    for image_file in images:
        img = img_to_matrix(image_file)
        img = flatten_image(img)
        data.append(img)
    data = np.array(data)

    # training samples
    is_train = np.random.uniform(0, 1, len(data)) <= 0.7
    train_X, train_y = data[is_train], y[is_train]

    # training a classifier
    pca = RandomizedPCA(n_components=5)
    train_X = pca.fit_transform(train_X)
    multi_svm = OneVsRestClassifier(LinearSVC())
    multi_svm.fit(train_X, train_y)

    # evaluating the model
    test_X, test_y = data[is_train == False], y[is_train == False]
    test_X = pca.transform(test_X)
    print pd.crosstab(test_y, multi_svm.predict(test_X),
                      rownames=['Actual'], colnames=['Predicted'])
def make_pca_datapoints(terms_map, stopwords, clusters):
	new_terms_map = {}
	raw_data = []
	target = []
	for line in open(tweets_file):
		tokens = line.split()
		terms = [terms_map[int(term)] for term in tokens[3].split(',') if terms_map[int(term)] not in stopwords]
		for term in terms:
			if not term in new_terms_map:
				new_terms_map[term] = len(new_terms_map)
		new_term_ids = [new_terms_map[term] for term in terms]
		tags = [terms_map[int(term)] for term in tokens[4].split(',')]
		raw_data.append(new_term_ids)
		target.append(tags)
	data = lil_matrix( (len(raw_data), len(new_terms_map)) )
	count = 0
	for cur_vector in raw_data:
		for point in cur_vector:
			data[(count, point)] += 1
		count += 1
	pca = RandomizedPCA(n_components=100)
	transformed_data = pca.fit_transform(data) 
	
	xs = []
	ys = []
	count = 0
	for datum in transformed_data:
		for tag in target[count]:
			if (len(tag) > 1) and tag[1:] in clusters:
				xs.append(datum)
				ys.append(clusters[tag[1:]])
		count += 1

	del transformed_data
	return xs, ys	
Example #19
def do_pca(corr_matrix: _nested_ndarray, num_dim: int,
    min_var_explanation: float = 0.7) -> _nested_ndarray:
    '''
    This method performs PCA on a self-correlation matrix, reducing the number of columns to `num_dim`.
    If such analysis does not sufficiently explain the underlying variance in the data, an exception is
    thrown.
    
    Args:

    * `corr_matrix` - a square matrix of correlations
    * `num_dim` - the number of dimensions to which the data should be reduced
    * `min_var_explanation` - the minimum fraction of the underlying data variance that should be explained

    Returns:

    > A matrix of the PCA result on `corr_matrix`.
    '''

    num_dim = int(num_dim)
    pca = PCA(n_components=num_dim, random_state=0)
    pca_result = pca.fit_transform(corr_matrix)
    var_ratio = pca.explained_variance_ratio_
    if sum(var_ratio) < min_var_explanation:
        raise PcaAccuracyException(
            'PCA doesn\'t explain enough of the variance in the data')

    return pca_result
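
A usage sketch for do_pca on a synthetic self-correlation matrix. The low-rank toy data is illustrative, and PcaAccuracyException is assumed to be defined elsewhere in the same module:

import numpy as np

rng = np.random.RandomState(0)
# Roughly rank-2 data, so two components comfortably clear the 0.7 variance threshold.
samples = np.dot(rng.randn(30, 2), rng.randn(2, 40)) + 0.01 * rng.randn(30, 40)
corr_matrix = np.corrcoef(samples)  # 30 x 30 square correlation matrix
embedding = do_pca(corr_matrix, num_dim=2)
print(embedding.shape)  # (30, 2)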
Example #20
def calc_hog(fpaths, save=False):
    '''
    Compute histogram of gradients (HOG). Saves in batches to prevent memory issues.
    Input:
        fpaths : files on which HOG will be computed
        save : if true, output is saved to disk
    '''

    hogs = np.empty((len(fpaths), 15876))

    for i, fpath in enumerate(fpaths):
        img = imread(os.path.join(imgdir, fpath))
        if len(img.shape)==3:
            img = rgb2gray(img)
        # rescale so all feature vectors are the same length
        img_resize = resize(img, (128, 128))
        img_hog = hog(img_resize)

        hogs[i, :] = img_hog

    hogs_sc = scale(hogs)
    n_components = 15
    pca = RandomizedPCA(n_components=n_components)
    hogs_decomp = pca.fit_transform(hogs_sc)

    df = pd.DataFrame(hogs_decomp, index=[os.path.split(i)[1] for i in fpaths])
    df.index.name='fpath'
    df.columns = ['feat_hog_%2.2u' % i for i in range(1, n_components+1)]
    if save: df.to_csv('hog.csv')
    
    return df
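
calc_hog depends on an imgdir global and on image files on disk; the sketch below exercises the same HOG -> scale -> RandomizedPCA pipeline on synthetic grayscale images so the shapes are easy to check (all data here is made up):

import numpy as np
from skimage.feature import hog
from skimage.transform import resize
from sklearn.preprocessing import scale

rng = np.random.RandomState(0)
imgs = [rng.rand(90, 120) for _ in range(20)]  # fake grayscale images
feats = np.vstack([hog(resize(im, (128, 128))) for im in imgs])
pca = RandomizedPCA(n_components=15)
print(pca.fit_transform(scale(feats)).shape)   # (20, 15)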
Example #21
    def Q4():

        data = datasets.fetch_olivetti_faces(shuffle=True, random_state=0)
        X = data.data
        y = data.target

        image_shape = (64, 64)

        n = X.shape[0]
        n_components = 10

        model = RandomizedPCA(n_components=n_components)
        Z = model.fit_transform(X)
        Z_c = Z  #- Z.mean(axis=1).reshape((n, 1))  !!!! ERROR IN COURSERA
        Z_c = Z_c * Z_c
        Z_tot = Z_c.sum(axis=1).reshape((n, 1))

        Cos = Z_c / Z_tot

        i_s = []
        for j in range(n_components):
            i = np.argmax(Cos[:, j])
            i_s.append(i)
            image = X[i, :].reshape(image_shape)
            #plt.imshow(image)
            #plt.show()

        utils.PATH.SAVE_RESULT((3, 2), (1, 4), i_s)

        return
Example #22
def scatter(data, labels=None, title=None, name=None):
    """2d PCA scatter plot with optional class info

    Return the pca model to be able to introspect the components or transform
    new data with the same model.
    """
    data = atleast2d_or_csr(data)

    if data.shape[1] == 2:
        # No need for a PCA; nothing is fitted, so None is returned below.
        data_2d = data
        pca = None
    else:
        pca = RandomizedPCA(n_components=2)
        data_2d = pca.fit_transform(data)

    for i, c, m in zip(np.unique(labels), cycle(COLORS), cycle(MARKERS)):
        plt.scatter(data_2d[labels == i, 0], data_2d[labels == i, 1],
                    c=c, marker=m, label=i, alpha=0.5)

    plt.legend(loc='best')
    if title is None:
        title = "2D PCA scatter plot"
        if name is not None:
            title += " for " + name
    plt.xlabel('First Principal Component')
    plt.ylabel('Second Principal Component')
    plt.title(title)

    return pca
def main():
    protein = sys.argv[1]

    X = load_file(protein)
    """	
	scores = np.loadtxt("../LSDMap/{protein}.scores.txt".format(**locals()))

	if scores.shape[0] != RMSD.shape[0]:
		scores = scores[-RMSD.shape[0]:]
		print("selecting last N")

	models = select_N_models(RMSD[1:,1:], scores, 10000)
	keep = np.r_[0, models + 1]
	n_neigh = np.min(np.sum(RMSD < 6, axis=0)[models + 1])
	RMSD = RMSD[keep,:][:,keep]
	"""
    #models = np.arange(N)
    #np.savetxt("output/{protein}/pca/kept.txt".format(**locals()), models)
    #np.save("output/{protein}/pca/RMSD.npy".format(**locals()), RMSD[0,:])

    pca = RandomizedPCA(n_components=100, copy=False)
    proj = pca.fit_transform(X)
    acc_var = calcAccumVar(pca.explained_variance_ratio_)

    np.savetxt("output/{protein}/pca/acc_var.txt".format(**locals()), acc_var)
    np.save("output/%s/pca/proj.npy" % protein, proj)
    np.save("output/{protein}/pca/proj2D.npy".format(**locals()), proj[:, :2])
Example #24
    def pca(self, y):

        # select a random subset of Y dimensions (possibly gives robustness as well as speed)
        rand_dims = np.sort(
            np.random.choice(y.shape[1],
                             np.minimum(self.tree_params['num_dims_for_pca'],
                                        y.shape[1]),
                             replace=False))
        y_dim_subset = y.take(rand_dims, 1)

        pca = RandomizedPCA(n_components=1)  # project onto the first principal component

        # optional: select a subset of exs (not so important if PCA is fast)
        if self.tree_params['sub_sample_exs_pca']:
            rand_exs = np.sort(
                np.random.choice(y.shape[0],
                                 np.minimum(
                                     self.tree_params['num_exs_for_pca'],
                                     y.shape[0]),
                                 replace=False))
            pca.fit(y_dim_subset.take(rand_exs, 0))
            return pca.transform(y_dim_subset)

        else:
            # perform PCA
            return pca.fit_transform(y_dim_subset)
def dimentionality_reduction(train_x, test_x):
	print "Dimensionality reduction to 10D on training and test data...."
	pca = RandomizedPCA(n_components=10)
	train_x = pca.fit_transform(train_x)
	test_x = pca.transform(test_x)
	print "Done."
	return train_x, test_x
Example #26
def randomized_pca(train_data_images, train_data_split_images,
                   test_data_images, IMG_SIZE):
    train_data_features = []
    test_data_features = []
    train_data = []
    test_data = []
    train_data_split_crossfold = []

    for image in train_data_images:
        img = img_to_matrix(image, IMG_SIZE)
        img = flatten_image(img)
        train_data.append(img)

    for image in train_data_split_images:
        img = img_to_matrix(image, IMG_SIZE)
        img = flatten_image(img)
        train_data_split_crossfold.append(img)

    for image in test_data_images:
        img = img_to_matrix(image, IMG_SIZE)
        img = flatten_image(img)
        test_data.append(img)

    pca = RandomizedPCA(50)
    return (pca.fit_transform(train_data), pca.transform(test_data))
def pca_knn():
    Xtrain,ytrain,Xtest,ytest = getSplitData()
    Xtrain, Xtest = getScaledData(Xtrain, Xtest)
    ntest = Xtest.shape[0]
    #Your code here
    for n in range(5,8):
        pca = RandomizedPCA(n_components=n)
        pca_Xtrain = pca.fit_transform(Xtrain, ytrain)
        # Transform the test set with the PCA fitted on the training data.
        pca_Xtest = pca.transform(Xtest)
        neigh = KNeighborsClassifier(n_neighbors=5)
        neigh.fit(pca_Xtrain, ytrain)
        
        yPredict = neigh.predict(pca_Xtest)
     
        print "parameter: n_components = ",n
        print "parameter: n_neighbors = 5"
        print "pca_knn classification accuracy: ", accuracy_score(ytest,yPredict)
Example #29
    def run_pca(self, features):
        """Run a principal component analysis on the training data
        """

        pca = RandomizedPCA(n_components=5)
        features_pca = pca.fit_transform(features)

        return features_pca
Example #30
def main():

    #get the file path from the command prompt
    if len(sys.argv) > 1:
        TEST_FILE = sys.argv[1]
    else:
        print("error: lease specify a file path")
        exit()

    print("TRAINING STARTED!")

    print("pulling images from files...")
    #Store image paths and labels
    images = []
    rawlabels = []
    for subdir, dirs, files in os.walk(DATA_DIR):
        for file in files:
            if (subdir.split('/')[1]) != "test":
                rawlabels.append(subdir.split('/')[1])
                images.append(os.path.join(subdir, file))

    print("converting images to arrays...")
    #Create a massive data array
    data = []
    labels = []
    counter = 0
    for imagePath in images:
        #print imagePath
        img = []
        try:
            img = imgToArray(imagePath)
            data.append(img)
            labels.append(rawlabels[counter])
        except IOError:
            pass
        counter += 1
    data = np.array(data)

    print("reducing arrays using randomizedPCA...")
    #randomizedPCA on training set
    #this reduces the huge amount of data points
    pca = RandomizedPCA(n_components=4)
    data = pca.fit_transform(data)

    #generate a 2D plot that shows the groupings
    #generatePlot(data,labels)

    print("using K-closest neighbors to classify data...")
    #fit the KNeighbors classifier
    knn = KNeighborsClassifier()
    knn.fit(data, labels)

    print("-----------------------------------")
    print("TESTING STARTED!")
    #test the image
    print "The test image, " + TEST_FILE + " is a:"
    test = string_to_img(TEST_FILE, pca)
    print classify_image(test, knn)
def rca1_decompose(dataset, n):
    rca = RandomizedPCA(n_components=n)
    reduced_features = rca.fit_transform(dataset.all.features)
    training_size = dataset.training_size
    training = Data(reduced_features[:training_size, :],
                    dataset.all.target[:training_size])
    testing = Data(reduced_features[training_size:, :],
                   dataset.all.target[training_size:])
    return DataSet(training, testing)
	def fit(self, X, y=None, c=None):
		"""Fit the model using X as training data.

		Parameters
		----------
		X : array, shape (n_samples, n_features) or (n_samples, n_samples)
			If the metric is 'precomputed' X must be a square distance
			matrix. Otherwise it contains a sample per row.
		"""
		X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], dtype=np.float64)
		random_state = check_random_state(self.random_state)

		if self.early_exaggeration < 1.0:
			raise ValueError("early_exaggeration must be at least 1, but is "
							 "%f" % self.early_exaggeration)

		if self.n_iter < 200:
			raise ValueError("n_iter should be at least 200")

		if self.metric == "precomputed":
			if self.init == 'pca':
				raise ValueError("The parameter init=\"pca\" cannot be used "
								 "with metric=\"precomputed\".")
			if X.shape[0] != X.shape[1]:
				raise ValueError("X should be a square distance matrix")
			distances = X
		else:
			if self.verbose:
				print("[t-SNE] Computing pairwise distances...")

			if self.metric == "euclidean":
				distances = pairwise_distances(X, metric=self.metric, squared=True)
			else:
				distances = pairwise_distances(X, metric=self.metric)

		# Degrees of freedom of the Student's t-distribution. The suggestion
		# alpha = n_components - 1 comes from "Learning a Parametric Embedding
		# by Preserving Local Structure" Laurens van der Maaten, 2009.
		alpha = max(self.n_components - 1.0, 1)
		n_samples = X.shape[0]
		self.training_data_ = X

		P = _joint_probabilities(distances, self.perplexity, self.verbose)
		self.P = deepcopy(P)
		if self.init == 'pca':
			pca = RandomizedPCA(n_components=self.n_components,
								random_state=random_state)
			X_embedded = pca.fit_transform(X)
		elif self.init == 'random':
			X_embedded = None
		else:
			raise ValueError("Unsupported initialization scheme: %s"
							 % self.init)

		self.embedding_ = self._tsne(P, alpha, n_samples, random_state,
									 X_embedded=X_embedded, c=c)
Example #34
    def trainset(data, labels):
        pca = RandomizedPCA(n_components=10)
        std = StandardScaler()
        data = np.reshape(data, (data.shape[0], -1))
        data = pca.fit_transform(data)
        data = std.fit_transform(data)
        knn = KNeighborsClassifier()
        knn.fit(data, labels)

        return pca, std, knn
Example #35
def visualize():
    pca = RandomizedPCA(n_components=2)
    X = pca.fit_transform(data)
    df = pd.DataFrame({"x": X[:, 0], "y": X[:, 1], "label": labels})
    colors = ["red", "yellow"]
    for label, color in zip(df['label'].unique(), colors):
        mask = df['label'] == label
        pl.scatter(df[mask]['x'], df[mask]['y'], c=color, label=label)
    pl.legend()
    pl.show()
Example #37
def HSV_PCA(image_paths, hue_bins = 180, sat_bins = 256, val_bins = 256):

  hsv_hists = HSV_hists(image_paths, hue_bins, sat_bins, val_bins)

  pca = RandomizedPCA(n_components=3)

  hue_pca = pca.fit_transform(np.log(hsv_hists[0]))
  sat_pca = pca.fit_transform(np.log(hsv_hists[1]))
  val_pca = pca.fit_transform(np.log(hsv_hists[2]))

  hsv_df = pd.DataFrame(data = np.hstack((hue_pca, sat_pca, val_pca)))
  h_cols = ["HuePC" + str(i) for i in range(1,4)]
  s_cols = ["SatPC" + str(i) for i in range(1,4)]
  v_cols = ["ValPC" + str(i) for i in range(1,4)]
  hsv_df.columns = h_cols + s_cols + v_cols

  df_res = pd.concat([pd.DataFrame({'image_paths': image_paths}), hsv_df], axis = 1)

  return df_res
def get_pca(data, num_components=2):
    """
    Perform a PCA transformation
    Parameters
    ----------
    data: Values to transform
    num_components: Number of dimension of the data
    """
    pca = RandomizedPCA(n_components=num_components, whiten=False)
    data = pca.fit_transform(data)
    return data, pca.explained_variance_ratio_
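
A short usage sketch for get_pca; the toy data is illustrative:

import numpy as np

rng = np.random.RandomState(0)
data = rng.randn(100, 8)
reduced, var_ratio = get_pca(data, num_components=2)
print(reduced.shape)     # (100, 2)
print(var_ratio.sum())   # fraction of total variance kept by the two components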
Example #39
def get_input_pca(imgs, labels, pca=None):
    I = np.rollaxis(imgs, 2)
    I = np.reshape(I, (I.shape[0], -1))

    if pca is None:
        pca = RandomizedPCA(n_components=None, copy=False, iterated_power=3, whiten=False)
        I = pca.fit_transform(I)
    else:
        # Reuse the PCA that was fitted on the training images.
        I = pca.transform(I)
    L = np.ravel(labels)

    return I, L, pca
Example #40
def RPCA(model_data, components=None, transform_data=None):
    t0 = time()
    rpca = RandomizedPCA(n_components=components)
    if transform_data is None:
        projection = rpca.fit_transform(model_data)
    else:
        rpca.fit(model_data)
        projection = rpca.transform(transform_data)
    print "Randomized PCA Explained Variance: ", rpca.explained_variance_ratio_
    print "Randomized PCA Time: %0.3f" % (time() - t0)
    return projection
    def PlotPCA(self):
        pca = RandomizedPCA(n_components=1)
        print shape(self.fmri_train)
        pca.fit(self.fmri_train)
        print shape(pca.components_)
        trainingVector = pca.fit_transform(self.fmri_train)
        plt.plot(pca.explained_variance_ratio_)
        plt.show()
        #print pca.get_params()
        print shape(trainingVector)
        io.mmwrite('fmri_train_240samples_1components.out', trainingVector, field='real', precision=25)
Example #43
def pca_LG(train, test):
    y = []
    x_train, y_train, x_test, y_test = split_data(train, test)
    pca = RandomizedPCA(n_components=500)
    x_train = pca.fit_transform(x_train)
    x_test = pca.transform(x_test)
    lr = LogisticRegression()
    lr.fit(x_train, y_train)
    y = lr.predict(x_test)
    #print(lr.score(x_train,y_train))
    return format_y(y)
Example #44
def pca_knn(train, test):
    y = []
    x_train, y_train, x_test, y_test = split_data(train, test)
    pca = RandomizedPCA(n_components=2)
    x_train = pca.fit_transform(x_train)
    x_test = pca.transform(x_test)
    knn = KNeighborsClassifier()
    knn.fit(x_train, y_train)
    y = knn.predict(x_test)
    #print(knn.score(x_test,y_test))
    return format_y(y)
Example #45
    def test_do_pca(self):
        pca_res = do_pca(self.dup_data, 3)

        for datum in pca_res.reshape(-1, 1):
            self.assertAlmostEqual(datum[0], 0.)

        pca_res = do_pca(self.data, 2).reshape(1, -1)[0]
        expected_pca = PCA(n_components = 2)
        expected_res = expected_pca.fit_transform(self.data).reshape(1, -1)[0]

        for expected, actual in zip(expected_res, pca_res):
            self.assertAlmostEqual(expected, actual)
Example #46
    def maybeReduceDimensionality(self, img_data):
        """Dimensional reduction of 3D image matrix (numpy array)."""
        # Iterating through a ndimensional array produces slices along
        # the last axis. This is equivalent to data[i,:,:] in this case
        img_data = img_data[::self.n_slices]

        if self.reduction is None:
            """No Reduction"""
            return img_data

        elif self.reduction == "H":
            """Histogram"""
            from sklearn import preprocessing

            img_data = np.asarray(img_data, dtype=float).flat

            min_max_scaler = preprocessing.MinMaxScaler()
            scaled_data = min_max_scaler.fit_transform(img_data)

            hist = np.histogram(scaled_data,
                                bins=self.reduction_dict["H"]["value"],
                                range=None,
                                normed=False,
                                weights=None,
                                density=None)[0]

            return hist.reshape(1, hist.shape[0])

        elif self.reduction == "P":
            """Slice-wise (randomized) Principal Component Analysis"""
            from sklearn.preprocessing import normalize
            from sklearn.decomposition import RandomizedPCA

            proj_data = []
            for img_slice in img_data:
                norm_data = normalize(img_slice)

                shaped_data = np.reshape(norm_data, norm_data.size)
                # shaped_data.shape

                rpca = RandomizedPCA(
                    n_components=self.reduction_dict["P"]["value"],
                    random_state=0)
                proj_slice = rpca.fit_transform(norm_data)
                # plt.imshow(proj_data)

                # feat_data = rpca.inverse_transform(proj_data)
                # plt.imshow(feat_data)
                # plt.imshow(norm_data)

                proj_data.append(proj_slice)

            return proj_data
Example #47
def pca(data, ncomp=100, whiten=False):
    # pt0 and pt2 are assumed to be timestamps recorded earlier by the calling script.
    pt4 = time.time()
    print 'import and normalization took time {0}'.format(pt4 - pt0)
    if whiten:   #if data needs to be pca whitened, whiten data
       pca = RandomizedPCA(n_components=ncomp, whiten=True)  #create pca object to pca whiten features
       X = pca.fit_transform(data)
    else:
       X = data         #else return data as is
    pt5 = time.time()
    print 'array cast and pca whitening took time {0}'.format(pt5 - pt2)
    print 'total time taken {0}'.format(pt5-pt0)
    return X
def plot_for_2d(data , y):
	print "Reducing dimension to 2D for visualization...."
	pca = RandomizedPCA(n_components=2)
	X = pca.fit_transform(data)
	df = pd.DataFrame({"x": X[:, 0], "y": X[:, 1], "label":np.where(y==1, "Sphere", "cube")})
	colors = ["red", "yellow"]
	print "Displaying plot...."
	for label, color in zip(df['label'].unique(), colors):
		mask = df['label'] == label
		pl.scatter(df[mask]['x'], df[mask]['y'], c=color, label=label)
	pl.show()
	print "Done."
Example #49
def reduce_dimensions(data, n, random_state=None):
    """
	Reduces the input data's dimension to 'n'.

	Args:
		data: An M x N matrix, where M is the number of samples and N is the number
			of features. The dimensions will be reduced from N to n.
		n: The new number of dimensions
	Returns:
		data: An M x n reduced dimension matrix.
	"""
    pca = RandomizedPCA(n_components=n, random_state=random_state)
    return pca.fit_transform(data)
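
Usage sketch for reduce_dimensions; passing the same random_state twice yields identical projections, which is the point of exposing that argument:

import numpy as np

rng = np.random.RandomState(0)
data = rng.randn(500, 64)
a = reduce_dimensions(data, n=10, random_state=0)
b = reduce_dimensions(data, n=10, random_state=0)
print(a.shape)            # (500, 10)
print(np.allclose(a, b))  # True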
Example #50
def pcaPic(data, label):
    n_components = 100
    print(data.shape)
    print("train pca!!")
    pca = RandomizedPCA(n_components=n_components, whiten=True).fit(data)
    # The PCA is already fitted above, so just transform the training data here.
    X_train_pca = pca.transform(data)
    y_train  = label
    print("Fitting the classifier to the training set")
    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    clf = GridSearchCV(SVC(kernel='rbf', class_weight='auto'), param_grid)
    clf = clf.fit(X_train_pca, y_train)
    return pca, clf
Example #51
def compute_pca(raw_data):
    # randomly order the data
    # seed(0)
    print('shuffling data...')
    shuffle(raw_data)
    # pull out the features and the labels
    print('pulling out data to run PCA...')
    data = np.array([cd for (cd, _y, f) in raw_data])
    print('finding principal components...')
    pca = RandomizedPCA(n_components=N_COMPONENTS, random_state=0)
    X = pca.fit_transform(data)

    return raw_data, data, pca, X
Example #52
def preprocess_data():
    datasets = sio.loadmat('../multi_data/Hyper_01_Urban.mat')
    hypercube = datasets['Hypercube']

    datasets = sio.loadmat('../multi_data/Hyper_01_Urban_GroundTruth.mat')
    ground_truth = datasets['Ground_Truth']

    del datasets

    hypercube_1D = np.reshape(hypercube, (-1, hypercube.shape[2]))
    rpca = RandomizedPCA(n_components=10, whiten=True)
    hypercube_1D_reduced = rpca.fit_transform(hypercube_1D)
    hypercube_reduced = np.reshape(
        hypercube_1D_reduced, (hypercube.shape[0], hypercube.shape[1], -1))

    print rpca.explained_variance_ratio_.sum()

    window_sz = 5
    window_pad = 2
    # The loops below skip window_pad pixels on each border, so the number of
    # extracted patches is (rows - 2*window_pad) * (cols - 2*window_pad).
    dataset_matrix_size = ((hypercube_reduced.shape[0] - 2 * window_pad) *
                           (hypercube_reduced.shape[1] - 2 * window_pad),
                           window_sz, window_sz, hypercube_reduced.shape[2])
    dataset_matrix = np.zeros(dataset_matrix_size)
    label_vector = np.zeros((dataset_matrix.shape[0], ))

    data_index = 0
    for r in range(hypercube_reduced.shape[0]):
        if r < window_pad or r > hypercube_reduced.shape[0] - window_pad - 1:
            continue
        for c in range(hypercube_reduced.shape[1]):
            if c < window_pad or c > hypercube_reduced.shape[
                    1] - window_pad - 1:
                continue

            patch = hypercube_reduced[r - window_pad:r + window_pad + 1,
                                      c - window_pad:c + window_pad + 1]
            dataset_matrix[data_index, :, :, :] = patch
            label_vector[data_index] = ground_truth[r, c]

            data_index = data_index + 1

    dataset_matrix_r = dataset_matrix[label_vector > 0, :, :, :]
    label_vector_r = label_vector[label_vector > 0]

    rand_perm = np.random.permutation(label_vector_r.shape[0])
    dataset_matrix_r = dataset_matrix_r[rand_perm, :, :, :]
    label_vector_r = label_vector_r[rand_perm]

    label_vector_r = label_vector_r - 1.0

    return dataset_matrix, label_vector, dataset_matrix_r, label_vector_r
Example #53
def c_random_pca():
    pca_2 = RandomizedPCA(n_components=2)
    X_pca = pca_2.fit_transform(iris.data)
    print(X_pca.shape)
    plt.scatter(X_pca[:, 0], X_pca[:, 1], c=iris.target, edgecolors="none")
    plt.show()

    # Percentage of variance explained by each of the selected components.
    #     If all components are stored, the sum of explained variances is equal
    #     to 1.0.
    print(pca_2.explained_variance_ratio_.sum())
    # Principal axes in feature space, representing the directions of
    #     maximum variance in the data
    print(pca_2.components_)
Example #54
def pca(all_corr, pc_start, pc_end):
    pca_components = pc_end - pc_start
    pca = RandomizedPCA(n_components=pca_components, whiten=False)
    print 'reducing dimensions to ' + str(pca_components) + ' PCA components'
    pc_idx = range(pc_start, pc_end)
    pca_xform = pca.fit_transform(all_corr)
    all_corr_pca = pca_xform[:, pc_idx]  #do not whiten PCA-space data
    eig = pca.components_
    variances = pca.explained_variance_ratio_
    eigenmaps = np.zeros([pca_components, masky * maskx])
    eigenmaps[:] = np.nan
    eigenmaps[:, pushmask] = eig
    eigenmaps_img = eigenmaps.reshape(pca_components, masky, maskx)
    return eigenmaps_img, all_corr_pca, variances
Example #55
def test_non_square_infomax():
    """ Test non-square infomax
    """
    from sklearn.decomposition import RandomizedPCA

    rng = np.random.RandomState(0)

    n_samples = 200
    # Generate two sources:
    t = np.linspace(0, 100, n_samples)
    s1 = np.sin(t)
    s2 = np.ceil(np.sin(np.pi * t))
    s = np.c_[s1, s2].T
    center_and_norm(s)
    s1, s2 = s

    # Mixing matrix
    n_observed = 6
    mixing = rng.randn(n_observed, 2)
    for add_noise in (False, True):
        m = np.dot(mixing, s)

        if add_noise:
            m += 0.1 * rng.randn(n_observed, n_samples)

        center_and_norm(m)
        pca = RandomizedPCA(n_components=2, whiten=True, random_state=rng)
        m = m.T
        m = pca.fit_transform(m)
        # we need extended since input signals are sub-gaussian
        unmixing_ = infomax(m, random_state=rng, extended=True)
        s_ = np.dot(unmixing_, m.T)
        # Check that the mixing model described in the docstring holds:
        mixing_ = linalg.pinv(unmixing_.T)

        assert_almost_equal(m, s_.T.dot(mixing_))

        center_and_norm(s_)
        s1_, s2_ = s_
        # Check to see if the sources have been estimated
        # in the wrong order
        if abs(np.dot(s1_, s2)) > abs(np.dot(s1_, s1)):
            s2_, s1_ = s_
        s1_ *= np.sign(np.dot(s1_, s1))
        s2_ *= np.sign(np.dot(s2_, s2))

        # Check that we have estimated the original sources
        if not add_noise:
            assert_almost_equal(np.dot(s1_, s1) / n_samples, 1, decimal=2)
            assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=2)
def get_pca_data_batch(imgs, hight=resize_hight, width=resize_width):
    """
    """
    newsize = (hight, width)

    rImgs = [lib_cv2.resize(e, newsize) for e in imgs]
    rImgs = [lib_cv2.cvtColor(e, lib_cv2.COLOR_BGR2GRAY) for e in rImgs]
    rImgs = [e.ravel() for e in rImgs]

    pca = RandomizedPCA(n_components=200, whiten=True)

    pImgs = pca.fit_transform(rImgs)

    return pImgs