Example #1
def main():
    k_range = [8, 16]
    C_range = [[0.001, 5], [0.005, 10]]
    #pca = KernelPCA(n_components=50, kernel='linear')
    pca = IncrementalPCA(n_components=50, batch_size=1000)
    #lda = LinearDiscriminantAnalysis(n_components=40)
    for k in k_range:
        print("VLAD, k:%d" % (k))
        X = VLAD(k)
        print(X.shape)
        X = StandardScaler().fit_transform(X)

        col_name = ['feature' + str(i) for i in range(X.shape[1])]
        X = pd.DataFrame(data=X, columns=col_name)
        y = pd.read_csv(y_file_name, names=['label'])
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.4)

        print("PCA")
        pca.fit(X_train)
        X_train_pca = pca.transform(X_train)
        X_test_pca = pca.transform(X_test)
        for C in C_range:
            linear_score = SVMmodel.runSVM(X_train_pca, X_test_pca, y_train,
                                           y_test, C[0], 'linear')
            rbf_score = SVMmodel.runSVM(X_train_pca, X_test_pca, y_train,
                                        y_test, C[1], 'rbf')
            with open('res_VLAD_PCA.txt', "a") as f:
                f.write(
                    "VLAD with k=%d, Z-score, SVM with %s kernel, C=%f, score=%f\n"
                    % (k, 'linear', C[0], linear_score))
                f.write(
                    "VLAD with k=%d, Z-score, SVM with %s kernel, C=%f, score=%f\n"
                    % (k, 'rbf', C[1], rbf_score))
Example #2
def get_pca(training_data):
    # Get the principal components
    print("Applying PCA!!")
    ipca = IncrementalPCA()
    ipca.fit(training_data)
    print("\t\tDone.")
    return ipca
Example #3
def mask_encoding(masks,
                  n_components=60,
                  class_agnostic=True,
                  whiten=True,
                  sigmoid=True,
                  batch_size=1024):
    components_c = []
    mean_c = []
    ratio_c = []
    explained_variance_c = []
    if class_agnostic:
        if sigmoid:
            value_random = VALUE_MAX * np.random.rand(masks.shape[0],
                                                      masks.shape[1])
            value_random = np.maximum(value_random, VALUE_MIN)
            masks = np.where(masks > value_random, 1 - value_random,
                             value_random)
            masks = inverse_sigmoid(masks)
        pca = IncrementalPCA(n_components=n_components,
                             copy=False,
                             whiten=whiten,
                             batch_size=batch_size)
        pca.fit(masks)
        components_c.append(pca.components_[np.newaxis, :, :])
        mean_c.append(pca.mean_[np.newaxis, :])
        ratio_c.append(pca.explained_variance_ratio_[np.newaxis, :])
        explained_variance_c.append(pca.explained_variance_[np.newaxis, :])
        ratio = pca.explained_variance_ratio_.sum()
    else:
        # TODO: class-specific encoding is not implemented yet.
        raise NotImplementedError

    return components_c, mean_c, ratio_c, explained_variance_c, ratio
Example #4
def hierarchical(chapter_list):
    'Run hierarchical algorithm on given chapters'
    weight = tf_idf(chapter_list)
    result = AgglomerativeClustering(n_clusters=2).fit(weight)

    num_chapters = len(chapter_list)
    # Print the cluster label of each chapter
    for chapter_index in range(num_chapters):
        print('Chapter', chapter_index + 1, ':', result.labels_[chapter_index])
    # Count how many chapters from the first and second halves fall into each cluster
    first_half = Counter(result.labels_[:tools.FIRST_HALF])
    second_half = Counter(result.labels_[tools.FIRST_HALF:])
    # Print result
    print('Chapter 1-80:')
    print('\tClass 0:', first_half[0], '/ 80', '=', first_half[0] / 80)
    print('\tClass 1:', first_half[1], '/ 80', '=', first_half[1] / 80)
    print('Chapter 81-120:')
    print('\tClass 0:', second_half[0], '/ 40', '=', second_half[0] / 40)
    print('\tClass 1:', second_half[1], '/ 40', '=', second_half[1] / 40)
    print('Plotting clusters in 2D graph:')
    ipca = IncrementalPCA(n_components=2)
    ipca.fit(weight)
    reduction = ipca.transform(weight)
    colors = ['c', 'orangered']
    for chapter_index in range(num_chapters):
        plt.scatter(reduction[chapter_index, 0],
                    reduction[chapter_index, 1],
                    c=colors[int(result.labels_[chapter_index])],
                    marker='x')
    plt.show()
Example #5
def learn_PCA_matrix_for_resnetvecs_with_sklearn(resnetvecs,
                                                 desired_dimension):
    print('resnetvecs in learn PCA ', resnetvecs.shape)
    pca = IncrementalPCA(n_components=desired_dimension, copy=False)
    pca.fit(resnetvecs)
    joblib.dump(pca, ('pca-%d.pkl' % desired_dimension))
    return pca
Example #6
def ipca(mov, components=50, batch=1000):
    # vectorize the images
    num_frames, h, w = np.shape(mov)
    frame_size = h * w
    frame_samples = np.reshape(mov, (num_frames, frame_size)).T
    
    # run IPCA to approximate the SVD
    
    ipca_f = IncrementalPCA(n_components=components, batch_size=batch)
    ipca_f.fit(frame_samples)
    
    # construct the reduced version of the movie vectors using only the 
    # principal component projection
    
    proj_frame_vectors = ipca_f.inverse_transform(ipca_f.transform(frame_samples))
        
    # get the temporal principal components (pixel time series) and 
    # associated singular values
    
    eigenseries = ipca_f.components_.T

    # the rows of eigenseries are approximately orthogonal
    # so we can approximately obtain eigenframes by multiplying the 
    # projected frame matrix by this transpose on the right
    
    eigenframes = np.dot(proj_frame_vectors, eigenseries)

    return eigenseries, eigenframes, proj_frame_vectors        
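A minimal usage sketch for the ipca() helper above, run on a synthetic movie (the shapes and component counts are arbitrary assumptions):

import numpy as np

mov = np.random.rand(20, 16, 16)   # toy movie: 20 frames of 16x16 pixels
eigenseries, eigenframes, proj = ipca(mov, components=10, batch=256)
print(eigenseries.shape)   # temporal components, (num_frames, components)
print(eigenframes.shape)   # spatial eigenframes, (pixels, components)
print(proj.shape)          # reconstructed pixel time series, (pixels, num_frames)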
Example #7
def GS_IncrementalPCA(X):
    '''
    Search for the optimal PCA dimensionality-reduction parameters.
    X: data samples
    '''
    num1 = 0.99
    num2 = 0.98
    num3 = 0.97
    num4 = 0.95
    sum_t = 0
    count = 0
    ret = {}
    pca = IncrementalPCA(n_components=None)
    pca.fit(X)
    ratios = pca.explained_variance_ratio_
    for ratio in ratios:
        sum_t = sum_t + ratio
        count = count + 1
        if sum_t <= num4:
            ret['95%'] = count
        if sum_t <= num3:
            ret['97%'] = count
        if sum_t <= num2:
            ret['98%'] = count
        if sum_t <= num1:
            ret['99%'] = count
    return pca.n_components_, ret
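A quick sketch of how GS_IncrementalPCA might be called; the random matrix below is only a stand-in for real samples:

import numpy as np

X = np.random.rand(500, 60)
n_kept, thresholds = GS_IncrementalPCA(X)
print(n_kept)       # number of components fitted by IncrementalPCA
print(thresholds)   # component counts still below each cumulative-variance threshold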
Example #8
def main():
    m = word2vec.Word2Vec.load(sys.argv[1])
    X = []
    words = []
    colors = []
    with open(sys.argv[2], 'r') as f:
        for line in f:
            t = line.strip('\n').split(',')
            w = t[0]
            c = t[1]
            try:
                X.append(m[w])
                words.append(w)
                colors.append(c)
            except KeyError:  # skip words missing from the word2vec model
                continue

    samples = np.array(X)
    ipca = IncrementalPCA(n_components=3)
    ipca.fit(samples)

    data = ipca.transform(X)
    xs = [i[0] for i in data]
    ys = [i[1] for i in data]
    zs = [i[2] for i in data]

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(xs, ys, zs)

    chf = font_manager.FontProperties(fname='msjh.ttf', size='10')
    for i, txt in enumerate(words):
        ax.text(xs[i], ys[i], zs[i], txt, color=colors[i], fontproperties=chf)

    plt.show()
Example #9
def batch_fit_pca(data_loader, n_components):
    '''
    Inputs:
        data_loader - pytorch data loader giving data in batches
        n_components - int - number of principal components to use
    Output:
        pca - sklearn.decomposition.PCA - pca function fitted to data from data_loader
    '''
    batch_size=data_loader.batch_size
    n_data_points=data_loader.dataset._len

    # Batch sizes above ~300 usually do not fit in memory at once.
    if batch_size < n_data_points or batch_size > 300:
        print("Apply incremental PCA on data.")
        pca=IncrementalPCA(n_components=n_components,batch_size=min(batch_size,300))
                
        for batch_idx, (X,Y) in enumerate(data_loader):
            X=X.reshape(batch_size,-1)
            pca.partial_fit(X)
    else:
        print("Apply PCA on full data.")
        pca=PCA(n_components=n_components)
        X,Y=next(iter(data_loader))
        X=X.reshape(batch_size,-1)
        pca.fit(X)

    return pca
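A hypothetical usage sketch for batch_fit_pca. The loader below is an assumption: the function reads a `_len` attribute from the dataset, so a small TensorDataset subclass supplies one.

import torch
from torch.utils.data import DataLoader, TensorDataset

class _SizedDataset(TensorDataset):
    def __init__(self, *tensors):
        super().__init__(*tensors)
        self._len = len(tensors[0])   # attribute expected by batch_fit_pca

X = torch.randn(1200, 1, 8, 8)        # 1200 fake 8x8 single-channel images
Y = torch.zeros(1200)
loader = DataLoader(_SizedDataset(X, Y), batch_size=400, drop_last=True)

pca = batch_fit_pca(loader, n_components=10)   # batch_size > 300, incremental branch
print(pca.n_components_)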
Example #10
def performPCA(data, n_components):

	ipca = IncrementalPCA(n_components=n_components, batch_size=n_components)
	data = ipca.fit_transform(data)  # fit and transform in one pass; a separate fit() call is redundant

	return data
Example #11
class IPCAEstimator():
    def __init__(self, n_components):
        self.n_components = n_components
        self.whiten = False
        self.transformer = IncrementalPCA(n_components,
                                          whiten=self.whiten,
                                          batch_size=max(
                                              100, 2 * n_components))
        self.batch_support = True

    def get_param_str(self):
        return "ipca_c{}{}".format(self.n_components,
                                   '_w' if self.whiten else '')

    def fit(self, X):
        self.transformer.fit(X)

    def fit_partial(self, X):
        try:
            self.transformer.partial_fit(X)
            self.transformer.n_samples_seen_ = \
                self.transformer.n_samples_seen_.astype(np.int64) # avoid overflow
            return True
        except ValueError as e:
            print('\nIPCA error:', e)
            return False

    def get_components(self):
        stdev = np.sqrt(self.transformer.explained_variance_)  # already sorted
        var_ratio = self.transformer.explained_variance_ratio_
        return self.transformer.components_, stdev, var_ratio  # PCA outputs are normalized
Example #12
def calc_PCA():
    with open('./config/config_origin.json', 'r') as f:
        CONFIG = json.load(f)
    ROOT_PATH = CONFIG["ROOT_PATH"]
    MODEL_TYPE = 'ResNet%dv%d' % (56, 2)
    FEATURE_DIR = os.path.join(ROOT_PATH, "features")
    FEATURE_DIR = os.path.join(FEATURE_DIR, "models-%s/" % MODEL_TYPE)

    features_train_bad = np.load(
        os.path.join(FEATURE_DIR, "features_train_bad.npy"))
    features_train_good = np.load(
        os.path.join(FEATURE_DIR, "features_train_good.npy"))
    features_train = np.concatenate((features_train_bad, features_train_good))

    ipca = IncrementalPCA(n_components=2, batch_size=1000)
    ipca.fit(features_train)  # fit with ALL data
    # components_train = ipca.transform(features_train)
    components_train_bad = ipca.transform(features_train_bad)
    components_train_good = ipca.transform(features_train_good)
    # print(components_train.shape) # (30000, 2)
    np.save(os.path.join(FEATURE_DIR, "components_train_bad.npy"),
            components_train_bad)
    np.save(os.path.join(FEATURE_DIR, "components_train_good.npy"),
            components_train_good)

    import matplotlib.pyplot as plt
    plt.scatter(components_train_bad[:, 0],
                components_train_bad[:, 1],
                color="r")
    plt.scatter(components_train_good[:, 0],
                components_train_good[:, 1],
                color="g")
    plt.show()
Example #13
def compute_pca_model(crystal_samples, batch_size=20):
    transformer = IncrementalPCA(batch_size=batch_size)
    transformer.fit(crystal_samples)
    W  = transformer.components_
    w0 = transformer.mean_
    z  = np.matmul((crystal_samples - w0), W.T)
    return W, w0, z
Example #14
def ipca(mov, components = 50, batch =1000):
    # vectorize the images
    num_frames, h, w = mov.shape
    frame_size = h * w
    frame_samples = np.reshape(mov, (num_frames, frame_size)).T
    
    # run IPCA to approximate the SVD
    
    ipca_f = IncrementalPCA(n_components=components, batch_size=batch)
    ipca_f.fit(frame_samples)
    
    # construct the reduced version of the movie vectors using only the 
    # principal component projection
    
    proj_frame_vectors = ipca_f.inverse_transform(ipca_f.transform(frame_samples))
        
    # get the temporal principal components (pixel time series) and 
    # associated singular values
    
    eigenseries = ipca_f.components_.T

    # the rows of eigenseries are approximately orthogonal
    # so we can approximately obtain eigenframes by multiplying the 
    # projected frame matrix by this transpose on the right
    
    eigenframes = np.dot(proj_frame_vectors, eigenseries)

    return eigenseries, eigenframes, proj_frame_vectors        
Example #15
    def preprocess_features(features, n_components, iterative=True):
        """
    Applies PCA to the features matrix and selects the top n_components.
    :param features: (nd-array) Features matrix [n_samples, n_features]
    :param n_components: (int) Number of components to retain after PCA
    :param iterative: (bool) If True, will perform iterative PCA
    :return: (nd-array) transformed features matrix
    """
        n_components = min(n_components, features.shape[1])
        batch_size = n_components
        if iterative:
            pca = IncrementalPCA(n_components=n_components,
                                 whiten=False,
                                 batch_size=batch_size)
        else:
            pca = PCA(n_components=n_components, whiten=False)

        output = np.zeros(
            (features.shape[0], min(n_components, features.shape[1])),
            dtype=float)
        features = scale(features)
        pca.fit(features)

        for c in range(0, features.shape[0], batch_size):
            output[c:c + batch_size] = pca.transform(features[c:c +
                                                              batch_size])
        return output
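A small illustration of the docstring above, calling preprocess_features on random data; in the original it sits inside a class, so the bare call here is an assumption:

import numpy as np

feats = np.random.rand(500, 64)
reduced = preprocess_features(feats, n_components=20, iterative=True)
print(reduced.shape)   # (500, 20)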
Example #16
    def viz_cluster(self, X, title=''):
        """
        Визуализация кластеров
            X: pandas.DataFrame/numpy.ndarray/scipy.sparse.csr.csr_matrix
                Датасет
            title: str, default=''
                Заголовок графика распределения
        """
        dist = 1 - pairwise.cosine_similarity(X)       
        
        icpa = IncrementalPCA(n_components=2, batch_size=16)
        icpa.fit(dist)
        demo = icpa.transform(dist)
        xs, ys = demo[:, 0], demo[:, 1]
        
        labels = self.cluster_model.labels_
        
        labels_unique = list(set(labels))
        traces = []
        for label in labels_unique:
            indexes = np.where(labels == label)[0]
            trace = {'type': 'scatter',
                     'x': xs[indexes],
                     'y': ys[indexes],
                     'name': int(label),
                     'mode': 'markers',
                     'marker': {'size': 7},
                     'text': X.toarray()[indexes]
                    }
            traces.append(trace)
        layout = go.Layout(title=title, showlegend=True)

        fig = go.Figure(data=traces, layout=layout)
        fig.show()
Example #17
def test_incremental_pca():
    # Incremental PCA on dense arrays.
    X = iris.data
    batch_size = X.shape[0] // 3
    ipca = IncrementalPCA(n_components=2, batch_size=batch_size)
    pca = PCA(n_components=2)
    pca.fit_transform(X)

    X_transformed = ipca.fit_transform(X)

    assert X_transformed.shape == (X.shape[0], 2)
    np.testing.assert_allclose(
        ipca.explained_variance_ratio_.sum(),
        pca.explained_variance_ratio_.sum(),
        rtol=1e-3,
    )

    for n_components in [1, 2, X.shape[1]]:
        ipca = IncrementalPCA(n_components, batch_size=batch_size)
        ipca.fit(X)
        cov = ipca.get_covariance()
        precision = ipca.get_precision()
        np.testing.assert_allclose(
            np.dot(cov, precision), np.eye(X.shape[1]), atol=1e-13
        )
Example #18
def test_incremental_pca_sparse(matrix_class):
    # Incremental PCA on sparse arrays.
    X = iris.data
    pca = PCA(n_components=2)
    pca.fit_transform(X)
    X_sparse = matrix_class(X)
    batch_size = X_sparse.shape[0] // 3
    ipca = IncrementalPCA(n_components=2, batch_size=batch_size)

    X_transformed = ipca.fit_transform(X_sparse)

    assert X_transformed.shape == (X_sparse.shape[0], 2)
    np.testing.assert_allclose(ipca.explained_variance_ratio_.sum(),
                               pca.explained_variance_ratio_.sum(),
                               rtol=1e-3)

    for n_components in [1, 2, X.shape[1]]:
        ipca = IncrementalPCA(n_components, batch_size=batch_size)
        ipca.fit(X_sparse)
        cov = ipca.get_covariance()
        precision = ipca.get_precision()
        np.testing.assert_allclose(np.dot(cov, precision),
                                   np.eye(X_sparse.shape[1]),
                                   atol=1e-13)

    with pytest.raises(TypeError,
                       match="IncrementalPCA.partial_fit does not support "
                       "sparse input. Either convert data to dense "
                       "or use IncrementalPCA.fit to do so in batches."):
        ipca.partial_fit(X_sparse)
Example #19
def low_mem_pca(data):
    """
    Run Singular Value Decomposition (SVD) on input data.

    Parameters
    ----------
    data : (S [*E] x T) array_like
        Optimally combined (S x T) or full multi-echo (S*E x T) data.

    Returns
    -------
    u : (S [*E] x C) array_like
        Component weight map for each component.
    s : (C,) array_like
        Variance explained for each component.
    v : (C x T) array_like
        Component timeseries.
    """
    from sklearn.decomposition import IncrementalPCA
    ppca = IncrementalPCA(n_components=(data.shape[-1] - 1))
    ppca.fit(data)
    v = ppca.components_.T
    s = ppca.explained_variance_
    u = np.dot(np.dot(data, v), np.diag(1. / s))
    return u, s, v
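A minimal sketch of low_mem_pca on synthetic data (sizes are arbitrary); it keeps data.shape[-1] - 1 components:

import numpy as np

data = np.random.rand(200, 20)    # 200 samples x 20 "timepoints"
u, s, v = low_mem_pca(data)
print(u.shape, s.shape, v.shape)  # (200, 19) (19,) (20, 19)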
Example #20
def main(args):
    print('===> args:\n', args)

    image_list = args.image_list
    feature_dir = args.feature_dir

    save_type = args.save_format
    feature_len = args.feature_dims

    i = 0
    with open(image_list, 'r') as f:
        lines = f.readlines()
        print('###### read features nums: %d ######' % (len(lines)))
        X = np.zeros(shape=(len(lines), feature_len))

        for line in lines:
            feature_name = line.strip() + save_type
            feature_path = os.path.join(feature_dir, feature_name)
            x_vec = np.ravel(matio.load_mat(feature_path))
            X[i] = x_vec[:feature_len]
            i = i + 1
    print('###### success load feature nums: %d ######' % i)
    print(X.shape)
    #ipca
    ipca = IncrementalPCA(n_components=args.n_components)
    ipca.fit(X)
    print('###### PCA Done! ######')
    joblib.dump(ipca, args.ipca_save_path)

    print('components num: %d' % ipca.n_components)
    sum_variance_ratio = 0
    for i in range(ipca.n_components):
        sum_variance_ratio += ipca.explained_variance_ratio_[i]
    print('sum_variance_ratio: %f' % sum_variance_ratio)
Example #21
    def visualize_data(self):
        ipca = IncrementalPCA(n_components=2, batch_size=3)
        ipca.fit(self.trainingData)

        self.fig = plt.figure()
        # self.ax = self.fig.add_subplot(111, projection='3d')
        self.ax = self.fig.add_subplot(111)
        projData = ipca.transform(self.trainingData)
        print(np.shape(projData))
        X1 = []
        X2 = []
        Y1 = []
        Y2 = []
        for idx in range(len(projData)):
            if self.trainingLabels[idx]:
                X1.append(projData[idx][0])
                Y1.append(projData[idx][1])
            else:
                X2.append(projData[idx][0])
                Y2.append(projData[idx][1])
        # X = np.array([ data[0] for data in projData])
        # Y = np.array([ data[1] for data in projData])
        X1 = np.array(X1)
        X2 = np.array(X2)
        Y1 = np.array(Y1)
        Y2 = np.array(Y2)

        # rospy.loginfo(np.shape(X1))
        # rospy.loginfo(np.shape(Y1))
        # rospy.loginfo(np.shape(X2))
        # rospy.loginfo(np.shape(Y2))
        rospy.loginfo("PLOTTING GRAPH")
        self.ax.plot(X1, Y1, 'r.', X2, Y2, 'g.')
        plt.show()
Example #22
class PCA(Model):
    """Given a set of input vectors, find their principle components"""
    def __init__(self, fn=None, n_comp=None, batch_size=None):
        self.model = IncrementalPCA()
        self.fn = fn
        self.params = {"n_components": n_comp, "batch_size": batch_size}
        self.set_params()

    def load(self, fn):
        """Set parameters after loading from filename"""
        super().load(fn)
        self.params = self.model.get_params()
        return

    def fit(self, reps):
        """Fit a list of representations"""
        X = [r.to_vector() for r in reps]
        self.model.fit(X)

    def err(self, to_transform, to_check_against):
        """Mesh error between reconstructed to_transform representation and
        mesh conversion of to_check_against
        """
        vec = to_transform.to_vector()
        vec_trans = self.model.transform(vec)
        vec_recon = self.model.inverse_transform(vec_trans)
        transformed = to_transform.from_vector(vec_recon)
        mesh1 = transformed.mesh()
        mesh2 = to_check_against.mesh()
        error = representation.mesh_error(mesh1, mesh2)
        return error
Example #23
class IncrementalPCA_Prim(primitive):
    def __init__(self, random_state=0):
        super(IncrementalPCA_Prim, self).__init__(name='IncrementalPCA')
        self.id = 50
        self.PCA_LAPACK_Prim = []
        self.type = 'feature engineering'
        self.description = "Incremental principal components analysis (IPCA). Linear dimensionality reduction using Singular Value Decomposition of centered data, keeping only the most significant singular vectors to project the data to a lower dimensional space. Depending on the size of the input data, this algorithm can be much more memory efficient than a PCA. This algorithm has constant memory complexity."
        self.hyperparams_run = {'default': True}
        self.pca = IncrementalPCA()
        self.accept_type = 'c_t'

    def can_accept(self, data):
        return self.can_accept_c(data)

    def is_needed(self, data):
        # data = handle_data(data)
        return True

    def fit(self, data):
        data = handle_data(data)
        self.pca.fit(data['X'])

    def produce(self, data):
        output = handle_data(data)
        cols = list(output['X'].columns)
        cols = ["{}_pcaincrmnt".format(x) for x in cols]
        result = self.pca.transform(output['X'])
        output['X'] = pd.DataFrame(result, columns=cols[:result.shape[1]])
        final_output = {0: output}
        return final_output
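The description above is essentially scikit-learn's own summary of IncrementalPCA. A minimal sketch of the constant-memory idea with plain scikit-learn, independent of the handle_data pipeline used by this primitive:

import numpy as np
from sklearn.decomposition import IncrementalPCA

ipca = IncrementalPCA(n_components=10)
for _ in range(20):                  # stream 20 chunks of 200 samples each
    chunk = np.random.rand(200, 50)
    ipca.partial_fit(chunk)          # only one chunk is held in memory at a time
print(ipca.explained_variance_ratio_.sum())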
Example #24
class IPCA(object):
    def __init__(self,
                 n_components=None,
                 whiten=False,
                 copy=True,
                 batch_size=None):
        """
        :param n_components:   default为None ,int 或None, 想要保留的分量数,None 时,
        min(n_samples, n_features)
        :param whiten:   bool型,可选项, 默认为False, 当true(默认情况下为false)时,components_ 向量除以
        n_samples*components_以确保具有单位组件级方差的不相关输出。
        :param copy: 默认为True,  False时,x 将被覆盖,将节约能存,但存在不安全
        :param batch_size: default None, 批量样本数,   只在fit 中使用,设为None,系统自动设成5*n_features,
        以保持经度与内存开销的平衡
        """
        self.model = IncrementalPCA(n_components=n_components,
                                    whiten=whiten,
                                    copy=copy,
                                    batch_size=batch_size)

    def fit(self, x, y=None):
        self.model.fit(X=x, y=y)

    def transform(self, x):
        return self.model.transform(X=x)

    def fit_transform(self, x, y=None):
        return self.model.fit_transform(X=x, y=y)

    def get_params(self, deep=True):  # get the estimator's parameters
        return self.model.get_params(deep=deep)

    def set_params(self, **params):  # set the estimator's parameters
        self.model.set_params(**params)

    def inverse_transform(self, x):  # the exact inverse of fit_transform
        return self.model.inverse_transform(X=x)

    def get_precision(self):  # compute the precision matrix from the generative model
        return self.model.get_precision()

    def get_covariance(self):  # compute the covariance from the generative model
        return self.model.get_covariance()

    def partial_fit(self, x, y=None, check_input=True):  # incremental training
        self.model.partial_fit(X=x, y=y, check_input=check_input)

    def get_attributes(self):
        component = self.model.components_
        explained_variance = self.model.explained_variance_
        explained_variance_ratio = self.model.explained_variance_ratio_
        singular_values = self.model.singular_values_
        means = self.model.mean_  # per-feature mean
        var = self.model.var_  # per-feature variance
        noise_variance = self.model.noise_variance_  # estimated noise covariance
        n_component = self.model.n_components_
        n_samples_seen = self.model.n_samples_seen_
        return component, explained_variance, explained_variance_ratio, singular_values, means, var, noise_variance, \
               n_component, n_samples_seen
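A hypothetical usage sketch for the IPCA wrapper above on random data:

import numpy as np

X = np.random.rand(300, 40)
model = IPCA(n_components=5, batch_size=100)
Z = model.fit_transform(X)
print(Z.shape)                           # (300, 5)
components = model.get_attributes()[0]   # components_ matrix
print(components.shape)                  # (5, 40)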
Example #25
def test_incremental_pca_num_features_change():
    """Test that changing n_components will raise an error."""
    rng = np.random.RandomState(1999)
    n_samples = 100
    X = rng.randn(n_samples, 20)
    X2 = rng.randn(n_samples, 50)
    ipca = IncrementalPCA(n_components=None)
    ipca.fit(X)
    assert_raises(ValueError, ipca.partial_fit, X2)
Example #26
def get_encoder(metas, train_data, target_output_dim):
    tmpdir = metas['workspace']
    model_path = os.path.join(tmpdir, 'incremental_pca.model')

    model = IncrementalPCA(n_components=target_output_dim, whiten=True)
    model.fit(train_data)
    pickle.dump(model, open(model_path, 'wb'))

    return IncrementalPCAEncoder(model_path=model_path)
Example #27
def test_incremental_pca_num_features_change():
    # Test that changing the number of features between fits raises an error.
    rng = np.random.RandomState(1999)
    n_samples = 100
    X = rng.randn(n_samples, 20)
    X2 = rng.randn(n_samples, 50)
    ipca = IncrementalPCA(n_components=None)
    ipca.fit(X)
    assert_raises(ValueError, ipca.partial_fit, X2)
Example #28
def test_n_samples_equal_n_components():
    # Ensures no warning is raised when n_samples==n_components
    # Non-regression test for gh-19050
    ipca = IncrementalPCA(n_components=5)
    with warnings.catch_warnings():
        warnings.simplefilter("error", RuntimeWarning)
        ipca.partial_fit(np.random.randn(5, 7))
    with warnings.catch_warnings():
        warnings.simplefilter("error", RuntimeWarning)
        ipca.fit(np.random.randn(5, 7))
Example #29
def test_singular_values():
    # Check that the IncrementalPCA output has the correct singular values

    rng = np.random.RandomState(0)
    n_samples = 1000
    n_features = 100

    X = datasets.make_low_rank_matrix(n_samples,
                                      n_features,
                                      tail_strength=0.0,
                                      effective_rank=10,
                                      random_state=rng)

    pca = PCA(n_components=10, svd_solver='full', random_state=rng).fit(X)
    ipca = IncrementalPCA(n_components=10, batch_size=100).fit(X)
    assert_array_almost_equal(pca.singular_values_, ipca.singular_values_, 2)

    # Compare to the Frobenius norm
    X_pca = pca.transform(X)
    X_ipca = ipca.transform(X)
    assert_array_almost_equal(np.sum(pca.singular_values_**2.0),
                              np.linalg.norm(X_pca, "fro")**2.0, 12)
    assert_array_almost_equal(np.sum(ipca.singular_values_**2.0),
                              np.linalg.norm(X_ipca, "fro")**2.0, 2)

    # Compare to the 2-norms of the score vectors
    assert_array_almost_equal(pca.singular_values_,
                              np.sqrt(np.sum(X_pca**2.0, axis=0)), 12)
    assert_array_almost_equal(ipca.singular_values_,
                              np.sqrt(np.sum(X_ipca**2.0, axis=0)), 2)

    # Set the singular values and see what we get back
    rng = np.random.RandomState(0)
    n_samples = 100
    n_features = 110

    X = datasets.make_low_rank_matrix(n_samples,
                                      n_features,
                                      tail_strength=0.0,
                                      effective_rank=3,
                                      random_state=rng)

    pca = PCA(n_components=3, svd_solver='full', random_state=rng)
    ipca = IncrementalPCA(n_components=3, batch_size=100)

    X_pca = pca.fit_transform(X)
    X_pca /= np.sqrt(np.sum(X_pca**2.0, axis=0))
    X_pca[:, 0] *= 3.142
    X_pca[:, 1] *= 2.718

    X_hat = np.dot(X_pca, pca.components_)
    pca.fit(X_hat)
    ipca.fit(X_hat)
    assert_array_almost_equal(pca.singular_values_, [3.142, 2.718, 1.0], 14)
    assert_array_almost_equal(ipca.singular_values_, [3.142, 2.718, 1.0], 14)
Example #30
def PCA_Train(data, result_fold, n_components=128):
    print_info("PCA training (n_components=%d)..." % n_components)

    pca = IncrementalPCA(n_components=n_components)
    pca.fit(data)

    joblib.dump(pca, result_fold + "pca_model.m")

    print_info("PCA done.")

    return pca
Example #31
def PCA_Train(data, result_fold, n_components=128):
    print_info("PCA training (n_components=%d)..." % n_components)

    pca = IncrementalPCA(n_components=n_components)
    pca.fit(data)

    joblib.dump(pca, result_fold + "pca_model.m")

    print_info("PCA done.")

    return pca
Example #32
def incrementalpca_filtered_model(model,
                                  X_train,
                                  n_components=None,
                                  incrementalpca=None):
    element_shape = X_train.shape[1:]
    pxs_per_element = np.prod(element_shape)

    if incrementalpca is None:
        incrementalpca = IncrementalPCA(n_components=n_components)
        flatX_train = X_train.reshape(-1, pxs_per_element)
        incrementalpca.fit(flatX_train)

    return filtered_model(model, X_train, sklearn_transformer=incrementalpca)
Example #33
def test_incremental_pca_fit_overflow_error():
    # Test for overflow error on Windows OS
    # (non-regression test for issue #17693)
    rng = np.random.RandomState(0)
    A = rng.rand(500000, 2)

    ipca = IncrementalPCA(n_components=2, batch_size=10000)
    ipca.fit(A)

    pca = PCA(n_components=2)
    pca.fit(A)

    np.testing.assert_allclose(ipca.singular_values_, pca.singular_values_)
Example #34
def test_singular_values():
    # Check that the IncrementalPCA output has the correct singular values

    rng = np.random.RandomState(0)
    n_samples = 1000
    n_features = 100

    X = datasets.make_low_rank_matrix(n_samples, n_features, tail_strength=0.0,
                                      effective_rank=10, random_state=rng)

    pca = PCA(n_components=10, svd_solver='full', random_state=rng).fit(X)
    ipca = IncrementalPCA(n_components=10, batch_size=100).fit(X)
    assert_array_almost_equal(pca.singular_values_, ipca.singular_values_, 2)

    # Compare to the Frobenius norm
    X_pca = pca.transform(X)
    X_ipca = ipca.transform(X)
    assert_array_almost_equal(np.sum(pca.singular_values_**2.0),
                              np.linalg.norm(X_pca, "fro")**2.0, 12)
    assert_array_almost_equal(np.sum(ipca.singular_values_**2.0),
                              np.linalg.norm(X_ipca, "fro")**2.0, 2)

    # Compare to the 2-norms of the score vectors
    assert_array_almost_equal(pca.singular_values_,
                              np.sqrt(np.sum(X_pca**2.0, axis=0)), 12)
    assert_array_almost_equal(ipca.singular_values_,
                              np.sqrt(np.sum(X_ipca**2.0, axis=0)), 2)

    # Set the singular values and see what we get back
    rng = np.random.RandomState(0)
    n_samples = 100
    n_features = 110

    X = datasets.make_low_rank_matrix(n_samples, n_features, tail_strength=0.0,
                                      effective_rank=3, random_state=rng)

    pca = PCA(n_components=3, svd_solver='full', random_state=rng)
    ipca = IncrementalPCA(n_components=3, batch_size=100)

    X_pca = pca.fit_transform(X)
    X_pca /= np.sqrt(np.sum(X_pca**2.0, axis=0))
    X_pca[:, 0] *= 3.142
    X_pca[:, 1] *= 2.718

    X_hat = np.dot(X_pca, pca.components_)
    pca.fit(X_hat)
    ipca.fit(X_hat)
    assert_array_almost_equal(pca.singular_values_, [3.142, 2.718, 1.0], 14)
    assert_array_almost_equal(ipca.singular_values_, [3.142, 2.718, 1.0], 14)
Example #35
def generate_pca_compression(X, n_components=16, batch_size=100):
    """
    Compresses the data using sklearn PCA implementation.

    :param X: Data (n_samples, n_features)
    :param n_components: Number of dimensions for PCA to keep
    :param batch_size: Batch size for incremental PCA

    :return: X_prime (the compressed representation), pca
    """

    pca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
    pca.fit(X)

    return pca.transform(X), pca
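Usage sketch for generate_pca_compression on synthetic data (the shapes are assumptions):

import numpy as np

X = np.random.rand(1000, 128)
X_prime, pca = generate_pca_compression(X, n_components=16, batch_size=200)
print(X_prime.shape)   # (1000, 16)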
Example #36
def test_incremental_pca_set_params():
    """Test that components_ sign is stable over batch sizes."""
    rng = np.random.RandomState(1999)
    n_samples = 100
    n_features = 20
    X = rng.randn(n_samples, n_features)
    X2 = rng.randn(n_samples, n_features)
    X3 = rng.randn(n_samples, n_features)
    ipca = IncrementalPCA(n_components=20)
    ipca.fit(X)
    # Decreasing number of components
    ipca.set_params(n_components=10)
    assert_raises(ValueError, ipca.partial_fit, X2)
    # Increasing number of components
    ipca.set_params(n_components=15)
    assert_raises(ValueError, ipca.partial_fit, X3)
    # Returning to original setting
    ipca.set_params(n_components=20)
    ipca.partial_fit(X)
Example #37
def test_incremental_pca():
    """Incremental PCA on dense arrays."""
    X = iris.data
    batch_size = X.shape[0] // 3
    ipca = IncrementalPCA(n_components=2, batch_size=batch_size)
    pca = PCA(n_components=2)
    pca.fit_transform(X)

    X_transformed = ipca.fit_transform(X)

    np.testing.assert_equal(X_transformed.shape, (X.shape[0], 2))
    assert_almost_equal(ipca.explained_variance_ratio_.sum(),
                        pca.explained_variance_ratio_.sum(), 1)

    for n_components in [1, 2, X.shape[1]]:
        ipca = IncrementalPCA(n_components, batch_size=batch_size)
        ipca.fit(X)
        cov = ipca.get_covariance()
        precision = ipca.get_precision()
        assert_array_almost_equal(np.dot(cov, precision),
                                  np.eye(X.shape[1]))
Example #38
    def IPCA(self, components = 50, batch =1000):
        '''
        Iterative Principal Component Analysis; see sklearn.decomposition.IncrementalPCA
        Parameters:
        ------------
        components (default 50) = number of independent components to return
        batch (default 1000)  = number of pixels to load into memory simultaneously in IPCA. More requires more memory but leads to better fit
        Returns
        -------
        eigenseries: principal components (pixel time series) and associated singular values
        eigenframes: eigenframes are obtained by multiplying the projected frame matrix by the projected movie (whitened frames?)
        proj_frame_vectors:the reduced version of the movie vectors using only the principal component projection
        '''
        # vectorize the images
        num_frames, h, w = np.shape(self);
        frame_size = h * w;
        frame_samples = np.reshape(self, (num_frames, frame_size)).T

        # run IPCA to approximate the SVD
        ipca_f = IncrementalPCA(n_components=components, batch_size=batch)
        ipca_f.fit(frame_samples)

        # construct the reduced version of the movie vectors using only the
        # principal component projection

        proj_frame_vectors = ipca_f.inverse_transform(ipca_f.transform(frame_samples))

        # get the temporal principal components (pixel time series) and
        # associated singular values

        eigenseries = ipca_f.components_.T

        # the rows of eigenseries are approximately orthogonal
        # so we can approximately obtain eigenframes by multiplying the
        # projected frame matrix by this transpose on the right

        eigenframes = np.dot(proj_frame_vectors, eigenseries)

        return eigenseries, eigenframes, proj_frame_vectors
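This method applies the same steps as the standalone ipca() helpers in Examples #6 and #14, with `self` playing the role of the (num_frames, h, w) movie array, so the usage sketch shown after Example #6 carries over unchanged.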
Example #39
class PCALDA(AbstractFeature):
    def __init__(self,options):
        for key in options:
            setattr(self,key,options[key])

    def compute(self,X,y):
        if X.ndim == 3:
            X = X.reshape((X.shape[0],X.shape[1]*X.shape[2]))
        if not hasattr(self,"pca_dim"):
            self.pca_dim = len(X)-len(np.unique(y))

        # PCA
        self.ipca = IncrementalPCA(n_components=self.pca_dim, batch_size=None)
        self.ipca.fit(X)

        X_pca = self.ipca.transform(X)
        print("PCA train shape")
        print(X_pca.shape)

        # LDA
        # note: sklearn.lda was removed in scikit-learn 0.20; newer versions provide
        # sklearn.discriminant_analysis.LinearDiscriminantAnalysis instead
        self.lda = sklearn.lda.LDA()
        self.lda.fit(X_pca,y)
        X_lda = self.lda.transform(X_pca)
        return X_lda


    def extract(self,x):
        X = np.array([x])
        if X.ndim == 3:
            X = X.reshape((X.shape[0],X.shape[1]*X.shape[2]))
        X_pca = self.ipca.transform(X)
        X_lda = self.lda.transform(X_pca)
        return list(X_lda[0])

    def __repr__(self):
        return "PCALDA"
Example #40
File: pca.py  Project: DaMSL/ddc
def calc_ipca(r, key, xyz, N, title=None):
  n_dim = np.prod(xyz.shape[1:])
  ipca = IncrementalPCA()
  ipca.fit(xyz.reshape(len(xyz), n_dim))
  return ipca
    Test_matrix = np.array(rowstst)
print('\nTest data loaded!\n')
print('#================================================================#')

print('#================================================================#')
print('\nshape of Training Matrix = ', Train_matrix.shape)
print('shape of Test Matrix = ', Test_matrix.shape,'\n')
print('#================================================================#')


#=========================  Principal Component Analysis  ==========================#

print('\nRunning Incremental PCA with 200 components and a batch size of 5000')

pca = IncrementalPCA(n_components=200, batch_size = 5000)
pca.fit(Train_matrix)
Train_matrix = pca.transform(Train_matrix)
Test_matrix = pca.transform(Test_matrix)

parameters = pca.get_params()
variance = pca.explained_variance_ratio_
cumvariance = pca.explained_variance_ratio_.cumsum() 
#np.savetxt("pca_result_variance_200.csv", variance, delimiter=",")
#np.savetxt("pca_result_cum_variance_200.csv", variance, delimiter=",")

print ('\nPCA complete!\n')
print ('#================================================================#')
print('\nWriting transformed Train and Test matrices to CSV\n')
print('#================================================================#')

with open(csv_pca_train_out_path, 'w', newline='') as csvtrainoutfile: