Example #1
def test_fit_transform():
    alpha = 1
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
    spca_lars = SparsePCA(n_components=3, method='lars', alpha=alpha,
                          random_state=0)
    spca_lars.fit(Y)
    U1 = spca_lars.transform(Y)
    # Test multiple CPUs
    if sys.platform == 'win32':  # fake parallelism for win32
        import sklearn.externals.joblib.parallel as joblib_par
        _mp = joblib_par.multiprocessing
        joblib_par.multiprocessing = None
        try:
            spca = SparsePCA(n_components=3, n_jobs=2, random_state=0,
                             alpha=alpha).fit(Y)
            U2 = spca.transform(Y)
        finally:
            joblib_par.multiprocessing = _mp
    else:  # we can efficiently use parallelism
        spca = SparsePCA(n_components=3, n_jobs=2, method='lars', alpha=alpha,
                         random_state=0).fit(Y)
        U2 = spca.transform(Y)
    assert_true(not np.all(spca_lars.components_ == 0))
    assert_array_almost_equal(U1, U2)
    # Test that CD gives similar results
    spca_lasso = SparsePCA(n_components=3, method='cd', random_state=0,
                           alpha=alpha)
    spca_lasso.fit(Y)
    assert_array_almost_equal(spca_lasso.components_, spca_lars.components_)
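These tests depend on a generate_toy_data helper from scikit-learn's test suite. As a point of reference, here is a sketch of that helper (adapted from the sklearn tests, reproduced from memory, so treat the details as approximate): Y is a noisy product of random loadings U and three blob-shaped image atoms V.

import numpy as np
from sklearn.utils import check_random_state

def generate_toy_data(n_components, n_samples, image_size, random_state=None):
    # Y = U V + noise; each atom of V is a small square blob inside an
    # image of shape image_size (8x8 in the tests above).
    n_features = image_size[0] * image_size[1]
    rng = check_random_state(random_state)
    U = rng.randn(n_samples, n_components)
    V = rng.randn(n_components, n_features)
    centers = [(3, 3), (6, 7), (8, 1)]
    sz = [1, 2, 1]
    for k in range(n_components):
        img = np.zeros(image_size)
        xmin, xmax = centers[k][0] - sz[k], centers[k][0] + sz[k]
        ymin, ymax = centers[k][1] - sz[k], centers[k][1] + sz[k]
        img[xmin:xmax][:, ymin:ymax] = 1.0
        V[k, :] = img.ravel()
    Y = np.dot(U, V) + 0.1 * rng.randn(n_samples, n_features)  # add noise
    return Y, U, V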
Example #2
def test_fit_transform():
    alpha = 1
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
    spca_lars = SparsePCA(n_components=3,
                          method='lars',
                          alpha=alpha,
                          random_state=0)
    spca_lars.fit(Y)
    U1 = spca_lars.transform(Y)
    # Test multiple CPUs
    spca = SparsePCA(n_components=3,
                     n_jobs=2,
                     method='lars',
                     alpha=alpha,
                     random_state=0).fit(Y)
    U2 = spca.transform(Y)
    assert_true(not np.all(spca_lars.components_ == 0))
    assert_array_almost_equal(U1, U2)
    # Test that CD gives similar results
    spca_lasso = SparsePCA(n_components=3,
                           method='cd',
                           random_state=0,
                           alpha=alpha)
    spca_lasso.fit(Y)
    assert_array_almost_equal(spca_lasso.components_, spca_lars.components_)
Example #4
def test_fit_transform_parallel():
    alpha = 1
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
    spca_lars = SparsePCA(n_components=3, method="lars", alpha=alpha, random_state=0)
    spca_lars.fit(Y)
    U1 = spca_lars.transform(Y)
    # Test multiple CPUs
    spca = SparsePCA(
        n_components=3, n_jobs=2, method="lars", alpha=alpha, random_state=0
    ).fit(Y)
    U2 = spca.transform(Y)
    assert not np.all(spca_lars.components_ == 0)
    assert_array_almost_equal(U1, U2)
Example #5
def test_fit_transform_parallel():
    alpha = 1
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
    spca_lars = SparsePCA(n_components=3, method='lars', alpha=alpha,
                          random_state=0)
    spca_lars.fit(Y)
    U1 = spca_lars.transform(Y)
    # Test multiple CPUs
    spca = SparsePCA(n_components=3, n_jobs=2, method='lars', alpha=alpha,
                     random_state=0).fit(Y)
    U2 = spca.transform(Y)
    assert_true(not np.all(spca_lars.components_ == 0))
    assert_array_almost_equal(U1, U2)
Example #6
def pca_svm(filename):

    data = pd.read_csv('archive/' + filename,
                       usecols=['label', 'tweet'])

    vectorizer = TfidfVectorizer()
    vectorized = vectorizer.fit_transform(data['tweet'])
    vectorized = vectorized.todense()

    X_tr, X_te, y_tr, y_te = train_test_split(vectorized, data['label'],
                                              test_size=0.2)

    pca = SparsePCA()
    X_tr = pca.fit_transform(X_tr)
    X_te = pca.transform(X_te)
    clf = SVC(kernel='rbf')
    clf.fit(X_tr, y_tr)
    y_pred = clf.predict(X_te)
    y_pred_tr = clf.predict(X_tr)

    accuracy = accuracy_score(y_te, y_pred)
    accuracy_train = accuracy_score(y_tr, y_pred_tr)
    # report the scores (the original computed them but never used them)
    print('test accuracy: {:.3f}, train accuracy: {:.3f}'.format(
        accuracy, accuracy_train))

    plot_confusion_matrix(clf, X_te, y_te)
    plt.show()
Example #7
class SparsePCAFeaturizer():
    # NB: in the source this class was itself named SparsePCA, shadowing
    # sklearn.decomposition.SparsePCA, so __init__ recursed instead of
    # building the sklearn model; renamed here to fix that.
    def __init__(self, cols, n_components):
        self.n_components = n_components
        self.model = SparsePCA(n_components=n_components)
        self.columns = cols

    def fit(self, data):
        self.model.fit(data[self.columns])

    def fit_transform(self, data):
        transformed = self.model.fit_transform(data[self.columns])
        transformed = pd.DataFrame(
            transformed,
            columns=["spca_" + str(i + 1) for i in range(self.n_components)])
        data = pd.concat([data, transformed], axis=1)
        data = data.drop(self.columns, axis=1)
        return data

    def transform(self, data):
        transformed = self.model.transform(data[self.columns])
        transformed = pd.DataFrame(
            transformed,
            columns=["spca_" + str(i + 1) for i in range(self.n_components)])
        data = pd.concat([data, transformed], axis=1)
        data = data.drop(self.columns, axis=1)
        return data
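A minimal usage sketch for the wrapper above (column names and data invented for illustration): the selected columns are replaced by spca_* features while other columns pass through untouched.

import numpy as np
import pandas as pd
from sklearn.decomposition import SparsePCA

rng = np.random.RandomState(0)
df = pd.DataFrame(rng.randn(20, 4), columns=['a', 'b', 'c', 'd'])
feat = SparsePCAFeaturizer(cols=['a', 'b', 'c'], n_components=2)
print(feat.fit_transform(df).columns.tolist())  # ['d', 'spca_1', 'spca_2']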
Example #8
def test_scaling_fit_transform():
    alpha = 1
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 1000, (8, 8), random_state=rng)
    spca_lars = SparsePCA(n_components=3, method="lars", alpha=alpha, random_state=rng)
    results_train = spca_lars.fit_transform(Y)
    results_test = spca_lars.transform(Y[:10])
    assert_allclose(results_train[0], results_test[0])
Example #9
    def sparse_pca(self):
        """
        Runs sparse PCA on the view and returns the projected view
        and the sparse principal components.
        """
        # `param` is a module-level configuration dict in the source
        model = SparsePCA(n_components=param['components'],
                          alpha=param['sparse_pca_alpha'])
        model.fit(self.view)
        return model.transform(self.view), model.components_
Example #10
def test_scaling_fit_transform():
    alpha = 1
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 1000, (8, 8), random_state=rng)
    spca_lars = SparsePCA(n_components=3, method='lars', alpha=alpha,
                          random_state=rng, normalize_components=True)
    results_train = spca_lars.fit_transform(Y)
    results_test = spca_lars.transform(Y[:10])
    assert_allclose(results_train[0], results_test[0])
Example #11
class SPCAEstimator():
    def __init__(self, n_components, alpha=10.0):
        self.n_components = n_components
        self.whiten = False
        self.alpha = alpha  # higher alpha => sparser components
        #self.transformer = MiniBatchSparsePCA(n_components, alpha=alpha, n_iter=100,
        #    batch_size=max(20, n_components//5), random_state=0, normalize_components=True)
        self.transformer = SparsePCA(
            n_components,
            alpha=alpha,
            ridge_alpha=0.01,
            max_iter=100,
            random_state=0,
            n_jobs=-1,
            normalize_components=True)  # TODO: warm start using PCA result?
        self.batch_support = False  # maybe through memmap and HDD-stored tensor
        self.stdev = np.zeros((n_components, ))
        self.total_var = 0.0

    def get_param_str(self):
        return "spca_c{}_a{}{}".format(self.n_components, self.alpha,
                                       '_w' if self.whiten else '')

    def fit(self, X):
        self.transformer.fit(X)

        # Save variance for later
        self.total_var = X.var(axis=0).sum()

        # Compute projected standard deviations
        # NB: cannot simply project with dot product!
        self.stdev = self.transformer.transform(X).std(
            axis=0)  # X = (n_samples, n_features)

        # Sort components based on explained variance
        idx = np.argsort(self.stdev)[::-1]
        self.stdev = self.stdev[idx]
        self.transformer.components_[:] = self.transformer.components_[idx]

        # Check orthogonality
        dotps = [
            np.dot(*self.transformer.components_[[i, j]])
            for (i, j) in itertools.combinations(range(self.n_components), 2)
        ]
        if not np.allclose(dotps, 0, atol=1e-4):
            print('SPCA components not orthogonal, max dot',
                  np.abs(dotps).max())

    def get_components(self):
        var_ratio = self.stdev**2 / self.total_var
        return self.transformer.components_, self.stdev, var_ratio  # SPCA outputs are normalized
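Unlike plain PCA, sparse PCA components are not constrained to be orthogonal, which is why fit() above checks the pairwise dot products explicitly. A minimal sketch of that check in isolation (random data and parameters invented for illustration):

import itertools
import numpy as np
from sklearn.decomposition import SparsePCA

X = np.random.RandomState(0).randn(100, 10)
comps = SparsePCA(n_components=4, alpha=1.0, random_state=0).fit(X).components_
dotps = [np.dot(comps[i], comps[j])
         for i, j in itertools.combinations(range(4), 2)]
print(np.abs(dotps).max())  # typically nonzero: components need not be orthogonal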
Example #12
class SparsePCAImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)
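A sketch of how this Impl wrapper might be wired up, assuming Op has been bound to sklearn's SparsePCA (in the original library that binding happens elsewhere):

import numpy as np
from sklearn.decomposition import SparsePCA as Op

impl = SparsePCAImpl(n_components=2, random_state=0)
X = np.random.RandomState(0).randn(50, 8)
print(impl.fit(X).transform(X).shape)  # (50, 2)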
Example #13
class DimensionalityReducer(object):
    
    def __init__(self):
        self.sc = None
        self.pca = None
    
    def fitPCA(self, X, nfeats=3):
        self.sc = StandardScaler()
        self.pca = SparsePCA(n_components=nfeats)
        self.pca.fit(self.sc.fit_transform(X))
        
    def transformPCA(self, X):
        components = self.pca.transform(self.sc.transform(X))
        return components
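A short usage sketch (random data assumed; StandardScaler and SparsePCA imported from sklearn as in the original module). Features are standardized before the sparse decomposition, and the same scaler is reused at transform time.

import numpy as np

X = np.random.RandomState(0).randn(60, 6)
dr = DimensionalityReducer()
dr.fitPCA(X, nfeats=3)
print(dr.transformPCA(X).shape)  # (60, 3)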
Example #14
def WeightsEstimatedFromSparsePCA(ret_port, n_com=25):
    tf = SparsePCA(n_components=n_com)  # , random_state=0)
    tf.fit(ret_port.agg(lambda x: x - x.mean()).fillna(0.0))  # mind the input magnitudes
    tf.transform(
        ret_port.fillna(0.0)
    )  # .apply(lambda x:x.where(~x.isnull(),x.mean()),axis=0))#,index=date_investing[date_investing<'2019-12'])
    # Flip each component portfolio's sign according to its mean return
    weights = pd.DataFrame(tf.components_, columns=signal_names.split(',')).T
    ret_transformed_port = (cov_chara_ret.fillna(0.0) @ weights).replace(
        0.0, np.nan)
    for c in weights.columns:
        weights[c] = weights[c] * np.sign(
            ret_transformed_port[c].mean()) / np.abs(weights[c]).sum()
    ret_transformed_port = (cov_chara_ret.fillna(0.0) @ weights).replace(
        0.0, np.nan)
    # Select portfolios by t-value or by Sharpe ratio (SR is used here)
    select_port = np.abs(
        PortfolioAnalysis(ret_transformed_port.dropna(
            how='all',
            axis=1))).T.sort_values(by='SR',
                                    ascending=False).index[:int(n_com * 0.67)]
    for p in select_port:
        weights[p] *= np.sign(ret_transformed_port[p].mean())
    return weights[select_port]
Example #15
def WeightsEstimatedFromSparsePCAWithWeightedCovariance(ret_p, n_com=30):
    ret_port = ret_p.dropna(how='all', axis=1)
    tf = SparsePCA(n_components=n_com)  # , random_state=0)
    cov_matrix = WeightedCovariance(ret_port)
    tf.fit(cov_matrix)  # mind the input magnitudes
    tf.transform(
        ret_port.fillna(0.0)
    )  # .apply(lambda x:x.where(~x.isnull(),x.mean()),axis=0))#,index=date_investing[date_investing<'2019-12'])
    # Flip each component portfolio's sign according to its mean return
    weights = pd.DataFrame(tf.components_, columns=cov_matrix.columns).T
    ret_transformed_port = (ret_port.fillna(0.0) @ weights).replace(
        0.0, np.nan)
    for c in ret_transformed_port.columns:
        weights[c] = weights[c] * np.sign(
            ret_transformed_port[c].mean()) / np.abs(weights[c]).sum()
    ret_transformed_port = (ret_port.fillna(0.0) @ weights).replace(
        0.0, np.nan)
    # Select portfolios by t-value or by Sharpe ratio (SR is used here)
    select_port = np.abs(
        PortfolioAnalysis(ret_transformed_port)).T.sort_values(
            by='SR', ascending=False).index
    for p in select_port:
        weights[p] *= np.sign(ret_transformed_port[p].mean())
    return weights[select_port]
Example #16
def test_pca_vs_spca():
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 1000, (8, 8), random_state=rng)
    Z, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)
    spca = SparsePCA(alpha=0, ridge_alpha=0, n_components=2)
    pca = PCA(n_components=2)
    pca.fit(Y)
    spca.fit(Y)
    results_test_pca = pca.transform(Z)
    results_test_spca = spca.transform(Z)
    assert_allclose(np.abs(spca.components_.dot(pca.components_.T)),
                    np.eye(2), atol=1e-5)
    results_test_pca *= np.sign(results_test_pca[0, :])
    results_test_spca *= np.sign(results_test_spca[0, :])
    assert_allclose(results_test_pca, results_test_spca)
Example #17
def test_pca_vs_spca():
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 1000, (8, 8), random_state=rng)
    Z, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)
    spca = SparsePCA(alpha=0, ridge_alpha=0, n_components=2,
                     normalize_components=True)
    pca = PCA(n_components=2)
    pca.fit(Y)
    spca.fit(Y)
    results_test_pca = pca.transform(Z)
    results_test_spca = spca.transform(Z)
    assert_allclose(np.abs(spca.components_.dot(pca.components_.T)),
                    np.eye(2), atol=1e-5)
    results_test_pca *= np.sign(results_test_pca[0, :])
    results_test_spca *= np.sign(results_test_spca[0, :])
    assert_allclose(results_test_pca, results_test_spca)
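Both variants encode the same sanity check: with alpha=0 (no sparsity penalty) and ridge_alpha=0, SparsePCA should span the same leading subspace as PCA, up to the sign of each component. A minimal, self-contained sketch of the idea (toy data via make_low_rank_matrix; agreement is approximate and depends on convergence):

import numpy as np
from sklearn.datasets import make_low_rank_matrix
from sklearn.decomposition import PCA, SparsePCA

X = make_low_rank_matrix(n_samples=300, n_features=20,
                         effective_rank=5, random_state=0)
pca = PCA(n_components=2).fit(X)
spca = SparsePCA(n_components=2, alpha=0, ridge_alpha=0,
                 random_state=0).fit(X)
# |V_spca @ V_pca^T| should be close to the identity matrix
print(np.abs(spca.components_ @ pca.components_.T).round(2))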
Example #18
class SPCA:
    def __init__(self, rfe_cv, *args, **kwargs):
        self.rfe = None
        self.rfe_cv = rfe_cv
        self.model = SparsePCA(*args, **kwargs)

    def fit(self, X, y):
        Z = numpy.concatenate([X, y.reshape(-1, 1)], axis=1)
        Z = numpy.array(Z, dtype=numpy.float32)
        Z[Z == numpy.inf] = numpy.nan
        Z[Z == -numpy.inf] = numpy.nan
        X_, y_ = X[~pandas.isna(Z).any(axis=1), :], y[~pandas.isna(Z).any(
            axis=1)]
        if X_.shape[0] != X.shape[0]:  # original compared Z.shape[0], which never differs
            print(
                'FIT: the sample contains NaNs, they were dropped\tN of dropped NaNs: {0}'
                .format(X.shape[0] - X_.shape[0]))
        if self.rfe_cv:
            raise Exception("PCA could not be processed with RFE_CV")
        else:
            self.model.fit(X_)

    def predict(self, X):
        Z = numpy.concatenate([X], axis=1)
        Z = numpy.array(Z, dtype=numpy.float32)
        Z[Z == numpy.inf] = numpy.nan
        Z[Z == -numpy.inf] = numpy.nan
        nan_mask = ~pandas.isna(Z).any(axis=1)
        X_ = X[nan_mask, :]
        if X_.shape[0] != X.shape[0]:  # original compared Z.shape[0], which never differs
            print(
                'PREDICT: the sample contains NaNs, they were dropped\tN of dropped NaNs: {0}'
                .format(X.shape[0] - X_.shape[0]))
        if self.rfe_cv:
            raise Exception("PCA could not be processed with RFE_CV")
        else:
            predicted = self.model.transform(X_)
            Z = numpy.full(shape=(X.shape[0], predicted.shape[1]),
                           fill_value=numpy.nan,
                           dtype=numpy.float64)
            Z[nan_mask, :] = predicted
        return Z
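A short usage sketch for the NaN handling above (invented data): rows containing NaN are dropped before fitting, and predict returns those rows as NaN in their original positions.

import numpy
X = numpy.random.RandomState(0).randn(30, 5)
y = numpy.random.RandomState(1).randn(30)
X[3, 2] = numpy.nan

m = SPCA(rfe_cv=False, n_components=2, random_state=0)
m.fit(X, y)                     # reports the dropped NaN row
out = m.predict(X)
print(out.shape, numpy.isnan(out[3]).all())  # (30, 2) True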
Example #19
    def tu_spca(self, dataname="kong", components_n=1, data=None):

        # synthetic test data
        X, y = make_blobs(n_samples=10000,
                          n_features=3,
                          centers=[[3, 3, 3], [0, 0, 0], [1, 1, 1], [2, 2, 2]],
                          cluster_std=[0.2, 0.1, 0.2, 0.2],
                          random_state=9)
        if data is None:  # '== None' is unreliable for arrays; use 'is None'
            data = X

        message = []
        # fit the model (on `data`; the original fit on X even when data was given)
        spca = SparsePCA(n_components=components_n,
                         normalize_components=True,
                         random_state=0)
        spca.fit(data)
        # save the transformed data
        value = spca.transform(data)
        save_helper.save_txt_helper(value, dataname)

        components = spca.components_
        error = spca.error_
        page2 = Page()
        # plot each sparse component as a bar chart
        for j in range(0, components.shape[0]):
            bar1 = Bar("Sparse component " + str(j))
            bar1.add("", [
                "components_" + str(i) for i in range(0, components.shape[1])
            ], components[j])
            page2.add(bar1)
        message.append("Only the sparse components and the reconstruction "
                       "error are provided for analysis")

        print(error)
        bar2 = Bar("Reconstruction error")
        bar2.add("", ["error" + str(i) for i in range(0, len(error))], error)
        page2.add(bar2)
        save_helper.save_tu_helper(page2, dataname)

        return message
Example #20
        # print (matrix_KPCA_BF)
        # print ("get KPCA mean matrix")
        # print (matrix_KPCA_mean)
        matrix_KPCA_BF.to_csv(gl.get_value("outputFile") + "_KPCA_BF.txt",
                              sep='\t',
                              header=True,
                              index=True)
        matrix_KPCA_mean.to_csv(gl.get_value("outputFile") + "_KPCA_mean.txt",
                                sep='\t',
                                header=True,
                                index=True)

    if gl.get_value("SPCA_Flag"):
        spca = SparsePCA(n_components=gl.get_value("SPCA_n_components"))
        spca.fit(wholeData)
        expre_SPCA = spca.transform(expre_data)
        # print ("get SPCA data")
        matrix_SPCA = Methods.get_matrix_dist(
            data=expre_SPCA,
            lab=lab,
            clusters=clusters,
            average_number=gl.get_value("SPCA_AvgNum"),
            caculation_number=gl.get_value("SPCA_CalNum"))
        # print ("get SPCA matrix")
        matrix_SPCA_BF = Methods.disMatrix_to_bfMatrix(matrix_SPCA, clusters)
        matrix_SPCA_mean = Methods.disMatrix_to_meanMatrix(
            matrix_SPCA, clusters)
        # print ("get SPCA BF matrix")
        # print (matrix_SPCA_BF)
        matrix_SPCA_BF.to_csv(gl.get_value("outputFile") + "_SPCA_BF.txt",
                              sep='\t',
                              header=True,
                              index=True)  # call truncated in the source;
                                           # completed to mirror the KPCA writes above
Example #21
def get_cv_accuracy(dpath, site, dtype, description,
                    RESULTPATH,
                    k_tune_params={},
                    knn_params={},
                    USE_NCA=False,
                    graphParams={},
                    nca_train_params={},
                    elastic_net_params={},
                    USE_PCA=False,
                    USE_BAGGING=False,
                    bagging_params={}):
    
    """
    Get KNN cross validation accuracy with or without PCA and NCA
    """
    
    # Get a dict of function params and save
    params_all = locals()
    with open(RESULTPATH + description + \
                      'params_all.pkl','wb') as f:
        _pickle.dump(params_all, f)
    
    #%% =======================================================================
    # Define relevant methods
    #==========================================================================  
    
    def _get_numpc_optim(feats_train, feats_valid,
                         T_train, C_train,
                         T_valid, C_valid):
        
        """
        Given PCA-transformed traing and validation sets,
        find the optimal no of principal components to 
        maximize the Ci
        """
        print("\nFinding optimal number of PC's.")   
        print("\n\tnumpc\tCi")
        print("\t--------------")
        
        cis = []
        
        numpc_max = np.min([feats_train.shape[1], 200])
        
        for numpc in range(4, numpc_max, 4):
            feats_train_new = feats_train[:, 0:numpc]
            feats_valid_new = feats_valid[:, 0:numpc]
            # get neighbor indices    
            neighbor_idxs = knnmodel._get_neighbor_idxs(feats_valid_new, 
                                                        feats_train_new, 
                                                        norm = norm)
            # Predict validation set
            _, Ci = knnmodel.predict(neighbor_idxs,
                                     Survival_train=T_train, 
                                     Censored_train=C_train, 
                                     Survival_test =T_valid, 
                                     Censored_test =C_valid, 
                                     K=elastic_net_params['K'], 
                                     Method = Method)
            
            cis.append([numpc, Ci])
            print("\t{}\t{}".format(numpc, Ci))
        
        # now get the optimal number of PCs
        cis = np.array(cis)
        numpc_optim = cis[cis[:,1].argmax(), 0]
        print("\nnumpc_optim = {}".format(round(numpc_optim, 3)))
            
        return int(numpc_optim)

    #%% =======================================================================
    # Begin main body
    #==========================================================================    
    
    print("\n--------------------------------------")
    print("Getting cv accuracy: {}, {}".format(site, dtype))
    print("--------------------------------------\n")
    
    print("Loading data.")
    
    #Data = loadmat(dpath)
    #Features = Data[dtype + '_X'] 
    #N = Features.shape[0]

    
    with open(dpath.split('.mat')[0] + '_splitIdxs.pkl','rb') as f:
        splitIdxs = _pickle.load(f)
    
    #
    # result structure
    #
    
    RESULTPATH_NCA = RESULTPATH + "nca/"
    RESULTPATH_KNN = RESULTPATH + "knn/"
    LOADPATH = None
    
    os.system('mkdir ' + RESULTPATH_NCA)
    os.system('mkdir ' + RESULTPATH_KNN)
    
    # Go through outer folds, optimize and get accuracy
    #==========================================================================
    
    # Instantiate a KNN survival model.
    knnmodel = knn.SurvivalKNN(RESULTPATH_KNN, description=description)
    
    #
    # initialize
    #
    
    n_outer_folds = len(splitIdxs['idx_optim'])
    n_folds = len(splitIdxs['fold_cv_test'][0])
    
    CIs = np.zeros([n_folds, n_outer_folds])
    
    #
    # iterate through folds
    #
    
    #outer_fold = 0
    for outer_fold in range(n_outer_folds):
            
        print("\nOuter fold {} of {}\n".format(outer_fold, n_outer_folds-1))
        
        # Note, this is done for each outer loop
        # since they will be modified locally in each outer loop
        print("Loading data ...")
        Data = loadmat(dpath)
        X = Data[dtype + '_X'].copy()
        N = X.shape[0]
        Survival = Data['Survival'].reshape([N,])
        Censored = Data['Censored'].reshape([N,])
        Data = None
        
        # Isolate optimization set (and divide into training and validation)
        optimIdxs = splitIdxs['idx_optim'][outer_fold]
        
        if (USE_NCA or USE_PCA):
            stoppoint = int(elastic_net_params['VALID_RATIO'] * len(optimIdxs))
            optimIdxs_valid = optimIdxs[0:stoppoint]
            optimIdxs_train = optimIdxs[stoppoint:]
            x_train = X[optimIdxs_train, :]
            x_valid = X[optimIdxs_valid, :]
        
        #%% ===================================================================
        # Unsupervised dimensionality reduction - PCA
        #======================================================================
        
        if USE_PCA:
            
            # Find optimal number of PC's           
            pca = PCA()
            x_train = pca.fit_transform(x_train)
            x_valid = pca.transform(x_valid)
            
            # keep optimal number of PC's
            numpc_optim = _get_numpc_optim(feats_train=x_train,
                                           feats_valid=x_valid,
                                           T_train=Survival[optimIdxs_train],
                                           C_train=Censored[optimIdxs_train],
                                           T_valid=Survival[optimIdxs_valid],
                                           C_valid=Censored[optimIdxs_valid])
            x_train = x_train[:, 0:numpc_optim]
            x_valid = x_valid[:, 0:numpc_optim]
            
            # Now learn final PC matrix on full optimization set
            print("\nLearning final PCA matrix.")            
            pca = PCA(n_components=numpc_optim)
            pca.fit(X[optimIdxs, :])
            X = pca.transform(X)
            
        
        #%% ===================================================================
        # Supervised dimensionality reduction - NCA
        #======================================================================
        
        if USE_NCA:
            
            # instantiate NCA model
            ncamodel = nca.SurvivalNCA(RESULTPATH_NCA, 
                                       description = description, 
                                       LOADPATH = LOADPATH)
            #                          
            # Finding optimal values for ALPHA and LAMBDA (regularization)
            #
            
            ALPHAS = np.arange(0, 1.1, 0.2)
            LAMBDAS = np.arange(0, 1.1, 0.2)
    
            cis = []
            
            for ALPHA in ALPHAS:
                for LAMBDA in LAMBDAS:
                    
                    if ((LAMBDA == 0) and (ALPHA > ALPHAS.min())):
                        continue
            
                    graphParams['ALPHA'] = ALPHA
                    graphParams['LAMBDA'] = LAMBDA
                    
                    w = ncamodel.train(features = x_train,
                                       survival = Survival[optimIdxs_train],
                                       censored = Censored[optimIdxs_train],
                                       COMPUT_GRAPH_PARAMS = graphParams,
                                       **nca_train_params)
                    W = np.zeros([len(w), len(w)])
                    np.fill_diagonal(W, w)
                    
                    ncamodel.reset_TrainHistory()
                    
                    # transform
                    x_valid_transformed = np.dot(x_valid, W)
                    x_train_transformed = np.dot(x_train, W)
                    
                    # get neighbor indices    
                    neighbor_idxs = knnmodel._get_neighbor_idxs(x_valid_transformed, 
                                                                x_train_transformed, 
                                                                norm = norm)
                    
                    # Predict validation set
                    _, Ci = knnmodel.predict(neighbor_idxs,
                                             Survival_train=Survival[optimIdxs_train], 
                                             Censored_train=Censored[optimIdxs_train], 
                                             Survival_test = Survival[optimIdxs_valid], 
                                             Censored_test = Censored[optimIdxs_valid], 
                                             K = elastic_net_params['K'], 
                                             Method = Method)
                    
                    cis.append([ALPHA, LAMBDA, Ci])
                    
                    print("\n----------------------")
                    print("ALPHA\tLAMBDA\tCi")
                    print("{}\t{}\t{}".format(ALPHA, LAMBDA, round(Ci, 3)))
                    print("----------------------\n")
            
            cis = np.array(cis)
            optimal = cis[:,2].argmax()
            ALPHA_OPTIM = cis[optimal, 0]
            LAMBDA_OPTIM = cis[optimal, 1]
            
            print("\nOptimal Alpha, Lambda = {}, {}".format(ALPHA_OPTIM, LAMBDA_OPTIM))
            
            #           
            # Learn final NCA matrix on optimization set
            #
            
            print("\nLearning final NCA matrix\n")
            
            graphParams['ALPHA'] = ALPHA_OPTIM
            graphParams['LAMBDA'] = LAMBDA_OPTIM
    
            # Learn NCA matrix
            w = ncamodel.train(features = X[optimIdxs, :],
                               survival = Survival[optimIdxs],
                               censored = Censored[optimIdxs],
                               COMPUT_GRAPH_PARAMS = graphParams,
                               **nca_train_params)
            W = np.zeros([len(w), len(w)])
            np.fill_diagonal(W, w)
            
            # Transform features according to learned nca model
            X = np.dot(X, W)
            
        #%% ===================================================================    
        # Now get accuracy
        #======================================================================
        
        print("\nGetting accuracy.") 
        ci, _ = knnmodel.cv_accuracy(X, Survival, Censored, 
                                     splitIdxs, outer_fold=outer_fold,
                                     k_tune_params=k_tune_params,
                                     USE_BAGGING=USE_BAGGING,
                                     bagging_params=bagging_params)
        # record result
        CIs[:, outer_fold] = ci
    
    #%%    
    print("\nAccuracy")
    print("------------------------")
    print("25th percentile = {}".format(np.percentile(CIs, 25)))
    print("50th percentile = {}".format(np.percentile(CIs, 50)))
    print("75th percentile = {}".format(np.percentile(CIs, 75)))
    
    # Save results
    print("\nSaving final results.")
    with open(RESULTPATH + description + 'testing_Ci.txt','wb') as f:
        np.savetxt(f, CIs, fmt='%s', delimiter='\t')
Example #22
# Sparse PCA
from sklearn.decomposition import SparsePCA

n_components = 27
alpha = 0.0001
random_state = 2018
n_jobs = -1

sparsePCA = SparsePCA(n_components=n_components,
                      alpha=alpha,
                      random_state=random_state,
                      n_jobs=n_jobs)

sparsePCA.fit(X_train.loc[:, :])
X_train_sparsePCA = sparsePCA.transform(X_train)
X_train_sparsePCA = pd.DataFrame(data=X_train_sparsePCA, index=X_train.index)

scatterPlot(X_train_sparsePCA, y_train, "Sparse PCA")

# In[46]:

X_train_sparsePCA_inverse = np.array(X_train_sparsePCA).dot(
    sparsePCA.components_) + np.array(X_train.mean(axis=0))
X_train_sparsePCA_inverse = pd.DataFrame(data=X_train_sparsePCA_inverse,
                                         index=X_train.index)

anomalyScoresSparsePCA = anomalyScores(X_train, X_train_sparsePCA_inverse)
preds = plotResults(y_train, anomalyScoresSparsePCA, True)

# In[47]:
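The snippet above depends on anomalyScores and plotResults helpers defined elsewhere in the source notebook; it follows the standard reconstruction-error pattern for anomaly detection. A plausible sketch of the anomalyScores helper, assuming a min-max-normalized sum of squared reconstruction errors per row:

import numpy as np
import pandas as pd

def anomalyScores(originalDF, reducedDF):
    # sum of squared reconstruction errors per row, scaled to [0, 1]
    loss = np.sum((np.array(originalDF) - np.array(reducedDF)) ** 2, axis=1)
    loss = pd.Series(data=loss, index=originalDF.index)
    return (loss - loss.min()) / (loss.max() - loss.min())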
Example #23
# Sparse PCA
from sklearn.decomposition import SparsePCA

n_components = 100
alpha = 0.0001
random_state = 2020
n_jobs = -1

sparsePCA = SparsePCA(n_components=n_components,
                      alpha=alpha,
                      random_state=random_state,
                      n_jobs=n_jobs)

sparsePCA.fit(X_train.loc[:10000, :])
X_train_sparsePCA = sparsePCA.transform(X_train)
X_train_sparsePCA = pd.DataFrame(data=X_train_sparsePCA, index=train_index)

X_validation_sparsePCA = sparsePCA.transform(X_validation)
X_validation_sparsePCA = pd.DataFrame(data=X_validation_sparsePCA,
                                      index=validation_index)

scatterPlot(X_train_sparsePCA, y_train, "Sparse PCA")

# In[ ]:

# Kernel PCA
from sklearn.decomposition import KernelPCA

n_components = 100
kernel = 'rbf'
Example #24
class SPCA(object):
    def __init__(self,
                 n_components=None,
                 alpha=1,
                 ridge_alpha=0.01,
                 max_iter=1000,
                 tol=1e-8,
                 method='lars',
                 n_jobs=None,
                 U_init=None,
                 V_init=None,
                 verbose=False,
                 random_state=None,
                 normalize_components='deprecated'):
        """
        :param n_components:
        :param alpha:
        :param ridge_alpha:
        :param max_iter:
        :param tol:
        :param method:
        :param n_jobs:
        :param U_init:
        :param V_init:
        :param verbose:
        :param random_state:
        :param normalize_components:
        """
        self.model = SparsePCA(n_components=n_components,
                               alpha=alpha,
                               ridge_alpha=ridge_alpha,
                               max_iter=max_iter,
                               tol=tol,
                               method=method,
                               n_jobs=n_jobs,
                               U_init=U_init,
                               V_init=V_init,
                               verbose=verbose,
                               random_state=random_state,
                               normalize_components=normalize_components)

    def fit(self, x, y):
        self.model.fit(X=x, y=y)

    def transform(self, x):
        return self.model.transform(X=x)  # original was missing the return

    def fit_transform(self, x, y=None):
        return self.model.fit_transform(X=x, y=y)

    def get_params(self):
        return self.model.get_params(deep=True)

    def set_params(self, **params):
        return self.model.set_params(**params)

    def get_attributes(self):
        components = self.model.components_
        error = self.model.error_
        n_iter = self.model.n_iter_
        mean = self.model.mean_
        return components, error, n_iter, mean
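A brief usage sketch for this wrapper (parameters invented for illustration; assumes a scikit-learn version whose SparsePCA still accepts the normalize_components argument, since the constructor always forwards it):

import numpy as np

x = np.random.RandomState(0).randn(50, 8)
spca = SPCA(n_components=3, alpha=1, random_state=0)
spca.fit(x, y=None)
components, error, n_iter, mean = spca.get_attributes()
print(components.shape)  # (3, 8)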
Example #25
for index, sentence in enumerate(sentences):
    if labels[index] not in all_jiras:
        all_jiras[labels[index]] = [
            "https://oktainc.atlassian.net/browse/" + str(issues[index]['key'])
        ]
    else:
        all_jiras[
            labels[index]].append("https://oktainc.atlassian.net/browse/" +
                                  str(issues[index]['key']))

unique, counts = np.unique(labels, return_counts=True)
res = dict(zip(unique, counts))
res = sorted(res.items(), key=lambda x: x[1], reverse=True)

pca = SparsePCA(n_components=2).fit(X.toarray())
coords = pca.transform(X.toarray())
label_colors = [
    '#2AB0E9', '#2BAF74', '#D7665E', '#CCCCCC', '#D2CA0D', '#522A64',
    '#A3DB05', '#FC6514'
]
colors = [label_colors[i % len(label_colors)] for i in labels]
plt.scatter(coords[:, 0], coords[:, 1], c=colors)
centroids = clf.cluster_centers_
centroid_coords = pca.transform(centroids)
plt.title("Principal Component Analysis Diagram of Classes")
plt.scatter(centroid_coords[:, 0],
            centroid_coords[:, 1],
            marker='X',
            s=200,
            linewidth=2,
            c='#444d61')
Example #26
## get columns which are nonzero in at least one trace
wv_mat = np.zeros((len(wv_coefs), len(wv_coefs[0]["coef"])))
files = []
channels = []
for i, coef in enumerate(wv_coefs):
    wv_mat[i, :] = coef["coef"]
    files.append(coef["file"])
    channels.append(coef["channel"])

wv_mat = wv_mat[:, np.where(wv_mat.any(axis=0))[0]]

wv_df = pd.DataFrame({"fname": files, "channel": channels})
wv_df = pd.merge(wv_df, metadata)
pca_wv = SparsePCA(n_components=4, ridge_alpha=2, alpha=1).fit(wv_mat)
scores = pca_wv.transform(wv_mat)

wv_df = pd.DataFrame({
    "x": scores[:, 0],
    "y": scores[:, 1],
    "z": scores[:, 2],
    "fname": files,
    "channel": channels
})
wv_df = pd.merge(wv_df, metadata)

(ggplot(wv_df) +
 geom_point(aes(x="x", y="y", size="z", color="genotype", shape="target")) +
 scale_size_continuous(range=(0.3, 0.7)) +
 # ylim(-0.7, 0.55) +
 # xlim(-1.5, 2.6) +
Example #27
            if idx_arr != idx_lopo_cv
        ]
        training_label = [
            arr for idx_arr, arr in enumerate(label_bal)
            if idx_arr != idx_lopo_cv
        ]
        # Concatenate the data
        training_data = np.vstack(training_data)
        training_label = np.ravel(
            label_binarize(np.hstack(training_label).astype(int), [0, 255]))
        print('Create the training set ...')

        # Learn the PCA projection
        pca = SparsePCA(n_components=sp)
        training_data = pca.fit_transform(training_data)
        testing_data = pca.transform(testing_data)

        # Perform the classification for the current cv and the
        # given configuration
        crf = RandomForestClassifier(n_estimators=100, n_jobs=-1)
        pred_prob = crf.fit(
            training_data,
            np.ravel(training_label)).predict_proba(testing_data)

        result_cv.append([pred_prob, crf.classes_])

    results_sp.append(result_cv)

# Save the information
path_store = '/data/prostate/results/mp-mri-prostate/exp-3/selection-extraction/sparse-pca/mrsi'
if not os.path.exists(path_store):
Example #28
n_comp = 12

# tSVD
tsvd = TruncatedSVD(n_components=n_comp, random_state=420)
tsvd_results_train = tsvd.fit_transform(train.drop(["y"], axis=1))
tsvd_results_test = tsvd.transform(test)

# PCA
# pca = PCA(n_components=n_comp, random_state=420)
# pca2_results_train = pca.fit_transform(train.drop(["y"], axis=1))
# pca2_results_test = pca.transform(test)

#sparse PCA
spca = SparsePCA(n_components=n_comp, random_state=420)
spca2_results_train = spca.fit_transform(train.drop(["y"], axis=1))
spca2_results_test = spca.transform(test)

#Kernel PCA
kpca = KernelPCA(n_components=n_comp, random_state=420)
kpca2_results_train = kpca.fit_transform(train.drop(["y"], axis=1))
kpca2_results_test = kpca.transform(test)

# ICA
ica = FastICA(n_components=n_comp, random_state=420)
ica2_results_train = ica.fit_transform(train.drop(["y"], axis=1))
ica2_results_test = ica.transform(test)

# GRP
grp = GaussianRandomProjection(n_components=n_comp, eps=0.1, random_state=420)
grp_results_train = grp.fit_transform(train.drop(["y"], axis=1))
grp_results_test = grp.transform(test)
Example #29
def transform(xTrain, yTrain, xTest):
    pca = SparsePCA(n_components=2)
    newXTrain = pca.fit_transform(xTrain, yTrain)
    newXTest = pca.transform(xTest)
    return newXTrain, newXTest
Example #30
def sparse_pca(data, dim=3):
    transformer = SparsePCA(n_components=dim, random_state=0)
    transformer.fit(data)
    result = transformer.transform(data)
    return result
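A one-line usage sketch (random input; assumes SparsePCA is imported from sklearn.decomposition):

import numpy as np
print(sparse_pca(np.random.RandomState(0).randn(40, 10)).shape)  # (40, 3)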
Example #31
sns.scatterplot(data=Xn_indexed,
                x="retail_and_recreation_percent_change_from_baseline",
                y="grocery_and_pharmacy_percent_change_from_baseline",
                hue="Rt_binarized")
plt.show()

# 2D projection
sparse_pca = SparsePCA(n_components=2, random_state=0, alpha=2)
X_scaled = minmax_scale(X)
sparse_pca.fit(X=X_scaled)
print(
    pd.DataFrame(sparse_pca.components_,
                 columns=[
                     _.replace("_percent_change_from_baseline", "")
                     for _ in X.columns
                 ]))
X_tf = sparse_pca.transform(X_scaled)
X_tf_Rt = pd.DataFrame(X_tf, columns=["X1", "X2"])
X_tf_Rt["Rt"] = X["Rt_binarized"]
sns.scatterplot(data=X_tf_Rt, x="X1", y="X2", hue="Rt_binarized")
plt.show()

ax = fig.add_subplot(111, projection='3d')

svc = LinearSVC(random_state=0,
                penalty="l1",
                loss="squared_hinge",
                dual=False,
                max_iter=10000)
svc.fit(X=X_normed, y=X["Rt_binarized"])
Example #32
    def fit_model(self):
        # for filename in glob.glob(os.path.join(self.data_path, '*MP034_2017-09-11.mat')):
        # for filename in glob.glob(os.path.join(self.data_path, '*.mat')):
        # self.mat_file_lst = ['natimg2800_M170717_MP034_2017-09-11.mat',
        #                      'natimg2800_M160825_MP027_2016-12-14.mat',
        #                      'natimg2800_M161025_MP030_2017-05-29.mat',
        #                      'natimg2800_M170604_MP031_2017-06-28.mat',
        #                      'natimg2800_M170714_MP032_2017-09-14.mat',
        #                      'natimg2800_M170714_MP032_2017-08-07.mat',
        #                      'natimg2800_M170717_MP033_2017-08-20.mat']
        for filename in self.mat_file_lst:
            print(filename)
            data = io.loadmat(self.data_path+filename)
            resp = data['stim'][0]['resp'][0]
            spont =data['stim'][0]['spont'][0]
            if self.model=='EnsemblePursuit':
                X=subtract_spont(spont,resp)
                for lambd_ in self.lambdas:
                    neuron_init_dict={'method':'top_k_corr','parameters':{'n_av_neurons':100,'n_of_neurons':1,'min_assembly_size':8}}
                    print(str(neuron_init_dict['parameters']['n_av_neurons']))
                    ep=EnsemblePursuitPyTorch()
                    start=time.time()
                    U_V,nr_of_neurons,U,V, cost_lst,seed_neurons,ensemble_neuron_lst=ep.fit_transform(X,lambd_,self.nr_of_components,neuron_init_dict)
                    end=time.time()
                    tm=end-start
                    print('Time', tm)
                    #np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_V_ep.npy',V)

                    np.save(self.save_path+filename+'_V_ep.npy',V)

                    np.save(self.save_path+filename+'_U_ep.npy',U)

                    np.save(self.save_path+filename+'_ensemble_pursuit_lst_ep.npy',ensemble_neuron_lst)
                    np.save(self.save_path+filename+'_seed_neurons_ep.npy', seed_neurons)
                    np.save(self.save_path+filename+'_time_ep.npy', tm)

                   

#np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_U_ep.npy',U)
                    #np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_cost_ep.npy',cost_lst)
                    #np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_n_neurons_ep.npy',nr_of_neurons)
                    #np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_ensemble_neuron_lst.npy',ensemble_neuron_lst)
                    #np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_time_ep.npy',tm)
                    #np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_seed_neurons.npy',seed_neurons)
            if self.model=='SparsePCA':
                X=subtract_spont(spont,resp)
                X=stats.zscore(X)
                print(X.shape)
                for alpha in self.alphas:
                    sPCA=SparsePCA(n_components=self.nr_of_components,alpha=alpha,random_state=7, max_iter=100, n_jobs=-1,verbose=1)
                    #X=X.T
                    start=time.time()
                    model=sPCA.fit(X)
                    end=time.time()
                    elapsed_time=end-start
                    U=model.components_
                    print('U',U.shape)
                    #errors=model.error_
                    V=sPCA.transform(X)
                    print('V',V.shape)
                    np.save(self.save_path+filename[45:85]+'_'+str(alpha)+'_'+str(self.nr_of_components)+'_U_sPCA.npy',U)
                    np.save(self.save_path+filename[45:85]+'_'+str(alpha)+'_'+str(self.nr_of_components)+'_V_sPCA.npy',V)
                    np.save(self.save_path+filename[45:85]+'_'+str(alpha)+'_'+str(self.nr_of_components)+'_time_sPCA.npy',elapsed_time)
                    #np.save(self.save_path+filename[45:85]+'_'+str(alpha)+'_'+str(self.nr_of_components)+'_errors_sPCA.npy',errors)
            if self.model=='NMF':
                 X=subtract_spont(spont,resp)
                 X-=X.min(axis=0)
                 for alpha in self.alphas:
                    model = NMF(n_components=self.nr_of_components, init='nndsvd', random_state=7,alpha=alpha)
                    start=time.time()
                    V=model.fit_transform(X)
                    end=time.time()
                    time_=end-start
                    print(end-start)
                    U=model.components_
                    np.save(self.save_path+filename[45:85]+'_'+str(alpha)+'_'+str(self.nr_of_components)+'_U_NMF.npy',U)
                    np.save(self.save_path+filename[45:85]+'_'+str(alpha)+'_'+str(self.nr_of_components)+'_V_NMF.npy',V)
                    np.save(self.save_path+filename[45:85]+'_'+str(alpha)+'_'+str(self.nr_of_components)+'_time_NMF.npy',time_)
            if self.model=='PCA':
                  X=subtract_spont(spont,resp)
                  X=stats.zscore(X)
                  pca=PCA(n_components=self.nr_of_components)
                  start=time.time()
                  V=pca.fit_transform(X)
                  U=pca.components_
                  end=time.time()
                  elapsed_time=end-start
                  #V=pca.components_
                  var=pca.explained_variance_
                  np.save(self.save_path+filename[45:85]+'_'+str(self.nr_of_components)+'_V_pca.npy',V)
                  np.save(self.save_path+filename[45:85]+'_'+str(self.nr_of_components)+'_time_pca.npy',elapsed_time)
                  np.save(self.save_path+filename[45:85]+'_'+str(self.nr_of_components)+'_var_pca.npy',var)
                  np.save(self.save_path+filename[45:85]+'_'+str(self.nr_of_components)+'_U_pca.npy',U)
            if self.model=='LDA':
                  X=resp
                  X-=X.min(axis=0)
                  lda=LatentDirichletAllocation(n_components=self.nr_of_components, random_state=7)
                  start=time.time()
                  V=lda.fit_transform(X)
                  end=time.time()
                  elapsed_time=end-start
                  print('time',elapsed_time)
                  U=lda.components_
                  np.save(self.save_path+filename[45:85]+'_'+str(self.nr_of_components)+'_V_lda.npy',V)
                  np.save(self.save_path+filename[45:85]+'_'+str(self.nr_of_components)+'_U_lda.npy',U) 
                  np.save(self.save_path+filename[45:85]+'_'+str(self.nr_of_components)+'_time_lda.npy',elapsed_time) 
Example #33
    def fit_model(self):
        for filename in self.mat_file_lst:
            print(filename)
            data = io.loadmat(self.data_path + filename)
            resp = data['stim'][0]['resp'][0]
            spont = data['stim'][0]['spont'][0]
            if self.model == 'EnsemblePursuit_numpy':
                X = subtract_spont(spont, resp).T
                options_dict = {
                    'seed_neuron_av_nr': 100,
                    'min_assembly_size': 8
                }
                ep_np = EnsemblePursuitNumpy(n_ensembles=self.nr_of_components,
                                             lambd=self.lambd_,
                                             options_dict=options_dict)
                start = time.time()
                U, V = ep_np.fit_transform(X)
                end = time.time()
                tm = end - start
                print('Time', tm)
                np.save(self.save_path + filename + '_V_ep_numpy.npy', V)
                np.save(self.save_path + filename + '_U_ep_numpy.npy', U)
                np.save(self.save_path + filename + '_timing_ep_numpy.npy', tm)
            if self.model == 'EnsemblePursuit_pytorch':
                X = subtract_spont(spont, resp).T
                options_dict = {
                    'seed_neuron_av_nr': 100,
                    'min_assembly_size': 8
                }
                ep_pt = EnsemblePursuitPyTorch(
                    n_ensembles=self.nr_of_components,
                    lambd=self.lambd_,
                    options_dict=options_dict)
                start = time.time()
                U, V = ep_pt.fit_transform(X)
                end = time.time()
                tm = end - start
                print('Time', tm)
                np.save(self.save_path + filename + '_V_ep_pytorch.npy', V)
                np.save(self.save_path + filename + '_U_ep_pytorch.npy', U)
                np.save(self.save_path + filename + '_timing_ep_pytorch.npy',
                        tm)
            if self.model == 'EnsemblePursuit_adaptive':
                X = subtract_spont(spont, resp).T
                options_dict = {
                    'seed_neuron_av_nr': 100,
                    'min_assembly_size': 8
                }
                ep_pt = EnsemblePursuitPyTorch(
                    n_ensembles=self.nr_of_components,
                    lambd=self.lambd_,
                    options_dict=options_dict)
                start = time.time()
                U, V = ep_pt.fit_transform(X)
                end = time.time()
                tm = end - start
                print('Time', tm)
                np.save(self.save_path + filename + '_V_ep_adaptive.npy', V)
                np.save(self.save_path + filename + '_U_ep_adaptive.npy', U)

            if self.model == 'SparsePCA':
                X = subtract_spont(spont, resp)
                X = zscore(X)
                sPCA = SparsePCA(n_components=self.nr_of_components,
                                 random_state=7,
                                 max_iter=100,
                                 n_jobs=-1,
                                 verbose=1)
                start = time.time()
                model = sPCA.fit(X)
                end = time.time()
                elapsed_time = end - start
                U = model.components_
                V = sPCA.transform(X)
                np.save(self.save_path + filename + '_U_sPCA.npy', U)
                np.save(self.save_path + filename + '_V_sPCA.npy', V)
                np.save(self.save_path + filename + '_time_sPCA.npy',
                        elapsed_time)
            if self.model == 'ICA':
                X = subtract_spont(spont, resp)
                X = zscore(X)
                ICA = FastICA(n_components=self.nr_of_components,
                              random_state=7)
                start = time.time()
                V = ICA.fit_transform(X)
                end = time.time()
                elapsed_time = end - start
                U = ICA.components_
                np.save(self.save_path + filename + '_U_ICA.npy', U)
                np.save(self.save_path + filename + '_V_ICA.npy', V)
                np.save(self.save_path + filename + '_time_ICA.npy',
                        elapsed_time)
Example #34
    cov_tmp[cov_EWMA.columns] @ V_tmp[:, w_top5]).loc['t_NW_adjusted']
pd.DataFrame(V_tmp[:, w_top5], index=cov_EWMA.columns).mean(axis=1)

# TODO: EWMA + same-frequency historical data

# Sparse PCA
from sklearn.decomposition import SparsePCA
transformer = SparsePCA(n_components=30)  #, random_state=0)
# TODO: should the input be the raw matrix (optionally standardized) or the covariance matrix?
# NOTE: input magnitudes should be neither too small nor too large;
# around 100 works well (not divided by std)
transformer.fit(
    cov_chara_ret.dropna(how='all',
                         axis=0).agg(lambda x: x - x.mean()).fillna(0.0) *
    10000.0)  #.cov()*1e4)
transformer.transform(
    cov_chara_ret.dropna(how='all', axis=0).fillna(0.0)
)  #.apply(lambda x:x.where(~x.isnull(),x.mean()),axis=0))#,index=date_investing[date_investing<'2019-12'])
# Flip each component portfolio's sign according to its mean return
weights = pd.DataFrame(transformer.components_,
                       columns=signal_names.split(',')).T
weights
ret_transformed_port = (
    cov_chara_ret.fillna(0.0) @ transformer.components_.T).replace(
        0.0, np.nan)
ret_transformed_port
for c in weights.columns:
    # Flip the weights' signs and rescale them
    weights[c] = weights[c] * np.sign(ret_transformed_port[c].mean()) / np.abs(
        weights[c]).sum()
ret_transformed_port = (cov_chara_ret.fillna(0.0) @ weights).replace(
    0.0, np.nan)
Example #35
    # NOTE: the opening lines of this snippet were truncated in the source;
    # the constructor is reconstructed here, with n_components=30 inferred
    # from the 3x10 component grid and the 30-bar chart plotted below.
    # X (apparently 8x8 digit images, given the reshape below) is defined
    # in the truncated part.
    spca = SparsePCA(n_components=30,
                     normalize_components=True,
                     random_state=1000)
    spca.fit(X)

    # Show the components
    sns.set()

    fig, ax = plt.subplots(3, 10, figsize=(22, 8))

    for i in range(3):
        for j in range(10):
            ax[i, j].imshow(spca.components_[(3 * j) + i].reshape((8, 8)),
                            cmap='gray')
            ax[i, j].set_xticks([])
            ax[i, j].set_yticks([])

    plt.show()

    # Transform X[0]
    y = spca.transform(X[0].reshape(1, -1)).squeeze()

    # Show the absolute magnitudes
    fig, ax = plt.subplots(figsize=(22, 10))

    ax.bar(np.arange(1, 31, 1), np.abs(y))
    ax.set_xticks(np.arange(1, 31, 1))
    ax.set_xlabel('Component', fontsize=16)
    ax.set_ylabel('Coefficient (absolute values)', fontsize=16)

    plt.show()
Example #36
TPW = np.zeros(100)
FPW = np.zeros(100)

# k active blocks out of Number_of_blocks (the original used np.insert with
# float indices, which newer NumPy versions reject)
nblocks = np.concatenate([np.ones(k), np.zeros(Number_of_blocks - k)])
print(nblocks)

from tqdm import tqdm
for j in tqdm(range(100)):
    true_block = np.random.permutation(nblocks)
    X = rv.rvs(n)

    SPCA = SparsePCA(n_components=Number_of_blocks)

    Xfit = SPCA.fit(X)

    XPCA = SPCA.transform(X)

    signal = 2 * (np.log(p) / n)**.5

    beta = np.zeros(p)

    counter = 0
    for i in range(Number_of_blocks):
        if true_block[i] == 1:
            beta[counter:counter +
                 blocks_size[i]] = c * signal * np.random.dirichlet(
                     np.ones(blocks_size[i]), 1)
        counter += blocks_size[i]

    y = np.matmul(X, beta) + np.random.normal(0, 1, n)