def plot_PCA(data, features):
    """Plot the first two principal components of ``data[features]``.

    The selected columns are standardised, reduced to two components and
    drawn as one scatter trace per value (2, 3 and 13) of the
    ``"categoria"`` column. The figure is displayed with ``py.iplot`` and
    also returned.

    Args:
        data: Table indexable by ``features`` and by ``"categoria"``.
        features: Column selection used as PCA input.

    Returns:
        The plotly figure that was displayed.
    """
    categories = data["categoria"]
    standardized = StandardScaler().fit_transform(data[features])
    projected = sk_pca(n_components=2).fit_transform(standardized)

    def build_trace(category):
        # Boolean mask selecting the rows that belong to this category.
        mask = categories == category
        return go.Scatter(
            x=projected[mask, 0],
            y=projected[mask, 1],
            mode="markers",
            name=category,
            marker=go.Marker(
                size=8,
                line=go.Line(color="rgba(225,225,225,0.2)", width=0.5),
                opacity=0.75,
            ),
        )

    traces = go.Data([build_trace(category) for category in (2, 3, 13)])
    axes = go.Layout(
        xaxis=go.XAxis(title="CP1", showline=False),
        yaxis=go.YAxis(title="CP2", showline=False),
    )
    fig = go.Figure(data=traces, layout=axes)
    py.iplot(fig)
    return fig
def pca_initial_gui(data):
    """Fit a PCA on the raw (unscaled) features and draw a scree plot.

    Columns from index 3 onward of ``data.values`` are cast to float64 and
    used as features. No standardisation is applied in this initial pass.

    Args:
        data: Object exposing a 2-D ``.values`` array whose feature columns
            start at index 3.

    Returns:
        The matplotlib figure holding the explained-variance bars and the
        cumulative-variance curve.
    """
    feat = data.values[:, 3:].astype('float64')

    # Up to 30 components are requested; PCA cannot extract more than
    # min(n_samples, n_features), so cap the request instead of raising.
    ncom = min(30, *feat.shape)
    skpca1 = sk_pca(n_components=ncom)

    # Features are deliberately NOT standardised here (see pca_final for the
    # scaled variant); scaling matters when variances differ widely.
    # NOTE(review): the fit call was lost in the source text and has been
    # reconstructed as a plain fit on the raw features - confirm.
    X1 = skpca1.fit(feat)
    expl_var_1 = X1.explained_variance_ratio_

    # Scree plot: per-component bars plus the cumulative curve.
    fig = plt.figure(dpi=100, figsize=(10, 5))
    n_shown = len(expl_var_1)
    # NOTE(review): the head of this call was also lost; a bar chart with one
    # bar per component is assumed - confirm.
    plt.bar(range(n_shown), expl_var_1 * 100,
            label="Explained Variance %", color='blue', figure=fig)
    plt.xticks(np.arange(n_shown), np.arange(1, n_shown + 1))
    plt.plot(np.cumsum(expl_var_1) * 100, '-o',
             label='Cumulative variance %', color='green', figure=fig)
    plt.xlabel('PC Number')
    plt.ylabel('Explained Variance (%)')
    plt.legend()
    return fig
def get_xi_prob_from_sk(x_all, xp_index, xn_index):
    """Run the two-component ``sk_pca`` projection and the Bayes estimate.

    ``x_all`` is projected onto two principal components, the projected rows
    are split by the module-level ``labels`` into positive and negative
    groups, the group sizes are checked against ``pos_class_pcs`` and
    ``neg_class_pcs``, a scatter plot is drawn and
    ``get_xi_prob_by_bayes`` is called with both groups and the two
    selected samples.

    Args:
        x_all: Input passed to ``fit_transform``.
        xp_index: Index into the projection of the first selected sample.
        xn_index: Index into the projection of the second selected sample.
    """
    for banner in ("\n\n\n", "SK", "-----"):
        log(banner)

    pcs = sk_pca(n_components=2).fit_transform(x_all)
    log_debug("\tSK PCA: ", pcs.shape)

    pos_pcs = [pcs[i] for i, label in enumerate(labels)
               if label == POSITIVE_CLASS]
    neg_pcs = [pcs[i] for i, label in enumerate(labels)
               if label == NEGATIVE_CLASS]

    # Group sizes must agree with the module-level reference groups.
    assert len(pos_pcs) == len(pos_class_pcs)
    assert len(neg_pcs) == len(neg_class_pcs)

    xp_pc = pcs[xp_index]
    xn_pc = pcs[xn_index]
    # NOTE(review): the y-values come from `P`, not `pcs` - this looks like it
    # may have been meant to be pcs[:, 1]; confirm before changing.
    draw_scatter_plot(pcs[:, 0], P[:, 1], xp_pc, xn_pc, labels)
    get_xi_prob_by_bayes(pos_pcs, neg_pcs, xp_pc, xn_pc)
def pca_final(data, ncomp):
    """Return the PCA scores of the standardised feature columns.

    Columns from index 3 onward of ``data.values`` are cast to float32,
    standardised with ``StandardScaler`` and projected onto ``ncomp``
    principal components.

    Args:
        data: Object exposing a 2-D ``.values`` array whose feature columns
            start at index 3.
        ncomp: Value passed as ``n_components`` to the PCA.

    Returns:
        A ``pandas.DataFrame`` holding the projected scores.
    """
    raw_features = data.values[:, 3:].astype('float32')

    # Standardise first: PCA is variance-driven, so unscaled features with
    # very different variances would dominate the components.
    standardized = StandardScaler().fit_transform(raw_features)

    projector = sk_pca(n_components=ncomp)
    return pd.DataFrame(projector.fit_transform(standardized))
def generate_pca(self, require_dim, test_file=None, output_dir='./fs_result/'):
    """Project the data onto ``require_dim`` principal components and save it.

    A PCA is fitted on ``self.data_feature``. The projected training samples
    are written to ``<output_dir>pca_<data_name>_<require_dim>_train``, one
    sample per line: the label followed by tab-separated ``index:value``
    pairs with 1-based indices. When ``test_file`` is given it is loaded with
    ``SupervisedFs.modify_input_data``, projected with the same fitted PCA
    and written to ``<output_dir>pca_<file name>_<require_dim>_test``.

    Args:
        require_dim: Number of principal components to keep.
        test_file: Optional path of a test set to project as well.
        output_dir: Prefix prepended verbatim to the output file names, so
            it is expected to end with a path separator.
    """
    def _dump(path, labels, features):
        # The with-block closes the file even when a write fails.
        with open(path, mode='w') as out_file:
            for sample_idx, row in enumerate(features):
                pairs = ''.join(
                    '\t' + str(dim + 1) + ':' + str(row[dim])
                    for dim in range(require_dim)
                )
                out_file.write('{0}'.format(labels[sample_idx]) + pairs + '\n')

    # NOTE(review): the name of the call emitting this banner (and the final
    # one) was lost in the source text; print is assumed - confirm.
    print(' -----Calculating PCA---- ')

    pca = sk_pca(n_components=require_dim).fit(self.data_feature)
    new_feature_train = pca.transform(self.data_feature)
    # Function-call form of print works on both Python 2 and Python 3.
    print(pca.explained_variance_ratio_)
    _dump('{0}pca_{1}_{2}_train'.format(output_dir, self.data_name, require_dim),
          self.data_label, new_feature_train)

    if test_file is not None:
        test_label, test_feature = SupervisedFs.modify_input_data(test_file)
        new_feature_test = pca.transform(test_feature)
        _dump('{0}pca_{1}_{2}_test'.format(output_dir, test_file.split('/')[-1], require_dim),
              test_label, new_feature_test)

    print(' -----Calculating PCA----- ==> Output directory:{0} ==> Done'.format(output_dir))


# ---------------------------------------------------------------------------
# NOTE(review): everything below is a fragment of a different snippet whose
# enclosing definition is not visible here. n_portfolios, n_assets, mu,
# prices and n_components must come from that missing context.
# ---------------------------------------------------------------------------
weight_list = []
results = np.zeros((2, n_portfolios))

# Calculate portfolio volatility
for i in range(n_portfolios):
    randarr = np.random.rand(n_assets)
    weights = randarr / randarr.sum()  # random weights normalised to sum to 1
    weight_list.append(weights)

    # calculate annualised portfolio return
    pf_ret = round(np.sum(mu * weights) * 252, 2)
    # NOTE(review): the inner operands of this expression were lost in the
    # source text (`np.sqrt(,, weights)))`). It is reconstructed as
    # sqrt(w.T @ cov @ w); `cov_matrix` is a PLACEHOLDER name - replace it
    # with the real covariance variable.
    pf_volatility = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights))) * np.sqrt(252)
    results[0, i] = pf_ret
    results[1, i] = pf_volatility

pca = sk_pca(n_components=n_assets)
pc = pca.fit_transform(prices)

# plot the variance explained by pcs (the plotting calls were already
# commented out in the source text)
# plt.title('variance explained by pc')

# Select a portfolio out of the 100
selected_pf = 1

# get First `n_components` Principal components
pcs_1_2 = pca.components_[0:n_components, :].T
pc_weights = weight_list[selected_pf].dot(pcs_1_2)

# Portfolio volatility with PCs (disabled in the source text; its inner
# operands were lost there as well):
# pf_volatility = round(np.sqrt(<w.T @ cov @ w>) * np.sqrt(n_days), 2)
# -*- coding: utf-8 -*-
"""
Principal component analysis

Loads the iris data set, standardises the four feature columns and projects
them onto two principal components.

@author: David André Rodríguez Méndez (AndreRdz7)
"""
# Import libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA as sk_pca

# Get dataset: the first four columns are the features, the fifth the labels
df = pd.read_csv("./iris.csv")
X = df.iloc[:, 0:4].values
# Labels are kept in lowercase `y` so the projection assigned to `Y` below
# no longer overwrites them.
y = df.iloc[:, 4].values

# Make it standard
X_std = StandardScaler().fit_transform(X)

# PCA
acp = sk_pca(n_components=2)
Y = acp.fit_transform(X_std)
import pandas as pd
# import plotly.plotly as py
# NOTE(review): the module path of this import was lost in the source text
# (`import as tls`); plotly.tools is assumed from the alias - confirm.
import plotly.tools as tls
import plotly.graph_objs as g_objs
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA as sk_pca

df = pd.read_csv('./datasets/iris/iris.csv')

X = df.iloc[:, 0:4].values  # getting the predictor variables
y = df.iloc[:, 4].values  # getting the result

X_std = StandardScaler().fit_transform(X)
# print(f'X_std =>\n{X_std}')

acp = sk_pca(n_components=2)  # the '2' was calculated in the last python file
Y = acp.fit_transform(X_std)
# print(f'Y =>\n{Y}')

# Build one scatter trace per species from the two projected components.
results = []
for name in ('setosa', 'versicolor', 'virginica'):
    result = g_objs.Scatter(
        x=Y[y == name, 0],
        y=Y[y == name, 1],
        mode='markers',
        name=name,
        marker=g_objs.scatter.Marker(
            size=8,
            # scatter.marker.Line replaces the deprecated top-level
            # g_objs.Line, matching the scatter.Marker used above.
            line=g_objs.scatter.marker.Line(color='rgba(255,255,255,0.2)', width=0.5),
            opacity=0.75,
        ),
    )
    # NOTE(review): the visible code never appends `result` to `results`;
    # the snippet may be truncated here - confirm against the full file.