Example #1
def plot_PCA(data, features):
    X = data[features]
    y = data["categoria"]

    X_std = StandardScaler().fit_transform(X)
    acp = sk_pca(n_components=2)
    Y = acp.fit_transform(X_std)

    results = []

    for name in (2, 3, 13):
        result = go.Scatter(x=Y[y == name, 0],
                            y=Y[y == name, 1],
                            mode="markers",
                            name=name,
                            marker=go.Marker(size=8,
                                             line=go.Line(
                                                 color="rgba(225,225,225,0.2)",
                                                 width=0.5),
                                             opacity=0.75))
        results.append(result)

    data = go.Data(results)
    layout = go.Layout(xaxis=go.XAxis(title="CP1", showline=False),
                       yaxis=go.YAxis(title="CP2", showline=False))

    fig = go.Figure(data=data, layout=layout)
    py.iplot(fig)

    return fig
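
The function above relies on module-level aliases the snippet does not show (StandardScaler, sk_pca for sklearn's PCA, plotly's graph_objs as go, and an iplot-capable module as py) and on the legacy go.Marker/go.Line/go.Data/go.XAxis/go.YAxis wrappers, which recent plotly releases deprecate. A minimal usage sketch under those assumptions; the file name, feature columns and category labels are hypothetical:

import pandas as pd
import plotly.offline as py
import plotly.graph_objs as go
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA as sk_pca

# Hypothetical dataset with a "categoria" column containing the labels 2, 3 and 13.
data = pd.read_csv("./vinos.csv")
features = ["alcohol", "acidez", "fenoles", "color"]  # hypothetical feature columns
fig = plot_PCA(data, features)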
Example #2
def pca_initial_gui(data):  # Initial PCA function (no standardscaler)
    feat = (data.values[:, 3:]).astype('float64')
    ncom = 30

    # Initialise
    skpca1 = sk_pca(n_components=ncom)

    # Scale the features to have zero mean and standard deviation of 1
    # This is important when correlating data with very different variances
    # nfeat1 = StandardScaler().fit_transform(feat)

    # Fit the spectral data and extract the explained variance ratio
    X1 = skpca1.fit(feat)
    expl_var_1 = X1.explained_variance_ratio_

    # create scree plot

    fig = plt.figure(dpi=100, figsize=(10, 5))
    plt.bar(range(ncom),
            expl_var_1 * 100,
            label="Explained Variance %",
            color='blue',
            figure=fig)
    plt.xticks(np.arange(len(expl_var_1)), np.arange(1, len(expl_var_1) + 1))
    plt.plot(np.cumsum(expl_var_1) * 100,
             '-o',
             label='Cumulative variance %',
             color='green',
             figure=fig)
    plt.xlabel('PC Number')
    plt.ylabel('Explained Variance (%)')
    plt.legend()

    return fig
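
pca_initial_gui assumes numpy (np), matplotlib.pyplot (plt) and the sk_pca alias are already in scope, and that the frame has three leading metadata columns followed by at least 30 numeric feature columns (ncom is hard-coded to 30). A minimal usage sketch under those assumptions; the file name is hypothetical:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA as sk_pca

data = pd.read_csv("./spectra.csv")  # hypothetical spectral dataset
fig = pca_initial_gui(data)
plt.show()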
Example #3
def get_xi_prob_from_sk(x_all, xp_index, xn_index):
    log("\n\n\n")
    log("SK")
    log("-----")
    skpca = sk_pca(n_components=2)
    pcs = skpca.fit_transform(x_all)
    log_debug("\tSK PCA: ", pcs.shape)

    # labels, POSITIVE_CLASS, NEGATIVE_CLASS, pos_class_pcs and neg_class_pcs
    # (as well as the log, log_debug, draw_scatter_plot and get_xi_prob_by_bayes
    # helpers) are assumed to be defined elsewhere in the original module.
    pos_pcs = [pcs[idx] for idx in range(len(labels)) if labels[idx] == POSITIVE_CLASS]
    neg_pcs = [pcs[idx] for idx in range(len(labels)) if labels[idx] == NEGATIVE_CLASS]
    assert(len(pos_pcs) == len(pos_class_pcs))
    assert(len(neg_pcs) == len(neg_class_pcs))
    draw_scatter_plot(pcs[:, 0], pcs[:, 1], pcs[xp_index], pcs[xn_index], labels)
    get_xi_prob_by_bayes(pos_pcs, neg_pcs, pcs[xp_index], pcs[xn_index])
Example #4
def pca_final(data, ncomp):  # PCA fitting with scores as result
    # Read the features
    feat = (data.values[:, 3:]).astype('float32')

    # Scale the features to have zero mean and standard deviation of 1
    # This is important when correlating data with very different variances
    nfeat1 = StandardScaler().fit_transform(feat)

    skpca1 = sk_pca(n_components=ncomp)

    # Transform on the scaled features
    Xt1 = skpca1.fit_transform(nfeat1)
    scores = pd.DataFrame(Xt1)

    return scores
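
Like Example #2, pca_final expects StandardScaler, sk_pca and pandas in scope and a frame whose numeric features start at the fourth column. A short usage sketch; the file name and component count are assumptions:

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA as sk_pca

data = pd.read_csv("./spectra.csv")  # hypothetical dataset
scores = pca_final(data, ncomp=5)    # DataFrame of PCA scores, shape (n_samples, 5)
print(scores.head())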
Example #5
def generate_pca(self, require_dim, test_file=None, output_dir='./fs_result/'):
    logging.info('   -----Calculating PCA---- ')
    out_file_train = open('{0}pca_{1}_{2}_train'.format(output_dir, self.data_name, require_dim), mode='w')
    pca = sk_pca(n_components=require_dim).fit(self.data_feature)
    new_feature_train = pca.transform(self.data_feature)
    print(pca.explained_variance_ratio_)
    for each_sample in range(len(new_feature_train)):
        combine_str = '{0}'.format(self.data_label[each_sample])
        for each_feature in range(require_dim):
            combine_str += '\t' + str(each_feature+1) + ':' + str(new_feature_train[each_sample][each_feature])
        out_file_train.write(combine_str + '\n')
    out_file_train.close()
    if test_file is not None:
        test_label, test_feature = SupervisedFs.modify_input_data(test_file)
        out_file_test = open('{0}pca_{1}_{2}_test'.format(output_dir, test_file.split('/')[-1], require_dim), mode='w')
        new_feature_test = pca.transform(test_feature)
        for each_sample in range(len(new_feature_test)):
            combine_str = '{0}'.format(test_label[each_sample])
            for each_feature in range(require_dim):
                combine_str += '\t' + str(each_feature+1) + ':' + str(new_feature_test[each_sample][each_feature])
            out_file_test.write(combine_str + '\n')
        out_file_test.close()
    logging.info('   -----Calculating PCA----- ==>  Output directory:{0}  ==> Done'.format(output_dir))
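
Each row written above is the sample's label followed by tab-separated index:value pairs (a libsvm-style layout); a purely illustrative line for require_dim=3 could look like:

1	1:0.4821	2:-1.0373	3:0.2158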
Example #6
plt.show()

weight_list = []
results = np.zeros((2, n_portfolios))
# Simulate random portfolios: annualised return and volatility for each
for i in range(n_portfolios):
    randarr = np.random.rand(n_assets)
    weights = randarr/randarr.sum()  # n_assets random weights summing to 1
    weight_list.append(weights)
    # calculate annualised portfolio return
    pf_ret = round(np.sum(mu * weights) * 252, 2)
    pf_volatility = np.sqrt(np.dot(weights.T,np.dot(cov_matrix, weights))) * np.sqrt(252)
    results[0,i] = pf_ret
    results[1,i] = pf_volatility

pca = sk_pca(n_components=n_assets)
pc = pca.fit_transform(prices)
# plot the variance explained by pcs
# plt.bar(range(n_assets), pca.explained_variance_ratio_)
# plt.title('variance explained by pc')
# plt.show()

# Select one portfolio out of the n_portfolios generated above
selected_pf = 1

# get First `n_components` Principal components
pcs_1_2 = pca.components_[0:n_components,:].T
pc_weights = weight_list[selected_pf].dot(pcs_1_2)

# pf_volatility = round(np.sqrt(np.dot(weights.T,np.dot(cov_matrix, weights))) * np.sqrt(n_days),2)
# Portfolio volatility with PCs
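
The fragment above assumes several variables defined earlier in the script (prices, mu, cov_matrix, n_assets, n_portfolios, n_components) plus the usual imports. A minimal, hypothetical setup sketch; the file name and parameter values are assumptions:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA as sk_pca

prices = pd.read_csv("./prices.csv")    # hypothetical file, one price column per asset
returns = prices.pct_change().dropna()  # daily returns
mu = returns.mean()                     # mean daily return per asset
cov_matrix = returns.cov()              # daily covariance matrix
n_assets = prices.shape[1]
n_portfolios = 100
n_components = 2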
Example #7
# -*- coding: utf-8 -*-
"""
Principal component analysis

@author: David André Rodríguez Méndez (AndreRdz7)
"""
# Import libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA as sk_pca
# Get dataset
df = pd.read_csv("./iris.csv")
X = df.iloc[:, 0:4].values
y = df.iloc[:, 4].values  # class labels (kept in lowercase y so the PCA scores below do not overwrite them)
# Make it standard
X_std = StandardScaler().fit_transform(X)
# PCA
acp = sk_pca(n_components=2)
Y = acp.fit_transform(X_std)
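
The next example notes that the choice of two components "was calculated in the last python file"; a short follow-up to the script above showing how that choice could be sanity-checked on the fitted model:

# Share of variance captured by each retained component, and their combined share.
print(acp.explained_variance_ratio_)
print(acp.explained_variance_ratio_.sum())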
Example #8
import pandas as pd
# import plotly.plotly as py
import plotly.tools as tls
import plotly.graph_objs as g_objs
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA as sk_pca

df = pd.read_csv('./datasets/iris/iris.csv')

X = df.iloc[:, 0:4].values  # getting the predictor variables
y = df.iloc[:, 4].values  # getting the result
X_std = StandardScaler().fit_transform(X)
# print(f'X_std =>\n{X_std}')

acp = sk_pca(n_components=2)  # the '2' was calculated in the last python file
Y = acp.fit_transform(X_std)

# print(f'Y =>\n{Y}')

results = []

for name in ('setosa', 'versicolor', 'virginica'):
    result = g_objs.Scatter(x=Y[y == name, 0],
                            y=Y[y == name, 1],
                            mode='markers',
                            name=name,
                            marker=g_objs.scatter.Marker(
                                size=8,
                                line=g_objs.scatter.marker.Line(
                                    color='rgba(255,255,255,0.2)',
                                    width=0.5),
                                opacity=0.75))
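    # The source cuts off here. A hedged completion following Example #1's
    # pattern, using plotly's non-deprecated API (axis titles are assumptions):
    results.append(result)

layout = g_objs.Layout(xaxis=dict(title='CP1', showline=False),
                       yaxis=dict(title='CP2', showline=False))
fig = g_objs.Figure(data=results, layout=layout)
fig.show()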