Example #1
    def fit(self, hyperparameters, X, y):

        # PCA only works if n_components is less than the minimum of the number of samples and the number of features
        if hyperparameters['n_components'] < np.min(X.shape):
            self.pca = sklearnPCA(**hyperparameters).fit(X)

        else:
            # Select the fewest number of features in all other cases 
            self.pca = sklearnPCA(**self.grid[0]).fit(X)
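The guard above reflects a hard constraint in scikit-learn: PCA raises a ValueError when n_components exceeds min(n_samples, n_features). A minimal sketch of both sides of that check (assuming only numpy and scikit-learn; not part of the original example):

import numpy as np
from sklearn.decomposition import PCA as sklearnPCA

X = np.random.rand(10, 4)               # 10 samples, 4 features
sklearnPCA(n_components=4).fit(X)       # 4 <= min(10, 4): fits fine
try:
    sklearnPCA(n_components=5).fit(X)   # 5 > min(10, 4): raises
except ValueError as err:
    print(err)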
def get_pca():
    
    pca_matrix=[]
    error=0
    filepath=session['filepath']
    filetype=session['filetype']
    labeltype=session['label']
    
    request_data = json.loads(request.data)
    #print "request data in /pca:"
    #print request_data
    ordering = request_data["order"]
    distance_type=request_data["distance_type"]
    dimension_factor=request_data["dimension_factor"]
    pca_dimension_count=request_data["pca_dimension_count"]
    
    order=ex.parse_order(ordering)
    order=list(int(k) for k in order)
   
    alpha=ex.readFromFile(filepath, filetype, labeltype,distance_type,dimension_factor)
    dataset=np.array(alpha)
    
    for orderno in order:
        
        pca_matrix.append(dataset[orderno])
    
    
    data_columns=len(pca_matrix[0])
    if (data_columns >= pca_dimension_count):
        sklearn_pca = sklearnPCA(n_components=pca_dimension_count)
        Y_sklearn = sklearn_pca.fit_transform(pca_matrix)
        final_pca_values=np.transpose(Y_sklearn)
        final_pca_values=list(list(float(f) for f in d) for d in final_pca_values)
    elif(data_columns == 2):
        sklearn_pca = sklearnPCA(n_components=2)
        Y_sklearn = sklearn_pca.fit_transform(pca_matrix)
        final_pca_values=np.transpose(Y_sklearn)
        final_pca_values=list(list(float(f) for f in d) for d in final_pca_values)
    else:
        error=1
        final_pca_values=[]
    
    
    #print "final pca values:"
    #print np.array(final_pca_values)
    filePath=os.path.join(app.config['UPLOAD_FOLDER'], "results.txt")
    with open(filePath, "a") as f:
        f.write("PCA matrix for the ordering "+str(order)+" for "+str(len(final_pca_values))+" principal components:\n")
        f.write(str(np.transpose(final_pca_values))+"\n\n")
    
    response_data={}
    response_data["pca_values"]=final_pca_values
    response_data["error_value"]=error
   
    return json.dumps(response_data)
Example #3
def pca_step_na(trans_std,promo_std):
    from sklearn.decomposition import PCA as sklearnPCA
    trans_pca = sklearnPCA(n_components=8)
    trans_new = trans_pca.fit_transform(trans_std)
  
    # promo PCA
    promo_pca = sklearnPCA(n_components=12)
    promo_new = promo_pca.fit_transform(promo_std)
    pca_dict = {"trans":trans_pca,"promo":promo_pca}
    return trans_new,promo_new,pca_dict
Example #4
def pca_step(trans_std,food_std,promo_std):
    from sklearn.decomposition import PCA as sklearnPCA
    trans_pca = sklearnPCA(n_components=9)
    trans_new = trans_pca.fit_transform(trans_std)

    #food pca
    food_pca = sklearnPCA(n_components=24)
    food_new = food_pca.fit_transform(food_std)

    # promo PCA
    promo_pca = sklearnPCA(n_components=13)
    promo_new = promo_pca.fit_transform(promo_std)

    pca_dict = {"trans":trans_pca,"food":food_pca,"promo":promo_pca}
    return trans_new,food_new,promo_new,pca_dict
Example #5
def Linear_PCA(HE_MI_train_test, numdim=2):
    '''
    Overview
        - PCA: apply a linear transform ordered by eigenvalue and sort the components in that order

    INPUT
        - numdim : Dimension

    OUTPUT
        - 1. sklearn_HE_train_fit : numdim * numHEtrain
        - 2. sklearn_MI_train_fit : numdim * numMItrain
        - 3. sklearn_HE_test_fit : numdim * numHEtest
        - 4. sklearn_MI_test_fit : numdim * numMItest
    '''

    MyDataSet = HE_MI_train_test
    my_HEtraining = MyDataSet[0]
    my_MItraining = MyDataSet[1]
    my_HEtest = MyDataSet[2]
    my_MItest = MyDataSet[3]

    from sklearn.decomposition import PCA as sklearnPCA

    sklearn_pca = sklearnPCA(n_components=numdim)

    sklearn_HE_train_fit = sklearn_pca.fit_transform(my_HEtraining)
    sklearn_MI_train_fit = sklearn_pca.fit_transform(my_MItraining)
    sklearn_HE_test_fit = sklearn_pca.fit_transform(my_HEtest)
    sklearn_MI_test_fit = sklearn_pca.fit_transform(my_MItest)

    return [sklearn_HE_train_fit, sklearn_MI_train_fit, sklearn_HE_test_fit, sklearn_MI_test_fit]
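Note that Linear_PCA above calls fit_transform separately on each of the four splits, so every projection lives in its own component space. A hedged sketch of the more common pattern, fitting once on training data and reusing those components (the array shapes are illustrative assumptions):

import numpy as np
from sklearn.decomposition import PCA as sklearnPCA

rng = np.random.default_rng(0)
train, test = rng.normal(size=(100, 6)), rng.normal(size=(20, 6))

pca = sklearnPCA(n_components=2).fit(train)  # learn components on training data only
train_2d = pca.transform(train)
test_2d = pca.transform(test)                # project test data into the same space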
def plotPCA(labels, data, inputFile, outputFile, store=False):
    sklearn_pca = sklearnPCA(n_components=2)
    sklearn_pca.fit(data)
    newData = sklearn_pca.transform(data)
    xval = newData[:, 0]
    yval = newData[:, 1]
    lbls = set(labels)
    #(predicted_labels)
    fig1 = plt.figure(1)
    #print(lbls)
    for lbl in lbls:
        #cond = predicted_labels == lbl
        cond = [i for i, x in enumerate(labels) if x == lbl]
        plt.plot(xval[cond],
                 yval[cond],
                 linestyle='none',
                 marker='o',
                 label=lbl,
                 markersize=3)

    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.legend(numpoints=1, loc=0, fontsize='x-small')
    plt.subplots_adjust(bottom=.20, left=.20)
    plt.grid()
    fig1.suptitle("PCA plot for DBSCAN in " + inputFile.split("/")[-1],
                  fontsize=20)
    if store:
        fig1.savefig("_".join(
            [outputFile, inputFile.split("/")[-1].split(".")[0]]) + ".png")
    else:
        plt.show()
Example #7
    def create_pca(self, input_tsv_file_for_pca, output_html):
        df = pd.read_csv(filepath_or_buffer=input_tsv_file_for_pca,
                         header=0,
                         sep='\t')

        X = df.iloc[:, 1:24].values
        y = df.iloc[:, 23].values
        #WARNING OCCURANCE
        standardised_X = StandardScaler().fit_transform(X)
        sklearn_pca = sklearnPCA(n_components=2)
        Y_sklearn = sklearn_pca.fit_transform(standardised_X)

        traces = []

        factor_group = df['Group'].unique()
        print("The factors (groups) found: ", factor_group)
        for name in factor_group:

            trace = Scatter(x=Y_sklearn[y == name, 0],
                            y=Y_sklearn[y == name, 1],
                            mode='markers',
                            name=name,
                            marker=Marker(
                                size=12,
                                line=Line(color='rgba(217, 217, 217, 0.14)',
                                          width=0.5),
                                opacity=0.8))
            traces.append(trace)
        data = Data(traces)
        layout = Layout(xaxis=XAxis(title='PC1', showline=False),
                        yaxis=YAxis(title='PC2', showline=False))
        fig = Figure(data=data, layout=layout)
        plot(fig, show_link=False, filename=output_html, auto_open=False)
 def reduceDataset(self,nr=3,method='PCA'):
     '''It reduces the dimensionality of a given dataset using different techniques provided by Sklearn library
      Methods available:
                         'PCA'
                         'FactorAnalysis'
                         'KPCArbf','KPCApoly'
                         'KPCAcosine','KPCAsigmoid'
                         'IPCA'
                         'FastICADeflation'
                         'FastICAParallel'
                         'Isomap'
                         'LLE'
                         'LLEmodified'
                         'LLEltsa'
     '''
     dataset=self.ModelInputs['Dataset']
     #dataset=self.dataset[Model.in_columns]
     #dataset=self.dataset[['Humidity','TemperatureF','Sea Level PressureIn','PrecipitationIn','Dew PointF','Value']]
     #PCA
     if method=='PCA':
         sklearn_pca = sklearnPCA(n_components=nr)
         reduced = sklearn_pca.fit_transform(dataset)
     #Factor Analysis
     elif method=='FactorAnalysis':
         fa=FactorAnalysis(n_components=nr)
         reduced=fa.fit_transform(dataset)
     #kernel pca with rbf kernel
     elif method=='KPCArbf':
         kpca=KernelPCA(nr,kernel='rbf')
         reduced=kpca.fit_transform(dataset)
     #kernel pca with poly kernel
     elif method=='KPCApoly':
         kpca=KernelPCA(nr,kernel='poly')
         reduced=kpca.fit_transform(dataset)
     #kernel pca with cosine kernel
     elif method=='KPCAcosine':
         kpca=KernelPCA(nr,kernel='cosine')
         reduced=kpca.fit_transform(dataset)
     #kernel pca with sigmoid kernel
     elif method=='KPCAsigmoid':
         kpca=KernelPCA(nr,kernel='sigmoid')
         reduced=kpca.fit_transform(dataset)
     #Incremental PCA
     elif method=='IPCA':
         ipca=IncrementalPCA(nr)
         reduced=ipca.fit_transform(dataset)
     #Fast ICA
     elif method=='FastICAParallel':
         fip=FastICA(nr,algorithm='parallel')
         reduced=fip.fit_transform(dataset)
     elif method=='FastICADeflation':
         fid=FastICA(nr,algorithm='deflation')
         reduced=fid.fit_transform(dataset)
     elif method == 'All':
         self.dimensionalityReduction(nr=nr)
         return self
     
     self.ModelInputs.update({method:reduced})
     self.datasetsAvailable.append(method)
     return self
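The elif chain in reduceDataset maps a method name to an estimator. The same mapping can be written as a dictionary of constructors; a sketch, assuming the same sklearn imports the class already relies on:

from sklearn.decomposition import PCA as sklearnPCA, FactorAnalysis, KernelPCA, IncrementalPCA, FastICA

reducers = {
    'PCA': lambda nr: sklearnPCA(n_components=nr),
    'FactorAnalysis': lambda nr: FactorAnalysis(n_components=nr),
    'KPCArbf': lambda nr: KernelPCA(nr, kernel='rbf'),
    'KPCApoly': lambda nr: KernelPCA(nr, kernel='poly'),
    'KPCAcosine': lambda nr: KernelPCA(nr, kernel='cosine'),
    'KPCAsigmoid': lambda nr: KernelPCA(nr, kernel='sigmoid'),
    'IPCA': lambda nr: IncrementalPCA(nr),
    'FastICAParallel': lambda nr: FastICA(nr, algorithm='parallel'),
    'FastICADeflation': lambda nr: FastICA(nr, algorithm='deflation'),
}
# reduced = reducers[method](nr).fit_transform(dataset)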
def plotPCA(labels, data, inputFile, outputFile, store=False):
    # apply PCA
    sklearn_pca = sklearnPCA(n_components=2)
    newData = sklearn_pca.fit_transform(data)

    # get x and y values
    xval = newData[:, 0]
    yval = newData[:, 1]
    lbls = set(labels)

    fig1 = plt.figure(1)

    # plot for each label
    for lbl in lbls:
        cond = [i for i, x in enumerate(labels) if x == lbl]
        plt.plot(xval[cond],
                 yval[cond],
                 linestyle='none',
                 marker='o',
                 label=lbl)

    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.legend(numpoints=1, loc=0)
    plt.subplots_adjust(bottom=.20, left=.20)
    fig1.suptitle("PCA plot for centroids in " + inputFile.split("/")[-1],
                  fontsize=20)

    # if PCA output parameter given then store the plot else display it
    if store:
        fig1.savefig("_".join(
            [outputFile, inputFile.split("/")[-1].split(".")[0]]) + ".png")
    else:
        plt.show()
Example #10
def vPca(filepath):
    df = pd.read_csv(filepath_or_buffer=filepath, sep=',')
    x = df.iloc[:, :].values
    x = x.transpose()
    sklearn_pca = sklearnPCA(n_components=2)
    x2 = sklearn_pca.fit_transform(x)
    plots(x2)
Example #11
def PCA(X, labels):

    sklearn_pca = sklearnPCA(n_components=2)
    Y_sklearn = sklearn_pca.fit_transform(X)

    fig = plt.figure()
    # fig.suptitle(band, fontweight = 'bold')
    ax1 = fig.add_subplot(211)
    ax2 = fig.add_subplot(212)

    km = KMeans(n_clusters=2)
    km.fit(Y_sklearn)

    #ax.scatter(Y_sklearn[:,0], Y_sklearn[:,1])

    #tresh = [0 if i >4 else 1 for i  in Y_sklearn[:,0]]
    colors = ['blue', 'red']
    for idx, row in enumerate(Y_sklearn):
        ax2.scatter(row[0], row[1], color=colors[km.labels_[idx]], alpha=0.5)
        ax1.plot(X[idx, :], color=colors[km.labels_[idx]], alpha=0.2)

    labels.iloc[km.labels_ == 1].to_csv(
        '/Users/ryszardcetnarski/Desktop/Nencki/Badanie_NFB/Dane/miesniowcy_pca.csv '
    )
    return km.labels_, labels.iloc[km.labels_ == 1]
Example #12
def PCA(x,x_test):
    X_std = StandardScaler().fit_transform(x)
    sklearn_pca = sklearnPCA(n_components='mle', svd_solver='full')
    x = sklearn_pca.fit_transform(X_std)
    x_test = sklearn_pca.transform(x_test)

    return x,x_test
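Passing n_components='mle' lets scikit-learn choose the dimensionality via Minka's MLE; it requires svd_solver='full'. A small illustrative sketch on synthetic low-rank data (not from the original):

import numpy as np
from sklearn.decomposition import PCA as sklearnPCA

rng = np.random.default_rng(0)
low_rank = rng.normal(size=(200, 3)) @ rng.normal(size=(3, 10))   # rank-3 data
noisy = low_rank + 0.01 * rng.normal(size=(200, 10))

pca = sklearnPCA(n_components='mle', svd_solver='full').fit(noisy)
print(pca.n_components_)   # typically recovers a small intrinsic dimension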
Example #13
def ABC_summaryStatistics_PCA(Surveys):
    """ Heavily inspired by https://plot.ly/ipython-notebooks/principal-component-analysis/ """
    [A, B] = Surveys
    
    newdist = lambda x:  dbc.measurand( x.Dproj_pix()/x.GCl.R200(), 'Dproj', label='$D_\mathrm{proj,rel}$', un='$R_{200}$' )
    plotmeasures = [lambda x: x.LLS, lambda x: x.P_rest, lambda x: x.Mach, newdist]

    X1 = A.fetch_pandas(plotmeasures, surname=False).dropna().to_numpy()   #.data()   #, kwargs_FilterCluster={}, kwargs_FilterObjects={}
    X2 = B.fetch_pandas(plotmeasures, surname=False).dropna().to_numpy()   #.data()   #, kwargs_FilterCluster={}, kwargs_FilterObjects={}
    
    X1_std = StandardScaler().fit_transform(X1)
    X2_std = StandardScaler().fit_transform(X2)   
    
#    # http://scikit-learn.org/stable/auto_examples/decomposition/plot_pca_iris.html
#    plt.cla()
#    pca = decomposition.PCA(n_components=3)
#    pca.fit(X)
#    X = pca.transform(X)
     
    sklearn_pca = sklearnPCA(n_components=2)
    Y_sklearn = sklearn_pca.fit_transform(X1_std)
    
    """ This gives you a proxy for the average summed square error in the 2-D dimensional reduction via pca """
    distance = np.sum(Y_sklearn**2)/len(Y_sklearn[0])
    return distance
Example #14
    def Q1(self):

        # part one
        class1 = np.random.multivariate_normal(self.m1, self.cov, 1000).T
        class2 = np.random.multivariate_normal(self.m2, self.cov, 1000).T
        plt.plot(class1[0,:], class1[1,:], 'x')
        plt.plot(class2[0,:], class2[1,:], 'x')



        # part two : calculate pca
        samples = np.concatenate((class1, class2), axis=1)

        mlab_pca = mlabPCA(samples.T)
        plt.figure(2)
        plt.plot(mlab_pca.Y[0:1000, 0], 'o', markersize=7, color='blue', alpha=0.5, label='class1')
        plt.plot(mlab_pca.Y[1000:2000, 0], '^', markersize=7, color='yellow', alpha=0.5, label='class2')


        # part three
        plt.figure(1)
        sklearn_pca = sklearnPCA(n_components=1)
        sklearn_transf = sklearn_pca.fit_transform(samples.T)
        p = sklearn_pca.inverse_transform(sklearn_transf)
        plt.figure(1)
        plt.plot(p[0:1000, 0], p[0:1000, 1], 'x')
        plt.plot(p[1000:2000, 0], p[1000:2000, 1], 'x')

        error = ((p - samples.T) ** 2).mean()
        print(error)
        print(np.sqrt(error))

        plt.show()
def pcaTransform(context, mesh, features, K=5):
    #         X_std = features;#StandardScaler().fit_transform(X);
    X_std = StandardScaler().fit_transform(features)
    sklearn_pca = sklearnPCA(n_components=K)
    Y_sklearn = sklearn_pca.fit_transform(X_std)

    mu = sklearn_pca.mean_
    mu.shape = (mu.shape[0], 1)
    D = sklearn_pca.explained_variance_
    D_ratio = sklearn_pca.explained_variance_ratio_
    V = sklearn_pca.components_
    print('*' * 40)
    print('DATA ENTRIES SHAPE ::: ', features.shape)
    print('MEAN MATRIX SHAPE ::: ', mu.shape)
    print('EIGEN VALUES SHAPE ::: ', D.shape)
    print('EIGEN VECTORS SHAPE ::: ', V.shape)
    print('TRANSFORMED SHAPE ::: ', Y_sklearn.shape)
    sio.savemat(
        bpy.path.abspath('%s/%s.mat' % (mesh.signatures_dir, mesh.name)), {
            'eigenvectors': V.T,
            'eigenvalues': D,
            'mu': mu,
            'X': X_std,
            'XMinusMu': (X_std.T - mu),
            'transformed': Y_sklearn
        })
    print('FINISHED SAVING ::: %s/%s.mat' % (mesh.signatures_dir, mesh.name))
    return mu, Y_sklearn
Example #16
def f(train,threshold,test):
    hi=h(train)
    h_score=pd.DataFrame(hi, index=np.array(range(1,21149)))
    gene_ls=h_score.index[h_score.iloc[:,0]>threshold].tolist()
    candidate_genes=['V{0}'.format(element) for element in gene_ls]

    # qualified genes were selected 

    stdsc = preprocessing.StandardScaler()
    np_scaled_train = stdsc.fit_transform(train.loc[:,candidate_genes])
    np_scaled_test  = stdsc.transform(test.loc[:,candidate_genes])
    pca = sklearnPCA(n_components=1)   
    X_train_pca = pca.fit_transform(np_scaled_train) # This is the result 
    X_test_pca  = pca.transform(np_scaled_test)
    eigen_val=pca.explained_variance_  #eigen value is the explained variance 

    
    #assign pca score to the test dataset 
    test=test.assign(w=pd.Series(np.ones(len(test.patient_id))))
    test['w']=X_test_pca
    testset_surv=test[['event_free_survival_time_days','death','w']]
    
    #do cox-regression

    # Using Cox Proportional Hazards model
    cph = CoxPHFitter()
    cph.fit(testset_surv,'event_free_survival_time_days',event_col='death')
    
    return cph.print_summary()
Example #17
    def format_data(self):

        kdd_train_data = np.concatenate([
            self.train_kdd_numeric, self.train_kdd_binary,
            self.train_kdd_nominal
        ],
                                        axis=1)
        kdd_test_data = np.concatenate([
            self.test_kdd_numeric, self.test_kdd_binary, self.test_kdd_nominal
        ],
                                       axis=1)

        kdd_train_data = np.concatenate(
            [kdd_train_data, self.train_kdd_label_2classes], axis=1)
        # kdd_test_data = np.concatenate([self.test_kdd_numeric, self.test_kdd_binary, self.test_kdd_nominal, self.test_kdd_label_2classes], axis=1)
        kdd_test_data = np.concatenate(
            [kdd_test_data, self.test_kdd_label_2classes], axis=1)
        self.X_train, self.X_test = kdd_train_data[:, :-1], kdd_test_data[:, :-1]
        y_train, y_test = kdd_train_data[:, -1], kdd_test_data[:, -1]

        data_pca = sklearnPCA(n_components=15)
        data_pca = data_pca.fit(self.X_train)
        # numeric_pca = numeric_pca.fit(np.concatenate((self.train_kdd_numeric, self.test_kdd_numeric), axis=0))
        self.X_train = data_pca.transform(self.X_train)
        self.X_test = data_pca.transform(self.X_test)

        self.y_train = np.array(list(map(int, y_train)))
        self.y_test = np.array(list(map(np.int64, y_test)))
Example #18
def run_pca(expression):
    # Load Expression data

    df = pd.read_table(expression, header=0, index_col=0)
    run_ids = list(df.columns.values)
    dataMatrix = np.transpose(np.array(df))

    run_ids = [s.replace('.htseq', '') for s in run_ids]

    # Run PCA
    sklearn_pca = sklearnPCA(n_components=2)
    sklearn_transf = sklearn_pca.fit_transform(
        preprocessing.maxabs_scale(dataMatrix, axis=0))

    with sns.axes_style("whitegrid"):
        for run, pca_data in zip(run_ids, sklearn_transf):
            plt.plot(pca_data[0],
                     pca_data[1],
                     'o',
                     markersize=7,
                     alpha=0.5,
                     color='gray')
            plt.text(pca_data[0], pca_data[1], run)

        plt.xlabel('PC 1 (%0.2f %%)' %
                   (sklearn_pca.explained_variance_ratio_[0] * 100))
        plt.ylabel('PC 2 (%0.2f %%)' %
                   (sklearn_pca.explained_variance_ratio_[1] * 100))

        plt.show()
Example #19
def PCA(df, class_name):
    # Cannot do PCA on a matrix with fewer than two columns
    if len(list(df)) < 2:
        return df, [], []

    # Figure out which columns can be considered (only float columns, excluding the class column)
    cols = []
    for item in list(df):
        if 'float' in df[item].dtypes.name:
            if item != class_name:
                cols.append(df.columns.get_loc(item))

    # Get new dataframe
    df_new = df[df.columns[cols]]

    # Set this as the data to analyze
    X = df_new.values

    # Standardize the data
    X_std = StandardScaler().fit_transform(X)

    # Do PCA
    pca = sklearnPCA(n_components=len(list(df_new)))
    Y = pca.fit_transform(X_std)

    ## Get variance contributions
    var_exp = pca.explained_variance_ratio_
    cum_var_exp = pca.explained_variance_ratio_.cumsum()

    return Y, var_exp, cum_var_exp
Example #20
 def pca(self,winSize):
     data = np.zeros((len(self.data),len(self.data['FUNC'][winSize])))
     i=0
     for dEl in sorted(self.data):
         self.data[dEl][winSize] = normalizeMaxMin(self.data[dEl][winSize])
         data[i] = self.data[dEl][winSize]
         i+=1
         
     X_std = StandardScaler().fit_transform(np.transpose(data))
     sklearn_pca = sklearnPCA(n_components=2)
     Y_sklearn = sklearn_pca.fit_transform(X_std)
     traces = []
     
     trace = go.Scatter(
         x=Y_sklearn[:,0],
         y=Y_sklearn[:,1],
         mode='markers',
         marker = go.Marker(
             size=12,
             line= go.Line(
                 color='rgba(217, 217, 217, 0.14)',
                 width=0.5),
             opacity=0.8))
     traces.append(trace)
     
     
     data = go.Data(traces)
     layout = go.Layout(xaxis = go.XAxis(title='PC1', showline=False),
                        yaxis = go.YAxis(title='PC2', showline=False))
     fig = go.Figure(data=data, layout=layout)
     if self.outputType=='file':
         print(py.plot(fig, filename='pca.html'))
     else:
         return py.plot(fig, output_type='div')
def dataframe_components(df2,lon,columns):

    import pandas as pd
    
    from sklearn.decomposition import PCA as sklearnPCA
    X=df2.values
    from sklearn.preprocessing import StandardScaler
    X_std = StandardScaler().fit_transform(X)
    pca=sklearnPCA(n_components=lon).fit_transform(X_std)
    list_comp_pca=[]


    # Build a DataFrame with the principal components

    for i in range(0,lon):
        v="Componente"+str(i)
        list_comp_pca.append(v)

    dd1=pd.DataFrame(X_std,columns=columns)
    dd2=pd.DataFrame(pca,columns=list_comp_pca)
    df3=pd.concat([dd1,dd2],axis=1)
    return df3
Example #22
    def pca(self):

        # remove WHERE when table cleaned up to remove header rows
        statement = (
            """SELECT transcript_id, TPM, sample_id FROM %s
        where transcript_id != 'Transcript' """
            % self.table
        )

        # fetch data
        df = self.getDataFrame(statement)

        # put dataframe so row=genes, cols = samples, cells contain TPM
        pivot_df = df.pivot("transcript_id", "sample_id")["TPM"]

        # filter dataframe to get rid of genes where TPM == 0 across samples
        filtered_df = pivot_df[pivot_df.sum(axis=1) > 0]

        # add a small pseudocount (0.1) and log transform the data.
        logdf = np.log(filtered_df + 0.1)

        # Scale dataframe so variance =1 across rows
        logscaled = sklearn_scale(logdf, axis=1)

        # turn array back to df and add transcript id back to index
        logscaled_df = pd.DataFrame(logscaled)
        logscaled_df.index = list(logdf.index)

        # Now do the PCA - can change n_components
        sklearn_pca = sklearnPCA(n_components=self.n_components)
        sklearn_pca.fit(logscaled_df)

        index = logdf.columns

        return sklearn_pca, index
def pca_analysis(indexname,dataframe):
    df = dataframe
    column_count = len(df.columns)

    X = df.iloc[:,1:column_count].values
    zip_codes = df.iloc[:,0].values

    #Standardize Data
    X_std = StandardScaler().fit_transform(X)
        
    #Generate PCA Components
    sklearn_pca = sklearnPCA(n_components=1)
    Y_sklearn = sklearn_pca.fit_transform(X_std)

    explained_ratio = sklearn_pca.explained_variance_ratio_
    covariance_array = sklearn_pca.get_covariance()

    df_final = pd.DataFrame({'zip5':zip_codes,indexname:Y_sklearn[:,0]})
    
    #Normalize Data on a 0 to 1 scale
    #zip5_final = df_final['zip5'].values
    #minmax_scale = preprocessing.MinMaxScaler().fit(df_final[[indexname]])
    #minmax = minmax_scale.transform(df_final[[indexname]])
    #df_minmax = pd.DataFrame({'zip5':zip5_final,indexname:minmax[:,0]})

    return df_final
Example #24
    def __init__(self):
        """ c'tor """

        self.pca = sklearnPCA(n_components=2)
        self.fit_executed = False
        self.X_tr = None
        self.Xn_tr = None
Example #25
 def testPCA(self, dist):
     sklearn_pca = sklearnPCA(n_components=2)
     sklearn_transf = -1 * sklearn_pca.fit_transform(dist)
     print('MYPCA')
     print(self.PCA(dist, 2)[0])
     print('PCALibrary')
     print(sklearn_transf)
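The -1 factor above compensates for the sign indeterminacy of PCA: components are only defined up to sign, so two correct implementations can disagree by a per-axis flip. A sketch of a sign-insensitive comparison (t2 stands in for another implementation's output):

import numpy as np
from sklearn.decomposition import PCA as sklearnPCA

rng = np.random.default_rng(0)
X = rng.normal(size=(30, 4))

t1 = sklearnPCA(n_components=2).fit_transform(X)
t2 = -t1                                     # e.g. another implementation's projection
print(np.allclose(np.abs(t1), np.abs(t2)))   # True: equal up to sign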
Example #26
def read_dataset(Normalize=1):
    data = pd.read_csv('../../Dataset/Iris/Iris_2_classes.csv')  #read dataset
    data['Species'] = data['Species'].replace(
        ["Iris-setosa", "Iris-versicolor"], (0, 1))  #encode label
    y = data['Species']  #label
    x = data.drop(['Species', 'Id'], axis=1)  #drop label and id
    x = np.asarray(x)  #put it in array
    y = np.asarray(y)  #put it in array
    x = np.nan_to_num(x)  #convert any nan to 0
    train_x, test_x, train_y, test_y = train_test_split(
        x, y, test_size=0.2,
        random_state=50)  #split dataset to 80 train and 20 test
    if Normalize == 1:  #normalize dataset
        scaler = MinMaxScaler()
        train_x = scaler.fit_transform(train_x)
        test_x = scaler.transform(test_x)
    # draw data
    pca = sklearnPCA(n_components=2)  # 2-dimensional PCA
    transX = pd.DataFrame(pca.fit_transform(x))
    plt.scatter(transX[y == 0][0], transX[y == 0][1], label='Class 1', c='red')
    plt.scatter(transX[y == 1][0],
                transX[y == 1][1],
                label='Class 2',
                c='blue')
    plt.legend()
    plt.show()
    ##################
    return x, y, train_x, train_y, test_x, test_y
Example #27
 def pcaWiki(self,file):
     self.formDataPCA(file)           
     X_std = StandardScaler().fit_transform(self.X)
     sklearn_pca = sklearnPCA(n_components=2)
     Y_sklearn = sklearn_pca.fit_transform(X_std)
     traces = []
 
     for name in self.names:
         trace = go.Scatter(
             x=Y_sklearn[self.y==name,0],
             y=Y_sklearn[self.y==name,1],
             mode='markers',
             name=name,
             marker=go.Marker(
                 size=12,
                 line=go.Line(
                     color='rgba(217, 217, 217, 0.14)',
                     width=0.5),
                 opacity=0.8))
         traces.append(trace)
     data = go.Data(traces)
     layout = go.Layout(xaxis=go.XAxis(title='PC1', showline=False),
                     yaxis=go.YAxis(title='PC2', showline=False))
     fig = go.Figure(data=data, layout=layout)
     if (self.outputType == 'file'):
         print(py.plot(fig,filename='pca')) 
     else:
         return (py.plot(fig,include_plotlyjs='False',output_type='div')) 
Example #28
def main():
	
	with open('dataset.pkl', 'rb') as f:
		xr, y, features = pickle.load(f)

	#y = xr.label
	#Xw = xr.drop('label',axis = 1 )
	Xw = xr
	Xw = Xw.fillna(method='ffill')
	Xw = Xw.fillna(method='bfill')

	#ax = sns.countplot(y, label="Count")
	#plt.show()
	Wk, Tr, Bs, Cr, Nt, Bk = y.value_counts()
	print('Number of Bike: ',Bk)
	print('Number of Bus: ',Bs)
	print('Number of Car: ',Cr)
	print('Number of Nothing: ',Nt)
	print('Number of Train: ',Tr)
	print('Number of Walk: ',Wk)

	pca = sklearnPCA(n_components=2) #2-dimensional PCA
	transformed = pd.DataFrame(pca.fit_transform(Xw))

	plt.scatter(transformed[y=='Walk'][0], transformed[y=='Walk'][1], label='Walk', c='darkgreen')
	plt.scatter(transformed[y=='Bike'][0], transformed[y=='Bike'][1], label='Bike', c='red')
	plt.scatter(transformed[y=='Train'][0], transformed[y=='Train'][1], label='Train', c='yellow')
	plt.scatter(transformed[y=='Bus'][0], transformed[y=='Bus'][1], label='Bus', c='blue')
	plt.scatter(transformed[y=='Car'][0], transformed[y=='Car'][1], label='Car', c='lightgreen')
	plt.scatter(transformed[y=='Nothing'][0], transformed[y=='Nothing'][1], label='Nothing', c='black')

	plt.legend()
	plt.show()
Example #29
def pca(X, y, labels, pic_file, PCA=True):
    from mpl_toolkits.mplot3d import Axes3D 
    
    # fit the data for PCA and plotting
    # when PCA=False, plot the original data into 3D
    pca = sklearnPCA (n_components = 3) 
    if PCA==True: X_pca = pd.DataFrame (pca.fit_transform (X)) 
    else: X_pca=X

    # colors and markers for each class
    colors = {0:"b", 1:"r", 2:"g", 3:"c", 4:"m", 5:"k"}
    markers = {0:"o", 1:"^", 2:"D", 3:"*", 4:"x", 5:"p"}
    num_labels = len(labels)
    
    #Set a figure object
    ans=input("Save 3D plot of samples as a picture? (y/n): ")
    if (ans=="y") or (ans=="Y"): save=True
    else: save=False
    
    fig = plt.figure (figsize = (10, 10)) 
    ax = fig.add_subplot (111, projection = "3d")
    
    for i in range(num_labels):
        X_in_cls=X_pca[y == labels[i]]
        ax.scatter(X_in_cls[0], X_in_cls[1], X_in_cls[2], \
                   c = colors[i], marker = markers[i], label = labels[i])
    ax.legend ()
    plt.title (pic_file.split("_")[0])
    
    plt.show()
    
    if save==True:
        fig.savefig("%s%s_pca.png" % (VAR.out_path, pic_file), \
                bbox_inches="tight")
Example #30
def cluster_model(users_data, num_cluster=3):

    array_users = users_data.values
    X = array_users[:, 1:17]
    X_std = StandardScaler().fit_transform(X)

    sklearn_pca = sklearnPCA(n_components=3)
    Y_sklearn = sklearn_pca.fit_transform(X_std)
    eigenValues = sklearn_pca.explained_variance_ratio_
    loadings = sklearn_pca.components_

    mu = np.mean(X, axis=0)

    nComp = 2
    Xhat = np.dot(
        sklearn_pca.transform(X)[:, :nComp],
        sklearn_pca.components_[:nComp, :])
    Xhat = mu + Xhat
    Xhat = pd.DataFrame(Xhat)

    # X = PCA on previous data
    X = Xhat.iloc[:, 0:2]
    k = num_cluster  # Define the number of clusters in which we want to partion the data
    kmeans = KMeans(n_clusters=k)  # Run the algorithm kmeans
    kmeans.fit(X)
    ##sklearn.preprocessing.StandardScaler
    centroids = kmeans.cluster_centers_  # Get centroid's coordinates for each cluster
    labels = kmeans.labels_  # Get labels assigned to each data
    final_labels = users_data[['user_id']]
    final_labels['labels'] = pd.DataFrame(labels)

    return final_labels
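Example #30 reconstructs an approximation of the data from the first nComp components (Xhat = mu + scores · components); note it fits the PCA on X_std but then transforms the unscaled X. A self-contained sketch of the reconstruction identity itself (assuming numpy and scikit-learn):

import numpy as np
from sklearn.decomposition import PCA as sklearnPCA

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 5))

pca = sklearnPCA(n_components=5).fit(X)
scores = pca.transform(X)
X_hat = pca.mean_ + scores @ pca.components_                   # full-rank reconstruction
print(np.allclose(X, X_hat))                                   # True: all components kept
X_hat2 = pca.mean_ + scores[:, :2] @ pca.components_[:2, :]    # rank-2 approximation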
Example #31
def run_pca(table, n_components):
    '''
    Runs PCA on a given table for a given number of components
    Params:
        table (2d array): array of traces 
        n_components (int): Number of components to be visualized
    Returns:
        covar_matrix: Covariance matrix
        variance (list): list of variances
        components (list): list of components 
    '''
    #calculate variance explained and cumulative variance explained
    covar_matrix = sklearnPCA(n_components=n_components)
    covar_matrix.fit(table)
    variance = covar_matrix.explained_variance_ratio_
    var = np.cumsum(
        np.round(covar_matrix.explained_variance_ratio_, decimals=3) * 100)

    #print graph of the variance explained with [n] features
    plt.ylabel('%variance explained')
    plt.xlabel('# of principal components')
    plt.title('Variance Explained')
    ## plt.ylim(70,100.5)
    plt.style.context('seaborn-whitegrid')
    # plt.plot(variance[:n_components])
    variance = covar_matrix.explained_variance_ratio_
    components = covar_matrix.components_
    return covar_matrix, variance, components
Example #32
 def pca_built(self, all_samples):
     from sklearn.decomposition import PCA as sklearnPCA
     sklearn_pca = sklearnPCA(n_components=2)
     sklearn_transf = sklearn_pca.fit_transform(all_samples.T)
     sklearn_transf = sklearn_transf * (-1)
     plt.plot(sklearn_transf[0:20, 0],
              sklearn_transf[0:20, 1],
              'o',
              markersize=7,
              color='yellow',
              alpha=0.5,
              label='class1')
     plt.plot(sklearn_transf[20:40, 0],
              sklearn_transf[20:40, 1],
              '^',
              markersize=7,
              color='black',
              alpha=0.5,
              label='class2')
     plt.xlabel('x_values')
     plt.ylabel('y_values')
     plt.xlim([-4, 4])
     plt.ylim([-4, 4])
     plt.legend()
     plt.title('Transformed samples with class labels from built PCA')
     plt.draw()
     plt.show()
def implement_pca_between_two_frames(image1, image2):

    #read image
    pic1 = cv2.imread(image1)
    pic2 = cv2.imread(image2)

    #convert BGR to Gray
    prvs = cv2.cvtColor(pic1, cv2.COLOR_BGR2GRAY)
    next = cv2.cvtColor(pic2, cv2.COLOR_BGR2GRAY)

    #calculate optical flow
    flow = cv2.calcOpticalFlowFarneback(prvs, next, None, 0.5, 3, 15, 3, 5,
                                        1.2, 0)

    #obtain angle matrix: _ is magnitude and angle_matrix is measure by degree now.
    _, angle_matrix = cv2.cartToPolar(flow[..., 0],
                                      flow[..., 1],
                                      angleInDegrees=True)

    #implement normal PCA based on the coarse foreground
    sklearn_pca = sklearnPCA()
    angle_std = StandardScaler().fit_transform(angle_matrix)
    sklearn_pca.fit_transform(angle_std)

    #convert to uint8
    pca_implement = angle_std.astype(np.uint8)

    #write image
    cv2.imwrite('pca_fore_ground_matrix_' + str(image1) + '.png',
                pca_implement)

    #close any OpenCV windows
    cv2.destroyAllWindows()
Example #34
    def pca(self):

        # remove WHERE when table cleaned up to remove header rows
        statement = ("""SELECT transcript_id, TPM, sample_id FROM %s
        where transcript_id != 'Transcript' """ % self.table)

        # fetch data
        df = self.getDataFrame(statement)

        # put dataframe so row=genes, cols = samples, cells contain TPM
        pivot_df = df.pivot('transcript_id', 'sample_id')['TPM']

        # filter dataframe to get rid of genes where TPM == 0 across samples
        filtered_df = pivot_df[pivot_df.sum(axis=1) > 0]

        # add a small pseudocount (0.1) and log transform the data.
        logdf = np.log(filtered_df + 0.1)

        # Scale dataframe so variance =1 across rows
        logscaled = sklearn_scale(logdf, axis=1)

        # turn array back to df and add transcript id back to index
        logscaled_df = pd.DataFrame(logscaled)
        logscaled_df.index = list(logdf.index)

        # Now do the PCA - can change n_components
        sklearn_pca = sklearnPCA(n_components=self.n_components)
        sklearn_pca.fit(logscaled_df)

        index = logdf.columns

        return sklearn_pca, index
Example #35
def kmeans():

    yeast_t = 7
    yeast_k = 6

    yeastData = np.empty([614, 7], dtype = float)
    with open('YeastGene.csv', 'r') as yeastdata:
        yeastreader = csv.reader(yeastdata, delimiter=',')
        i = 0
        for row in yeastreader:
            yeastData[i] = row
            i += 1
    #print yeastData

    yeastCentroid = np.empty([yeast_k, 7], dtype = float)
    with open('YeastGene_Initial_Centroids.csv', 'r') as yeastdata:
        yeastreader = csv.reader(yeastdata, delimiter=',')
        i = 0
        for row in yeastreader:
            yeastCentroid[i] = row
            i += 1
    #print yeastCentroid

    for t in range(0, yeast_t):
        yeast_c = [[] for i in range(0,yeast_k)]
        minCentroid = []
        for arr in yeastData:
            for cen in yeastCentroid:
                minCentroid.append(np.linalg.norm(arr - cen))
            yeast_c[minCentroid.index(min(minCentroid))].append(arr)
            minCentroid = []

        for k in range(0,yeast_k):
            yeastCentroid[k] = [float(sum(l))/len(l) for l in zip(*yeast_c[k])]
    #print "The new yeast Centroid values\n"
    #print yeastCentroid

    #print "The cluster sizes are - "
    print(len(yeast_c[0]), len(yeast_c[1]), len(yeast_c[2]), len(yeast_c[3]), len(yeast_c[4]), len(yeast_c[5]))
    clusters = np.zeros([614, 7], dtype=float)
    prev_len = 0
    for i in range(0,6):
        for j in range(0,len(yeast_c[i])):
            clusters[prev_len] = yeast_c[i][j]
            prev_len += 1

    sklearn_pca = sklearnPCA(n_components = 2)
    transf = sklearn_pca.fit_transform(clusters)
    plt.plot(transf[0:140, 0], transf[0:140, 1],'*', markersize = 7, color='blue', alpha=0.5, label='cluster 1')
    plt.plot(transf[140:191, 0], transf[140:191, 1],'*', markersize = 7, color='red', alpha=0.5, label='cluster 2')
    plt.plot(transf[191:355, 0], transf[191:355, 1],'*', markersize = 7, color='green', alpha=0.5, label='cluster 3')
    plt.plot(transf[355:376, 0], transf[355:376, 1],'*', markersize = 7, color='indigo', alpha=0.5, label='cluster 4')
    plt.plot(transf[376:538, 0], transf[376:538, 1],'*', markersize = 7, color='yellow', alpha=0.5, label='cluster 5')
    plt.plot(transf[538:614, 0], transf[538:614, 1],'*', markersize = 7, color='black', alpha=0.5, label='cluster 6')
    plt.xlim([-10, 10])
    plt.ylim([-10, 10])
    plt.legend()
    plt.title("Kmeans")
    plt.show()
 def dimensionalityReduction(self,nr=5):
     '''It applies all the dimensionality reduction techniques available in this class:
     Techniques available:
                         'PCA'
                         'FactorAnalysis'
                         'KPCArbf','KPCApoly'
                         'KPCAcosine','KPCAsigmoid'
                         'IPCA'
                         'FastICADeflation'
                         'FastICAParallel'
                         'Isomap'
                         'LLE'
                         'LLEmodified'
                         'LLEltsa'
     '''
     dataset=self.ModelInputs['Dataset']
     sklearn_pca = sklearnPCA(n_components=nr)
     p_components = sklearn_pca.fit_transform(dataset)
     fa=FactorAnalysis(n_components=nr)
     factors=fa.fit_transform(dataset)
     kpca=KernelPCA(nr,kernel='rbf')
     rbf=kpca.fit_transform(dataset)
     kpca=KernelPCA(nr,kernel='poly')
     poly=kpca.fit_transform(dataset)
     kpca=KernelPCA(nr,kernel='cosine')
     cosine=kpca.fit_transform(dataset)
     kpca=KernelPCA(nr,kernel='sigmoid')
     sigmoid=kpca.fit_transform(dataset)
     ipca=IncrementalPCA(nr)
     i_components=ipca.fit_transform(dataset)
     fip=FastICA(nr,algorithm='parallel')
     fid=FastICA(nr,algorithm='deflation')
     ficaP=fip.fit_transform(dataset)
     ficaD=fid.fit_transform(dataset)
     '''isomap=Isomap(n_components=nr).fit_transform(dataset)
     try:
         lle1=LocallyLinearEmbedding(n_components=nr).fit_transform(dataset)
     except ValueError:
         lle1=LocallyLinearEmbedding(n_components=nr,eigen_solver='dense').fit_transform(dataset)
     try:
         
         lle2=LocallyLinearEmbedding(n_components=nr,method='modified').fit_transform(dataset)
     except ValueError:
         lle2=LocallyLinearEmbedding(n_components=nr,method='modified',eigen_solver='dense').fit_transform(dataset) 
     try:
         lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa').fit_transform(dataset)
     except ValueError:
         lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa',eigen_solver='dense').fit_transform(dataset)'''
     values=[p_components,factors,rbf,poly,cosine,sigmoid,i_components,ficaD,ficaP]#,isomap,lle1,lle2,lle3]
     keys=['PCA','FactorAnalysis','KPCArbf','KPCApoly','KPCAcosine','KPCAsigmoid','IPCA','FastICADeflation','FastICAParallel']#,'Isomap','LLE','LLEmodified','LLEltsa']
     self.ModelInputs.update(dict(zip(keys, values)))
     [self.datasetsAvailable.append(key) for key in keys ]
     
     #debug
     #dataset=pd.DataFrame(self.ModelInputs['Dataset'])
     #dataset['Output']=self.ModelOutput
     #self.debug['Dimensionalityreduction']=dataset
     ###
     return self
def apply_pca(data):
    from sklearn.preprocessing import StandardScaler
    X_std = StandardScaler().fit_transform(data)

    from sklearn.decomposition import PCA as sklearnPCA
    sklearn_pca = sklearnPCA(n_components=2)
    Y_sklearn = sklearn_pca.fit_transform(X_std)

    return Y_sklearn
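apply_pca chains StandardScaler and PCA by hand; the same two steps can be expressed as a scikit-learn Pipeline. A sketch, not part of the original:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA as sklearnPCA

pipeline = make_pipeline(StandardScaler(), sklearnPCA(n_components=2))
# Y = pipeline.fit_transform(data)   # equivalent to apply_pca(data)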
Example #38
def pcaDecomp(data, normalize = True):
  if normalize:
    data = StandardScaler().fit_transform(data)

  pca = sklearnPCA(n_components = 2)
  decomp = pca.fit_transform(data)
  # plt.scatter(data[:,0], data[:,1])
  # plt.show()
  histo2d(decomp, ranged = False)
Example #39
 def pca(self, samples):
     '''
     Apply pca from sklearn.
     '''
     sklearn_pca = sklearnPCA(n_components=2)
     # Fit the model with samples
     fit = sklearn_pca.fit(samples)
     # Apply the dimensionality reduction on samples
     pca = fit.transform(samples)
     return pca
def pca_json(df, n_components=4, exp_var_min=.05):
    sklearn_pca = sklearnPCA(n_components=n_components)
    pca_points = sklearn_pca.fit_transform(df.T)
    exp_var, num_pc = pc_to_keep(sklearn_pca.explained_variance_ratio_,
                                 exp_var_min)
    pca_points_df = trim_pc(pca_points, num_pc)
    pca_points_df['sample'] = df.columns.values
    pca_points_df = append_exp_var(pc_df=pca_points_df,
                                   exp_var_list=exp_var,
                                   num_pc=num_pc)
    return pca_points_df
Example #41
def sklearn_practice():
    from sklearn.decomposition import PCA as sklearnPCA
    import numpy as np

    class1_sample, class2_sample = random_gen()
    all_samples = np.concatenate((class1_sample, class2_sample), axis=1)

    sklearn_pca = sklearnPCA(n_components=2)
    sklearn_transf = sklearn_pca.fit_transform(all_samples.T)

    return sklearn_transf
Example #42
def plotGraph(samples, n_samples, tags, dimensions):

    colours = ['blue', 'red', 'green', 'yellow', 'black']
    n_tags = len(tags)

    if dimensions == '2D':
        sklearn_pca = sklearnPCA(n_components=2)
        sklearn_transf = sklearn_pca.fit_transform(samples)

        for i in range(n_tags):
            plt.plot(sklearn_transf[i*n_samples:(i+1)*n_samples,0],sklearn_transf[i*n_samples:(i+1)*n_samples,1],\
                 'o', markersize=7, color=colours[i], alpha=0.5, label=tags[i])

        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
    #     plt.xlim([-4,4])
    #     plt.ylim([-4,4])
        plt.legend()
        plt.title('PCA')

    elif dimensions == '3D':
        sklearn_pca = sklearnPCA(n_components=3)
        sklearn_transf = sklearn_pca.fit_transform(samples)

        fig = plt.figure(figsize=(8,8))
        ax = fig.add_subplot(111, projection='3d')
        plt.rcParams['legend.fontsize'] = 10

        for i in range(n_tags):
            ax.plot(sklearn_transf[i*n_samples:(i+1)*n_samples,0], sklearn_transf[i*n_samples:(i+1)*n_samples,1],\
                sklearn_transf[i*n_samples:(i+1)*n_samples,2], 'o', markersize=8, color=colours[i], alpha=0.5, label=tags[i])

        plt.title('PCA')
        ax.legend(loc='upper right')

    # plt.savefig("%s.png" % (dimensions), bbox_inches='tight',dpi=200)
    plt.show()
    # plt.close()

    return True
def plotGraph(samples, word, dimensions):
    if dimensions == '2D':
        sklearn_pca = sklearnPCA(n_components=2)
        sklearn_transf = sklearn_pca.fit_transform(samples)

        plt.plot(sklearn_transf[:,0],sklearn_transf[:,1],\
             'o', markersize=7, color='blue', alpha=0.5, label='')
        # plt.plot(sklearn_transf[1::2,0], sklearn_transf[1::2,1],\
        #      '^', markersize=7, color='red', alpha=0.5, label='Matrix')

        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
    #     plt.xlim([-4,4])
        plt.ylim([-.8,.8])
        plt.legend()
        plt.title('Word embeddings PCA')

        print(sklearn_transf)

    elif dimensions == '3D':
        sklearn_pca = sklearnPCA(n_components=3)
        sklearn_transf = sklearn_pca.fit_transform(samples)

        fig = plt.figure(figsize=(8,8))
        ax = fig.add_subplot(111, projection='3d')
        plt.rcParams['legend.fontsize'] = 10
        ax.plot(sklearn_transf[:,0], sklearn_transf[:,1],\
            sklearn_transf[:,2], 'o', markersize=8, color='blue', alpha=0.5, label='')
        # ax.plot(sklearn_transf[:,0], sklearn_transf[:,1],\
        #     sklearn_transf[:,2], '^', markersize=8, alpha=0.5, color='red', label='Matrix')

        plt.title('Word embeddings PCA')
        ax.legend(loc='upper right')

        print(sklearn_transf)

    plt.savefig("%s-%s.png" % (word, dimensions), bbox_inches='tight', dpi=200)
    plt.close()

    return True
def Seleccion_Ratios(df):

    import numpy as np
    import pandas as pd
    
    #from sklearn import metrics
    from sklearn.decomposition import PCA as sklearnPCA
    from sklearn.preprocessing import StandardScaler
    from sklearn.ensemble import RandomForestClassifier
    
    # Drop the id and target columns before computing the PCAs.
    
    df.columns = [x.lower() for x in df.columns]
    objetivo = [col for col in df.columns if 'target' in col]
    objetivo = ''.join(objetivo)

    dfBorrar = df[['id', objetivo]]
    borrar = ['id', objetivo]
    dfaux = df.drop(borrar, axis=1)
    
    ListaColumnas= dfaux.columns
    tamDf = len(dfaux.columns)
    X_std = StandardScaler().fit_transform(dfaux.values)
    pca=sklearnPCA(n_components=tamDf).fit_transform(X_std)
    columnas_pca=[]
   
    for i in range(0,pca.shape[1]):
        v="VAR_PCA_"+str(i)
        columnas_pca.append(v)

    df1=pd.DataFrame(X_std,columns=ListaColumnas)
    df2=pd.DataFrame(pca,columns=columnas_pca)
    
   
    df_PCA=pd.concat([df1,df2],axis=1)
    
    y = df[objetivo]
   
    
    forest = RandomForestClassifier(n_estimators=250, random_state=0)
    forest.fit(df_PCA, y)
    importances = forest.feature_importances_
    std = np.std([tree.feature_importances_ for tree in forest.estimators_], axis=0)
    indices = np.argsort(importances)[::-1]

    # Get the ranking of the top 30 features
    print("TOP 30:")
    
    for f in range(30):
        print("%d. Ratio %s (%f) " % (f + 1, df_PCA.columns[indices[f]], importances[indices[f]] ))
Example #45
    def plot(self):

        self.train()
        # this will get data frame in self.mllib.X_train
        X = self.mllib.X_train.iloc[:,:-1]
        Y = self.mllib.X_train.iloc[:,-1]

        # project the features into 3D and append the labels as a fourth column
        pca = sklearnPCA(n_components=3).fit(X)
        X = pca.transform(X)
        Y = Y.values.reshape(Y.shape[0], 1)
        X = np.append(X, Y, 1)

        self.mllib.plot(X)
Example #46
	def pca_built(self, all_samples):
		from sklearn.decomposition import PCA as sklearnPCA
		sklearn_pca = sklearnPCA(n_components=2)
		sklearn_transf = sklearn_pca.fit_transform(all_samples.T)
		sklearn_transf = sklearn_transf*(-1)
		plt.plot(sklearn_transf[0:20, 0], sklearn_transf[0:20, 1], 'o', markersize=7, color='yellow', alpha=0.5, label='class1')
		plt.plot(sklearn_transf[20:40, 0], sklearn_transf[20:40, 1], '^', markersize=7, color='black', alpha=0.5, label='class2')
		plt.xlabel('x_values')
		plt.ylabel('y_values')
		plt.xlim([-4, 4])
		plt.ylim([-4, 4])
		plt.legend()
		plt.title('Transformed samples with class labels from built PCA')
		plt.draw()
		plt.show()
def dim_reduction_PCA(X,n_dim):
    """ Reduce the dimension by PCA.

    :param X: matrix data (n*k), n is the number of samples. k is the dimension of each sample
    :param n_dim: number of dimension we desired to reduce to.
    :return reduced_X:matrix data(n*n_dim)
    """

    try:
        reduced_X = sklearnPCA(n_components=n_dim).fit_transform(X)
    except Exception:
        print ("Dimension Error")
        reduced_X = []
    finally:
        return reduced_X
def best_dimension(X,n_com = 0.8):
    """ get the number of dimensions to keep

    :param X: matrix data (n*k), n is the number of samples. k is the dimension of each sample
    :param n_com: fraction of explained variance to keep (a float in (0,1)), passed to PCA as n_components
    :return best_dimension: the number of components chosen, or 0 on error
    """

    try:
        pca = sklearnPCA(n_components=n_com)
        pca.fit_transform(X)
        return pca.n_components_
    except Exception:
        print ("Dimension Error")
        return 0
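best_dimension relies on a scikit-learn feature: a float n_components in (0, 1) keeps the smallest number of components whose cumulative explained variance reaches that fraction. A short sketch on random data (illustrative only):

import numpy as np
from sklearn.decomposition import PCA as sklearnPCA

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 8))

pca = sklearnPCA(n_components=0.8).fit(X)
print(pca.n_components_)                        # chosen dimension
print(pca.explained_variance_ratio_.cumsum())   # cumulative variance per component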
Example #49
def deaPCA(df, allres=False, normalise=False, plot=True):
    """
    Extract principal components from pandas dataframe and shift distribution
    so that all values are strictly positive, as required for DEA.

    Takes:
        df: A dataframe of series to run the PCA on.
        allres: Boolean. Set True if you would like to get the PCA object
                returned instead of the transformed data. This can be
                useful if you wish to use the entire results of the PCA.
                The object is a fit_transformed sklearn.decomposition.PCA
                object.
        normalise: Boolean. Set True to normalise the series to a z-score
                before transforming.
        plot: Should the function display a plot of the variance explained?
    """

    from sklearn.decomposition import PCA as sklearnPCA

    if normalise:
        df = normalise_df(df)

    indat_pca = sklearnPCA()
    indat_transf = pd.DataFrame(
        indat_pca.fit_transform(df.values), index=df.index)

    pca_colnames = ["PCA" + str(i) for i in indat_transf.columns]
    indat_transf.columns = pca_colnames

    indat_transf_pos = _all_positive(indat_transf)

    if plot:
        _, ax1 = plt.subplots()
        ax1.plot(np.array(indat_pca.explained_variance_ratio_).cumsum())
        ax1.bar(np.arange(0.1, len(indat_pca.explained_variance_ratio_), 1),
                np.array(indat_pca.explained_variance_ratio_))
        ax1.legend(['Cumulative variance explained',
            'Variance explained by component'], loc='center right')
        ax1.set_ylabel('Proportion of variance explained')
        ax1.set_title('Variance explained by each principal component')
        ax1.set_xlim(right=len(indat_pca.explained_variance_ratio_))
        ax1.set_ylim(top=1)

    if allres:
        return indat_pca
    else:
        return indat_transf_pos
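deaPCA calls a helper _all_positive that is not shown in this listing. A plausible sketch of such a helper, purely an assumption about its behaviour: shift each column up so its minimum is strictly positive, as DEA requires.

import pandas as pd

def _all_positive(df, eps=1e-6):
    # Hypothetical implementation: shift every column so all values are strictly positive.
    return df - df.min() + eps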
Example #50
def MakeBlocksArray(band):
    path ='/Users/ryszardcetnarski/Desktop/PcaResults/'

    plt.style.use('seaborn-bright')
    db = LoadDatabase()
    all_normed = []
    for name, subject in db.groupby(db.index):
        blocks = ExtractBlocks(subject, 'training', band)
       # return blocks
        #blocks_normed =( zscore(blocks, axis = None).T -  zscore(blocks, axis = None)[:,0][:, np.newaxis].T).T
        blocks_normed = zscore(blocks, axis = None)
        all_normed.append(pd.DataFrame(blocks_normed, index= subject.index))
    all_normed = pd.concat(all_normed)
    all_normed['condition'] = db['condition']

    label_dict = {'plus':0,
                  'minus':1,
                  'control':2,
                  'sham':3}
    color = ['r', 'b','grey','g']




    X = all_normed.iloc[:,0:10].values
    y = all_normed.iloc[:,10].values
    X_std = StandardScaler().fit_transform(X)
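    # NB: the PCA below is fit on the raw X; the standardised X_std above is unused.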


    sklearn_pca = sklearnPCA(n_components=2)
    Y_sklearn = sklearn_pca.fit_transform(X)

    fig = plt.figure()
    fig.suptitle(band, fontweight = 'bold')
    ax = fig.add_subplot(111)
    for idx, row in enumerate(Y_sklearn):
        ax.scatter(row[0], row[1], color = color[label_dict[all_normed.iloc[idx]['condition']]], alpha = 0.5)

    ax.set_xlabel('Principal Component 1')
    ax.set_ylabel('Principal Component 2')
    ax.legend( labels=('plus', 'minus', 'control'))
    legend = ax.get_legend()

    legend.legendHandles[0].set_color('red')
    legend.legendHandles[1].set_color('blue')
    legend.legendHandles[2].set_color('green')
Example #51
def PcaFilter(X, name, PLOT_ON, method):
    """Individual subjects have outlier blocks filtered out based on the Mahalanobis distance of their PCA 1st and 2nd components
    (each subject is projected into their own variance space).
    Returns a mask array (of 0's and 1's, the length of X) marking bad and good blocks"""
    #The convention for mask arrays is 0 - inlier, 1 - outlier

    #Pca decomposition into first two components
    sklearn_pca = sklearnPCA(n_components=2)
    pcs = sklearn_pca.fit_transform(X)


    if(method == 'outlier'):
        #This index corresponds to the original index on the array of time series
        #last argument is the threshold of how many standard deviations away a point is considered an outlier
        outlier_idx = MD_removeOutliers(pcs[:,0], pcs[:,1], 2)
        #this will be used for a boolean array for filtering.
        mask_array = np.zeros(len(pcs))
        if(len(outlier_idx) >0 ):
            mask_array[outlier_idx] = 1


    if (method == 'cluster'):
        mask_array = Cluster(pcs)

    if(PLOT_ON):
        colors = ['r', 'b']
        fig = plt.figure()
        fig.suptitle(name)
        ax1 = fig.add_subplot(211)
        ax2 = fig.add_subplot(212)

        #Plot PCA scores and mark outliers
        ax2.scatter(pcs[:,0], pcs[:,1], c = mask_array, cmap = 'jet', s = 60, marker = 'o')
        #Print variance ratio
        ax2.annotate(sklearn_pca.explained_variance_ratio_,xy= (1,1), xycoords='axes fraction', horizontalalignment='right', verticalalignment='top')
        #Plot original signals and mark PCA-identified outliers
        for idx,row in enumerate(X):
            ax1.plot(row, color =colors[int(mask_array[idx])], alpha = 0.2)

    return mask_array
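PcaFilter delegates the outlier test to MD_removeOutliers, which is not shown. A hedged sketch of what such a helper might compute (the name and signature are assumptions): flag points whose Mahalanobis distance from the centroid of the 2-D PCA scores exceeds thresh standard deviations.

import numpy as np

def mahalanobis_outliers(x, y, thresh=2.0):
    pts = np.column_stack([x, y])
    diff = pts - pts.mean(axis=0)
    inv_cov = np.linalg.inv(np.cov(pts, rowvar=False))
    md = np.sqrt(np.einsum('ij,jk,ik->i', diff, inv_cov, diff))   # per-point Mahalanobis distance
    return np.where(md > thresh)[0]                               # indices of outliers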
Example #52
def PCA(X, labels):

    sklearn_pca = sklearnPCA(n_components=2)
    Y_sklearn = sklearn_pca.fit_transform(X)

    fig = plt.figure()
   # fig.suptitle(band, fontweight = 'bold')
    ax1 = fig.add_subplot(211)
    ax2 = fig.add_subplot(212)

    km = KMeans(n_clusters=2)
    km.fit(Y_sklearn)


   #ax.scatter(Y_sklearn[:,0], Y_sklearn[:,1])

    #tresh = [0 if i >4 else 1 for i  in Y_sklearn[:,0]]
    colors = ['blue', 'red']
    for idx, row in enumerate(Y_sklearn):
        ax2.scatter(row[0], row[1], color =colors[km.labels_[idx]], alpha = 0.5)
        ax1.plot(X[idx,:], color = colors[km.labels_[idx]], alpha = 0.2)

    labels.iloc[km.labels_ ==1].to_csv('/Users/ryszardcetnarski/Desktop/Nencki/Badanie_NFB/Dane/miesniowcy_pca.csv ')
    return km.labels_, labels.iloc[km.labels_ ==1]
def combine_lda_pca(X, y):
    sklearn_lda = LDA(n_components=2)
    X_lda_sklearn = sklearn_lda.fit_transform(X, y)
    sklearn_pca = sklearnPCA(n_components=2) #PCA
    X_ldapca_sklearn = sklearn_pca.fit_transform(X_lda_sklearn)
    plot_scikit_lda(X_ldapca_sklearn, title='LDA+PCA via scikit-learn', mirror=(-1))
Example #54
    # Male
    list = []
    # Fill the list
    for i in maleIndex:
        list.append(distances[column][i])
    ci = meanConfidenceInterval(list)
    row = pd.Series(["M", str(column), ci[0], ci[1], ci[2]])
    ciDf = ciDf.append(row, ignore_index=True)
    # Female
    list = []
    # Fill the list
    for i in femaleIndex:
        list.append(distances[column][i])
    ci = meanConfidenceInterval(list)
    row = pd.Series(["F", str(column), ci[0], ci[1], ci[2]])
    ciDf = ciDf.append(row, ignore_index=True)
# Set the dataframe columns
ciDf.columns = ("gender", "distance", "mean", "lowerBound", "upperBound")
# Save the dataframe
ciDf.to_csv("confidence_intervals", index=False)

# PCA
if plotPCA:
    pca = sklearnPCA(n_components=3).fit_transform(difference)
    x = [p[0] for p in pca]
    y = [p[1] for p in pca]
    z = [p[2] for p in pca]
    ax = plt.axes(projection="3d")
    ax.scatter3D(x, y, z)
    plt.show()
Example #55
def cluster_snapshot():
    #args = flask.request.args
    # Get all the form arguments in the url with defaults
    #new_att = str(getitem(args, 'device_button','G_Swap_Used'))
    request_att=[]
    if request.method == "POST":
        #checked = 'att' in request.form
        request_att = request.form.getlist('att')
        #any_selected = bool(selected)
        request_att=[x.encode('UTF8') for x in request_att]
        #print(type(request_att))
        #return str(request_att)

        
    #print (new_att)

    n_times=getData_N_Min_cluster(5)
    #n_times=cursor
    if n_times.count()==0:
        print ("No Data")
        #exit(1)
    t=0
    data=[]
    #Target only swap space

    #'compute-2-28' removed
    machine=['compute-2-29',  'compute-9-30', 'compute-9-36', 'compute-9-35','compute-9-34','compute-2-23', 'compute-2-25', 'compute-2-24', 'compute-2-27', 'compute-2-26', 'compute-6-29', 'compute-6-28', 'compute-6-25', 'compute-6-24', 'compute-6-27', 'compute-6-26', 'compute-6-23', 'compute-9-33', 'compute-9-32', 'compute-22-17', 'compute-22-16', 'compute-22-15', 'compute-22-14', 'compute-22-13', 'compute-22-12', 'compute-22-11', 'compute-22-18', 'compute-7-39', 'compute-7-38', 'compute-21-29', 'compute-21-28', 'compute-21-27', 'compute-21-26', 'compute-21-25', 'compute-21-24', 'compute-21-23', 'compute-5-1', 'compute-14-1', 'compute-14-2', 'compute-14-3', 'compute-14-4',  'compute-14-6', 'compute-14-7', 'compute-14-8', 'compute-13-6', 'compute-13-5', 'compute-13-4', 'compute-7-40', 'compute-13-2', 'compute-13-1', 'compute-14-30', 'compute-5-8', 'compute-14-32', 'compute-14-33', 'compute-14-34', 'compute-14-35', 'compute-18-18', 'compute-14-37', 'compute-14-38', 'compute-18-17', 'compute-5-3', 'compute-18-15', 'compute-5-5', 'compute-18-13', 'compute-5-7', 'compute-5-6', 'compute-6-2', 'compute-3-41', 'compute-6-1', 'compute-6-6', 'compute-6-7', 'compute-6-4', 'compute-6-5', 'compute-6-8', 'compute-13-28', 'compute-13-29', 'compute-13-26', 'compute-13-27', 'compute-13-24', 'compute-13-25', 'compute-13-23', 'compute-2-10', 'compute-2-11', 'compute-2-12', 'compute-2-14', 'compute-2-15', 'compute-2-16', 'compute-2-17', 'compute-2-18', 'compute-14-40', 'compute-2-8', 'compute-2-9', 'compute-2-7', 'compute-20-40', 'compute-1-9', 'compute-1-8', 'compute-6-11', 'compute-8-40', 'compute-6-14', 'compute-6-15', 'compute-6-16', 'compute-6-17', 'compute-6-10',  'compute-6-12', 'compute-6-13', 'compute-6-18', 'compute-4-29', 'compute-4-28', 'compute-23-38', 'compute-22-2', 'compute-23-36', 'compute-23-37', 'compute-23-34', 'compute-23-35', 'compute-4-27', 'compute-23-33', 'compute-4-25', 'compute-11-18', 'compute-8-38', 'compute-8-39', 'compute-11-17', 'compute-11-16', 'compute-22-40', 'compute-1-11', 'compute-1-10', 'compute-1-13', 'compute-1-12', 'compute-1-15', 'compute-1-14', 'compute-1-17', 'compute-1-16', 'compute-5-15', 'compute-5-14', 'compute-12-8', 'compute-5-16', 'compute-5-11', 'compute-5-10', 'compute-5-13', 'compute-5-12', 'compute-12-2', 'compute-12-3', 'compute-12-1', 'compute-12-6', 'compute-12-7', 'compute-12-4', 'compute-12-5', 'compute-12-27', 'compute-12-26', 'compute-12-18', 'compute-19-37', 'compute-19-36', 'compute-12-10', 'compute-12-11', 'compute-12-12', 'compute-12-13', 'compute-12-14', 'compute-12-15', 'compute-12-16', 'compute-12-17', 'compute-20-37', 'compute-20-36', 'compute-20-35', 'compute-20-39', 'compute-20-38', 'compute-23-39', 'compute-23-32', 'compute-4-26', 'compute-23-30', 'compute-12-23', 'compute-12-22', 'compute-12-25', 'compute-12-24', 'compute-19-39', 'compute-19-38', 'compute-12-29', 'compute-12-28', 'compute-19-35', 'compute-9-27', 'compute-21-40', 'compute-9-28', 'compute-9-29', 'compute-11-40', 'compute-21-38', 'compute-21-39', 'compute-21-30', 'compute-21-33', 'compute-21-34', 'compute-21-35', 'compute-21-36', 'compute-21-37', 'compute-5-28', 'compute-5-29', 'compute-5-24', 'compute-5-25', 'compute-5-26', 'compute-5-27', 'compute-5-23', 'compute-13-18', 'compute-13-13', 'compute-13-12', 'compute-13-11', 'compute-13-10', 'compute-13-17', 'compute-13-16', 'compute-13-15', 'compute-13-14', 'compute-14-15', 'compute-22-39', 'compute-22-38', 'compute-22-30', 'compute-22-33', 'compute-22-35', 'compute-22-34', 'compute-22-37', 'compute-22-36', 'compute-7-17', 
'compute-7-16', 'compute-7-18', 'compute-5-17', 'compute-14-18', 'compute-14-12', 'compute-14-13', 'compute-14-10', 'compute-14-11', 'compute-14-16', 'compute-14-17', 'compute-14-14', 'compute-5-18', 'compute-21-8', 'compute-21-1', 'compute-21-2', 'compute-21-3', 'compute-21-4', 'compute-21-5', 'compute-21-6', 'compute-21-7', 'compute-4-30', 'compute-18-14', 'compute-23-27', 'compute-23-26', 'compute-12-37', 'compute-12-38', 'compute-21-11', 'compute-5-39', 'compute-5-38', 'compute-5-37', 'compute-5-36', 'compute-5-35', 'compute-5-34', 'compute-5-33', 'compute-5-32', 'compute-5-30', 'compute-22-3', 'compute-18-35', 'compute-22-1', 'compute-18-37', 'compute-22-7', 'compute-22-6', 'compute-22-5', 'compute-22-4', 'compute-22-8', 'compute-18-38', 'compute-18-39', 'compute-20-15', 'compute-20-17', 'compute-20-16', 'compute-20-18', 'compute-9-25', 'compute-2-38', 'compute-2-39', 'compute-2-36', 'compute-2-37', 'compute-2-34', 'compute-2-35', 'compute-2-32', 'compute-2-33', 'compute-2-30', 'compute-6-32', 'compute-6-33', 'compute-6-30', 'compute-6-36', 'compute-6-37', 'compute-6-34', 'compute-6-35', 'compute-6-38', 'compute-6-39', 'compute-9-26', 'compute-21-12', 'compute-21-13', 'compute-23-16', 'compute-23-17', 'compute-21-16', 'compute-21-17', 'compute-21-14', 'compute-21-15', 'compute-21-18', 'compute-23-18', 'compute-8-18', 'compute-8-16', 'compute-8-17', 'compute-11-39', 'compute-11-38', 'compute-22-28', 'compute-22-29', 'compute-22-23', 'compute-22-26', 'compute-22-27', 'compute-22-24', 'compute-22-25', 'compute-13-8', 'compute-13-7', 'compute-19-13', 'compute-19-15', 'compute-19-14', 'compute-19-17', 'compute-19-16',  'compute-14-27', 'compute-14-26', 'compute-14-25', 'compute-14-24', 'compute-14-23', 'compute-14-36', 'compute-14-29', 'compute-14-28', 'compute-18-16', 'compute-14-39', 'compute-3-39', 'compute-3-38', 'compute-5-2', 'compute-13-39', 'compute-13-38', 'compute-13-35', 'compute-13-34', 'compute-13-37', 'compute-13-36', 'compute-13-30', 'compute-13-33', 'compute-13-32', 'compute-3-40', 'compute-6-3', 'compute-13-40', 'compute-18-36', 'compute-23-29', 'compute-23-28', 'compute-23-25', 'compute-4-32', 'compute-4-33', 'compute-4-34', 'compute-4-35', 'compute-19-40', 'compute-18-40']
    # Build one row per machine per snapshot: [device_name, metric_0 .. metric_38]
    for t1 in n_times:
        devices=t1['data'].keys()
        for d in machine:
            lst=[]
            lst.append(d)
            for x in xrange(0,39):
                lst.append(t1['data'][d][x][1])
            data.append(lst)
        t=t+1
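
    # Sanity-check sketch: every collected row should be a device name
    # followed by its 39 metric values (40 entries in total).
    assert all(len(row) == 40 for row in data)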

    # Column schema: device name followed by the 39 collected metrics
    res=['Device', 'G_Swap_Total', 'G_Swap_Free', 'G_Swap_Used', 'G_Proc_Run', 'G_Cpu_User', 'G_Cpu_Wio', 'G_Load_One', 'G_Load', 'G_Five', 'G_Load_Fifteen', 'G_Mem_Cached', 'G_Mem_Total', 'T_State', 'T_Slots', 'T_SlotsUsed', 'T_AvailMem(MB)', 'T_TotalMem(MB)/Swap', 'T_Time_Last_Rec', 'T_LoadAve', 'T_NetLoad(MB)',
         'N_Status', 'N_Swap_Service', 'N_Swap_State', 'N_Swap_Info', 'N_IPMI_Service', 'N_IPMI_State', 'N_IPMI_Info', 'N_FreeSpace_Service', 'N_FreeSpace_State', 'N_FreeSpace_Info', 'N_CVMFS-OSG_Service', 'N_CVMFS-OSG_State', 'N_CVMFS-OSG_Info', 'N_CVMFS-CERN_Service', 'N_CVMFS-CERN_State', 'N_CVMFS-CERN_Info',
         'N_CVMFS-CONDB_Service', 'N_CVMFS-CONDB_State', 'N_CVMFS-CONDB_Info']


    # Metrics that get normalised per slot below
    att=['G_Swap_Used', 'G_Cpu_User', 'G_Cpu_Wio', 'G_Load_One', 'G_Load', 'G_Five', 'G_Load_Fifteen', 'G_Mem_Cached', 'T_AvailMem(MB)', 'T_LoadAve', 'T_NetLoad(MB)']

    # Default columns fed into PCA (overridden by request_att when supplied)
    new_att=['Device', 'G_Swap_Used', 'G_Proc_Run', 'G_Cpu_User', 'G_Cpu_Wio', 'G_Load_One', 'G_Load', 'G_Five', 'G_Load_Fifteen', 'G_Mem_Cached', 'T_State', 'T_Slots', 'T_SlotsUsed', 'T_AvailMem(MB)', 'T_Time_Last_Rec', 'T_LoadAve', 'N_Status', 'N_Swap_State', 'N_IPMI_State', 'N_IPMI_Info', 'N_FreeSpace_State', 'N_CVMFS-OSG_State', 'N_CVMFS-CERN_State', 'N_CVMFS-CONDB_State']
    #new_att=['Device','G_Swap_Used','T_AvailMem(MB)','G_Five','G_Cpu_Wio']
    if request_att !=[]:
        new_att=request_att

    print (request_att)
    new_index=[]      # positions in res of the per-slot-normalised metrics
    full_index=[]     # positions in res of the columns kept for PCA

    for i in new_att:
        full_index.append(res.index(i))

    for a in att:
        new_index.append(res.index(a))

    new_data=[]

    for d in data:
        core_count=int(d[14])            # res[14] == 'T_Slots' (core/slot count)
        if core_count!=0:
            # Normalise the load/usage metrics per slot
            for i in new_index:
                d[i]=round(float(d[i])/core_count,2)
                d[i]=unicode(d[i])
        tmp=[]
        for i in full_index:
            if i==res.index('N_IPMI_Info'):
                # Keep only the first numeric code from the free-text IPMI field
                code_in_IPMI=re.findall(r'\d+',str(d[i]))
                d[i]=code_in_IPMI[0] if code_in_IPMI else '0'
            tmp.append(d[i])
        new_data.append(tmp)
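
    # Vectorised sketch of the same per-slot normalisation (hypothetical
    # 'metrics' DataFrame holding the att columns plus a 'T_Slots' column;
    # left commented because the DataFrame is only built below):
    # metrics[att] = metrics[att].astype(float).div(metrics['T_Slots'].astype(int), axis=0).round(2)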

    df=pd.DataFrame(new_data)
    df.columns=new_att

    X = df.iloc[:,1:len(df.columns)].values   # feature matrix: every column but Device
    y = df.iloc[:,0].values                   # device-name labels

    from sklearn.preprocessing import StandardScaler
    X_std = StandardScaler().fit_transform(X)

    from sklearn.decomposition import PCA as sklearnPCA
    sklearn_pca = sklearnPCA(n_components=2)
    Y_sklearn   = sklearn_pca.fit_transform(X_std)
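
    # Diagnostic sketch: 'explained_variance_ratio_' is the standard
    # scikit-learn attribute giving the fraction of variance each kept
    # component explains.
    print (sklearn_pca.explained_variance_ratio_.sum())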


    x_corr=[]
    y_corr=[]
    label=[]

    x_l=[]
    y_l=[]
    new_dim_data=dict()

    # Split the 2-D projection into per-machine coordinate lists
    for lab in machine:
        x_fact = Y_sklearn[y==lab, 0].tolist()
        y_fact = Y_sklearn[y==lab, 1].tolist()
        new_dim_data[lab] =[x_fact,y_fact]
        x_l.append(x_fact)
        y_l.append(y_fact)
    # Store new dimensions in database
    post={"date":datetime.datetime.utcnow(),"data":new_dim_data}
    d_var=db_new_dim.data
    post_id=d_var.insert_one(post).inserted_id
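
    # Read-back sketch: pymongo's find_one accepts a 'sort' option, so the
    # most recently stored projection can be fetched with a descending sort
    # on 'date'.
    latest = d_var.find_one(sort=[("date", -1)])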


    # Flatten the per-machine lists into single coordinate vectors
    for xs in x_l:
        x_corr.extend(xs)

    for ys in y_l:                 # note: 'y' still holds the device labels
        y_corr.extend(ys)

    l=len(x_l[0])                  # snapshots per machine (assumed equal for all)

    # One label entry per plotted point
    for lab in machine:
        label.extend([lab]*l)

    new_arr=np.column_stack((x_corr,y_corr))   # shape: (n_points, 2)

    k_means=KMeans(n_clusters=4)
    k_means.fit(new_arr)

    centroid=k_means.cluster_centers_
    labels=k_means.labels_
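
    # Elbow-check sketch (left commented to keep this handler cheap): the
    # fitted model's 'inertia_' attribute supports comparing cluster counts.
    # inertias = [KMeans(n_clusters=k).fit(new_arr).inertia_ for k in range(2, 9)]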

    # Map each point's cluster label to a display colour
    colors=["green","red","cyan","yellow","blue"]
    color_src=[colors[c] for c in labels]



    source = ColumnDataSource(
            data=dict(
                x=x_corr,
                y=y_corr,
                desc=label,
               # colors=color_src,
                
            )
        )
    hover = HoverTool(
            tooltips="""
            <div>
                
                <div>
                    <span style="font-size: 17px; font-weight: bold;">@desc</span>
                    <span style="font-size: 15px; color: #966;">[$index]</span>
                </div>
                <div>
                    <span style="font-size: 15px;">Location</span>
                    <span style="font-size: 10px; color: #696;">($x, $y)</span>
                </div>
            </div>
            """
        )
    TOOLS=["pan,wheel_zoom,box_zoom,reset,resize",hover]
    p = figure(plot_width=600, plot_height=600, tools=TOOLS,
               title="Mouse over the dots")

    p.circle('x', 'y', size=30, source=source, fill_color=color_src)
    p.scatter(centroid[:,0], centroid[:,1], color='black')   # k-means cluster centres

    js_resources = INLINE.render_js()
    css_resources = INLINE.render_css()

    # For more details see:
    #   http://bokeh.pydata.org/en/latest/docs/user_guide/embedding.html#components
    script, div = components(p, INLINE)
    html = flask.render_template(
        'cluster_snapshot.html',
        plot_script=script,
        plot_div=div,
        js_resources=js_resources,
        css_resources=css_resources,
    )
    return encode_utf8(html)
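
# Standalone-output sketch (module level, not part of the handler above): the
# same Bokeh figure could instead be written to its own HTML file with the
# standard bokeh.io helpers; the filename is illustrative.
# from bokeh.io import output_file, save
# output_file("cluster_snapshot_standalone.html")
# save(p)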


from sklearn.preprocessing import StandardScaler

X_std = StandardScaler().fit_transform(X)

print(X_std)


from sklearn.decomposition import PCA as sklearnPCA

sklearn_pca = sklearnPCA(n_components=2)
Y_sklearn = sklearn_pca.fit_transform(X_std)


x_corr = []
y_corr = []
label = []
# print Y_sklearn

x_l = []
y_l = []
for lab in machine:
    x_l.append(Y_sklearn[y == lab, 0].tolist())
    y_l.append(Y_sklearn[y == lab, 1].tolist())

for x in x_l:
    for x1 in x:
        x_corr.append(x1)
#from matplotlib import pyplot as plt
import numpy as np


# feature_dict = {0: 'G_Swap_Total', 1: 'G_Swap_Free', 2: 'G_Swap_Used', 3: 'G_Proc_Run', 4: 'G_Cpu_User', 5: 'G_Cpu_Wio', 6: 'G_Load_One', 7: 'G_Load', 
# 8: 'G_Five', 9: 'G_Load_Fifteen', 10: 'G_Mem_Cached', 11: 'G_Mem_Total', 12: 'T_State', 13: 'T_Slots', 14: 'T_SlotsUsed', 15: 'T_AvailMem(MB)', 16: 'T_TotalMem(MB)/Swap', 17: 'T_Time_Last_Rec', 18: 'T_LoadAve', 19: 'T_NetLoad(MB)'}

from sklearn.preprocessing import StandardScaler
X_std = StandardScaler().fit_transform(X)

CLUSTER_SIZE=4   # reused below as the number of principal components to keep


from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=CLUSTER_SIZE)
Y_sklearn = sklearn_pca.fit_transform(X_std)
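
# Sketch of an alternative: pick the component count from cumulative
# explained variance instead of a fixed constant (the 0.95 threshold is an
# illustrative choice, not taken from the original code).
full_pca = sklearnPCA().fit(X_std)
n_keep = int(np.argmax(np.cumsum(full_pca.explained_variance_ratio_) >= 0.95)) + 1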


x_corr=[]
y_corr=[]
label=[]


x_l=[]
y_l=[]
for lab in machine:
	x_l.append(Y_sklearn[y==lab, 0].tolist())
	y_l.append(Y_sklearn[y==lab, 1].tolist())

for x in x_l: