def performLDA(data_to_fit, y, numComponent=None):
    data_to_fit_np_t = np.array(data_to_fit).T
    if numComponent is None:
        numComponent = len(data_to_fit_np_t)
    lda_model = LinearDiscriminantAnalysis(n_components=numComponent)
    lda_results = lda_model.fit_transform(data_to_fit_np_t, y)
    return lda_model, lda_results
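# A minimal usage sketch for performLDA (illustrative, not from the original
# source): the function expects feature-major input (one row per feature), so
# the transpose yields a samples x features matrix.
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

data_to_fit = [[1.0, 1.2, 0.9, 5.0, 5.2, 4.8],   # feature 1 across 6 samples
               [2.0, 2.1, 1.9, 7.0, 7.1, 6.9],   # feature 2
               [0.5, 0.4, 0.6, 3.5, 3.4, 3.6]]   # feature 3
y = [0, 0, 0, 1, 1, 1]
# Two classes cap LDA at n_classes - 1 = 1 component, so pass 1 explicitly;
# the default here (the sample count) would exceed that limit.
model, projected = performLDA(data_to_fit, y, numComponent=1)
print(projected.shape)  # (6, 1)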
def assess_embedding(to_vec):
	"""
	Returns LDA classification score and projected data
	"""
	(x_data, y_data) = get_x_y_matrices(to_vec)

	lda = LDA(n_components=2)
	x_prime = lda.fit_transform(x_data, y_data)
	score = lda.score(x_data, y_data)

	return (x_prime.reshape(26, ), y_data, score)
def transformLDA(X,y,xTest):
    
    originalSize = np.size(X,1)
    print("Learning LDA \nProjecting {} features to 1 component".format(originalSize))
    priors = [0.5,0.5]

    clf = LinearDiscriminantAnalysis(solver='svd', n_components=1, priors=priors)
    print(X.shape)
    X = clf.fit_transform(X,y)
    print("True size of X : ", X.shape)

    if len(xTest) > 0:  # xTest may be an empty list or an array
        xTest = clf.transform(xTest)
    return X,xTest
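# Hedged usage sketch (illustrative, not from the original repo): xTest is
# optional and may be passed as an empty list when there is no held-out set.
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X_demo = np.vstack([np.random.randn(20, 5), np.random.randn(20, 5) + 3])
y_demo = np.array([0] * 20 + [1] * 20)            # balanced, matching the priors
X_1d, _ = transformLDA(X_demo, y_demo, [])        # fit and project the training set only
X_1d, X_test_1d = transformLDA(X_demo, y_demo, X_demo[:5])  # also project a test set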
Example #4
def run_LDA(df):
    """
    Run LinearDiscriminantAnalysis on input dataframe (df) and return
    transformed data, scalings and explained variance by discriminants.
    """
    # Prep variables for sklearn LDA
    X = df.iloc[:, 1:].values                # input data matrix
    y = df["Condition"].values               # data categories list

    # Calculate LDA
    sklearn_lda = LDA()
    X_lda_sklearn = sklearn_lda.fit_transform(X, y)
    exp_var = sklearn_lda.explained_variance_ratio_

    return X_lda_sklearn, y, exp_var
Example #5
def run_LDA(df):
    # Prep variables for sklearn LDA
    X = df[range(2, df.shape[1])].values     # input data matrix
    y = df['Condition'].values               # data categories list

    # Calculate LDA
    sklearn_lda = LDA(n_components=2)
    X_lda_sklearn = sklearn_lda.fit_transform(X, y)

    # Quality Test - can be ignored
#     print(len(X_lda_sklearn))
#     print(sklearn_lda.predict_proba(X))
#     print(sklearn_lda.score(X, y))

    return X_lda_sklearn, y
Example #6
def train_model(csv_path):
    '''
    INPUT: 
    audio features csv with 'class' labels included

    OUTPUT:
    three pickled models stored in the models dir
    - StandardScaler (sklearn)
    - LinearDiscriminantAnalysis (sklearn)
    - SVC (sklearn)

    Takes an audio feature csv (created from 'feature_extraction.py') and returns pickled models to use
    '''
    csv = LOCAL_REPO_DIR + csv_path
    df = pd.read_csv(csv)

    # extracts X, y for training model from dataframe
    X = df.drop(['class', 'fold', 'Unnamed: 0'], axis=1).values
    y = df['class'].values

    # feature matrix has many different scales, need to standardize
    ss = StandardScaler()
    X = ss.fit_transform(X)

    lda = LinearDiscriminantAnalysis()
    X_lda = lda.fit_transform(X, y)

    # trains model using best performing model/hyperparameters using kfold grid search
    svm = SVC(C=1, gamma=0.04)
    svm.fit(X_lda, y)
    
    # accuracy check to make sure the model is performing
    y_pred_svm = svm.predict(X_lda)
    print('model accuracy: ', accuracy_score(y, y_pred_svm))

    # cPickles models for later use
    with open(LOCAL_REPO_DIR + 'model/svm.pkl', 'wb') as f:
        cPickle.dump(svm, f)

    with open(LOCAL_REPO_DIR + 'model/lda.pkl', 'wb') as f:
        cPickle.dump(lda, f)

    with open(LOCAL_REPO_DIR + 'model/ss.pkl', 'wb') as f:
        cPickle.dump(ss, f)
Example #7
def fit_svm(prints):
    print "Fitting to SVM...."
    dataframe = pd.DataFrame(prints)
    y = dataframe[2]
    X = dataframe[0]

    # in case feature matrix has many different scales, need to standardize
    ss = StandardScaler()
    X = ss.fit_transform(X)


    lda = LinearDiscriminantAnalysis()
    X_lda = lda.fit_transform(X, y)  # fit LDA on the standardized features

    # trains model using best performing model/hyperparameters using kfold grid search
    svm = SVC(C=1, gamma=0.04)
    svm.fit(X_lda, y)

    pickle_model(svm, 'svm')
Example #8
def run_LDA(df):
    """
    Run LinearDiscriminantAnalysis on input dataframe (df) and return
    transformed data, scalings and explained variance by discriminants.
    """
    # Prep variables for sklearn LDA
    X = df.iloc[:, 1:df.shape[1]].values     # input data matrix
    y = df["Condition"].values               # data categories list

    # Calculate LDA
    sklearn_lda = LDA()
    X_lda_sklearn = sklearn_lda.fit_transform(X, y)
    try:
        exp_var = sklearn_lda.explained_variance_ratio_
    except AttributeError as ae:
        print("\n{}: explained variance cannot be computed.\nPlease check this GitHub PR:"
              " https://github.com/scikit-learn/scikit-learn/pull/6027".format(ae))
        return X_lda_sklearn, y, "NA"
    return X_lda_sklearn, y, exp_var
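# Hedged usage sketch for run_LDA (illustrative data): the convention above is
# that column 0 holds the "Condition" label and the remaining columns are features.
import numpy as np
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

rng = np.random.RandomState(0)
demo = pd.DataFrame({"Condition": ["A"] * 5 + ["B"] * 5})
for j in range(4):
    demo["feat%d" % j] = np.concatenate([rng.randn(5), rng.randn(5) + 2])
X_lda, y, exp_var = run_LDA(demo)
print(X_lda.shape, exp_var)  # (10, 1): two conditions give one discriminant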
Example #9
def project_back(x,digits):
    myLDA = LDA()
    new_train = myLDA.fit_transform(x.PCA[:,:154],digits.train_Labels)
    print(new_train.shape)
    m = 0
    n = 1
    plt.figure()
    colors = ['Green', 'Blue', 'Red', 'Purple', 'Black', 'Brown', 'Silver', 'Cyan']
    for digit, color in enumerate(colors):  # one colour per digit class 0-7
        plt.scatter(new_train[digits.train_Labels == digit, m],
                    new_train[digits.train_Labels == digit, n], color=color, s=1)
    plt.show()
    y = new_train@myLDA.coef_[:9,:] # I really don't know if this will work since there are 10 coef things
    weighted_y2 = y[:,:154]@x.V[:154,:] + x.centers
    plt.imshow(weighted_y2[0,:].reshape(28,28))
    plt.show()
def plot_sklearn_lda_with_lr(X_train, X_test, y_train, y_test):
    lda = LDA(n_components=2)
    X_train_lda = lda.fit_transform(X_train, y_train)

    lr = LogisticRegression()
    lr = lr.fit(X_train_lda, y_train)

    plot_decision_regions(X_train_lda, y_train, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.show()

    X_test_lda = lda.transform(X_test)

    plot_decision_regions(X_test_lda, y_test, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.show()
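# plot_decision_regions is not defined in this snippet (it resembles the helper
# from Raschka's book); a minimal sketch, assuming 2-D inputs and a fitted
# classifier exposing predict:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

def plot_decision_regions(X, y, classifier, resolution=0.02):
    # evaluate the classifier on a grid over the two features, draw filled
    # contours for the predicted regions, then overlay the samples per class
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(X[y == cl, 0], X[y == cl, 1], alpha=0.8,
                    color=colors[idx], marker=markers[idx], label=cl)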
    def apply(self):
        transformed = components = None
        if self.data is not None:
            self.data = Continuize(Impute(self.data))
            lda = LinearDiscriminantAnalysis(solver='eigen', n_components=2)
            X = lda.fit_transform(self.data.X, self.data.Y)
            dom = Domain([ContinuousVariable('Component_1'),
                          ContinuousVariable('Component_2')],
                         self.data.domain.class_vars, self.data.domain.metas)
            transformed = Table(dom, X, self.data.Y, self.data.metas)
            transformed.name = self.data.name + ' (LDA)'
            dom = Domain(self.data.domain.attributes,
                         metas=[StringVariable(name='component')])
            metas = np.array([['Component_{}'.format(i + 1)
                                  for i in range(lda.scalings_.shape[1])]],
                                dtype=object).T
            components = Table(dom, lda.scalings_.T, metas=metas)
            components.name = 'components'

        self.send("Transformed data", transformed)
        self.send("Components", components)
def do_LDA2D_KNN(digits,p,q):
    l,r = LDA2D.iterative2DLDA(digits.train_Images, digits.train_Labels, p, q, 28, 28)

    new_train = np.zeros((digits.train_Images.shape[0],p*q))
    for i in range(digits.train_Images.shape[0]):
        new_train[i] = (np.transpose(l)@digits.train_Images[i].reshape(28,28)@r).reshape(p*q)
    new_test = np.zeros((digits.test_Images.shape[0],p*q))
    for i in range(digits.test_Images.shape[0]):
        new_test[i] = (np.transpose(l)@digits.test_Images[i].reshape(28,28)@r).reshape(p*q)
    myLDA = LDA()
    x = center_matrix_SVD(new_train)
    new_new_train = myLDA.fit_transform(new_train-x.centers,digits.train_Labels)
    new_new_test = myLDA.transform(new_test-x.centers)
    labels, nearest = KNN(new_new_train,digits.train_Labels,new_new_test,10,'euclidean')
    pickle.dump(labels, open('LDA2DFDA'+ str(p) + 'x' + str(q) + '_EU.p','wb'))
    #pickle.dump(nearest, open('NLDA2DFDA'+ str(p) + 'x' + str(q) + '_EU.p','wb'))
    labels, nearest = KNN(new_new_train,digits.train_Labels,new_new_test,10,'cityblock')
    pickle.dump(labels, open('LDA2DFDA'+ str(p) + 'x' + str(q) + '_CB.p','wb'))
    #pickle.dump(nearest, open('NLDA2DFDA'+ str(p) + 'x' + str(q) + '_CB.p','wb'))
    labels, nearest = KNN(new_new_train,digits.train_Labels,new_new_test,10,'cosine')
    pickle.dump(labels, open('LDA2DFDA'+ str(p) + 'x' + str(q) + '_CO.p','wb'))
Example #13
def leave_one_out(feature_dict, glob, classifier, title):
    # feature_dict is a dictionary of feature names and a triple of booleans defining
    # which summary metrics to include respectively: (mean, std, measurewise)
    all_features = glob.get_features(feature_dict)
    all_classes = glob.get_feature('class', (True, True, True))
    
    class_pred, class_real = [], []
    
    vis.print_stars(newline=True)
    print("Testing " + title + " classification with features:")
    print(list(feature_dict.keys()))
    vis.print_dashes()
    sys.stdout.write("\r0 / %d samples processed (...)" % len(all_features))
    
    lda = LinearDiscriminantAnalysis()  # project features onto the discriminant axes
    all_features = lda.fit_transform(all_features, all_classes.ravel())
    start = time.perf_counter()
    
    for idx in range(len(all_features)):
        train_features = np.delete(all_features, idx, 0)
        train_classes = np.delete(all_classes, idx, 0)
        
        test_feature = np.transpose(all_features[idx,:]).reshape((1, train_features.shape[1]))
        test_class = np.transpose(all_classes[idx,:])
        
        predicted_class = classify(train_features, train_classes, test_feature, classifier)
        
        class_pred.append(predicted_class)
        class_real.append(genre_from_int(test_class))
    
        t = time.perf_counter() - start
        time_per_iteration = t / (idx + 1)
        remaining = time_per_iteration * (len(all_features) - (idx + 1))
        
        sys.stdout.write("\r%d / %d samples processed (%02d:%02d:%02d left)" % 
            ((idx + 1), len(all_features), remaining / 3600, (remaining / 60) % 60, remaining % 60))
    
    return [class_pred, class_real]
Example #14
def main():
    digits = mnist() # Creates a class with our mnist images and labels
    if not os.path.exists('Training SVD Data'): # Check if the file exists; create it if it doesn't (assumes os is imported)
        x = center_matrix_SVD(digits.train_Images) # Creates a class with our svd and associated info
        pickle.dump(x,open('Training SVD Data','wb'))
    else:
        x = pickle.load(open('Training SVD Data','rb'))  # If we already have the file just load it
    if 1: # if this is zero skip
        test_Images_Center = np.subtract(digits.test_Images,np.repeat(x.centers,digits.test_Images.shape[0],0))
        tic()
        myLDA = LDA()  # Create a new instance of the LDA class
        new_train = myLDA.fit_transform(x.PCA[:,:154],digits.train_Labels)  # It will fit based on x.PCA
        new_test = myLDA.transform(test_Images_Center@np.transpose(x.V[:154,:])) # get my transformed test dataset
        Knn_labels = local_kmeans_class(new_train,digits.train_Labels,new_test,10) # Run kNN on the new data
        toc()
        pickle.dump(Knn_labels,open('Loc_kmeans_fda_lab','wb'))

    fda = pickle.load(open('Loc_kmeans_fda_lab','rb'))
    labels_Full = pickle.load(open('KNN_Full','rb'))
    loc_full = pickle.load(open('Loc_kmeans_Full_lab','rb'))
    errors_fda,ind_fda = class_error_rate(np.transpose(fda),digits.test_labels)
    errors_near,ind_near = class_error_rate(labels_Full,digits.test_labels)
    errors_full,ind_full = class_error_rate(np.transpose(loc_full),digits.test_labels)
    labels_50 = pickle.load(open('KNN_50','rb'))
    errors_50,ind_50 = class_error_rate(labels_50,digits.test_labels)
    print(errors_full)
    plt.figure()
    plt.plot(np.arange(10)+1, errors_fda, color='Green', marker='o', markersize=10, label='fda Kmeans')  #plots the 82.5%
    plt.plot(np.arange(10)+1, errors_near, color='Blue', marker='o', markersize=10, label='kNN')
    plt.plot(np.arange(10)+1, errors_full, color='Yellow', marker='o', markersize=10, label='Full Kmeans')
    plt.plot(np.arange(10)+1, errors_50, color='Red', marker='o', markersize=10, label='kNN 50')
    axes = plt.gca()
    axes.set_ylim([0.015,0.12])
    plt.grid(1) # Turns the grid on
    plt.title('Plot of Local Kmeans with FDA Error rates')
    plt.legend(loc='upper right')  # Puts a legend on the plot
    plt.show()
    project_back(x,digits)
Example #15
 def dimension_reduce(self,mode='L'):
     
     print('Reduce Dimensions...')
     print('Start:' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
     
     raw_train=self.train.copy()
     train=self.train.copy()
     train_label=self.train_label['label'].values.copy()
     train_label=train_label.reshape((train_label.shape[0]))
         
     test=self.test.copy()
     test_label=self.test_label['label'].values.copy()
     test_label=test_label.reshape((test_label.shape[0]))
     
     flist=train.columns
     
     if mode.upper()=='L':
         lda=LinearDiscriminantAnalysis()
         X_new=lda.fit_transform(train.values,train_label)
         self.train=pd.DataFrame(X_new,columns=['DR'])
         self.test=pd.DataFrame(lda.transform(test[flist].values),columns=['DR'])
         
         tt=lda.coef_[0]
         ind=np.argsort(tt)
         features=raw_train.columns[ind[-100:]]
         feas=pd.DataFrame()
         feas['feature']=features
         feas['values']=tt[ind[-100:]]
         return feas
         
     elif mode.upper()=='P':
         pca = PCA(n_components=100)
         X_new=pca.fit_transform(train.values,train_label)
         self.train=pd.DataFrame(X_new)
         self.test=pd.DataFrame(pca.transform(test[flist].values))
         
     print('End:' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
Example #16
 def best_lda_nba(self):
     dh = data_helper()
     X_train, X_test, y_train, y_test = dh.get_nba_data()
     
     scl = RobustScaler()
     X_train_scl = scl.fit_transform(X_train)
     X_test_scl = scl.transform(X_test)
     
     lda = LinearDiscriminantAnalysis(n_components=2)
     X_train_transformed = lda.fit_transform(X_train_scl, y_train)
     X_test_transformed = lda.transform(X_test_scl)
     
     # save
     filename = './' + self.save_dir + '/nba_lda_x_train.txt'
     pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)
     
     filename = './' + self.save_dir + '/nba_lda_x_test.txt'
     pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)
     
     filename = './' + self.save_dir + '/nba_lda_y_train.txt'
     pd.DataFrame(y_train).to_csv(filename, header=False, index=False)
     
     filename = './' + self.save_dir + '/nba_lda_y_test.txt'
     pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
Example #17
def main():
    digits = mnist() # Creates a class with our mnist images and labels
    if not os.path.exists('Training SVD Data'): # Check if the file exists; create it if it doesn't (assumes os is imported)
        print("im here")   # Just wanted to check if it was going in here
        x = center_matrix_SVD(digits.train_Images) # Creates a class with our svd and associated info
        pickle.dump(x,open('Training SVD Data','wb'))
    else:
        x = pickle.load(open('Training SVD Data','rb'))  # If we already have the file just load it
    if 0: # if this is zero skip
        test_Images_Center = np.subtract(digits.test_Images,np.repeat(x.centers,digits.test_Images.shape[0],0))
        tic()
        myLDA = LDA()  # Create a new instance of the LDA class
        new_train = myLDA.fit_transform(x.PCA[:,:154],digits.train_Labels)  # It will fit based on x.PCA
        new_test = myLDA.transform(test_Images_Center@np.transpose(x.V[:154,:])) # get my transformed test dataset
        Knn_labels, nearest = KNN(new_train,digits.train_Labels,new_test,10) # Run kNN on the new data
        toc()
        pickle.dump(Knn_labels,open('FDAKNN_Lables','wb'))
        pickle.dump(nearest,open('FDAKNN_neastest','wb'))
    fda = pickle.load(open('FDAKNN_Lables','rb'))
    labels_Full = pickle.load(open('KNN_Full','rb'))
    labels_50 = pickle.load(open('KNN_50','rb'))
    errors_fda,ind_fda = class_error_rate(fda,digits.test_labels)
    errors_near,ind_near = class_error_rate(labels_Full,digits.test_labels)
    errors_50,ind_50 = class_error_rate(labels_50,digits.test_labels)
    plt.figure()
    plt.plot(np.arange(10)+1, errors_fda, color='Green', marker='o', markersize=10, label='fda')  #plots the 82.5%
    plt.plot(np.arange(10)+1, errors_near, color='Blue', marker='o', markersize=10, label='kNN')
    plt.plot(np.arange(10)+1, errors_50, color='Yellow', marker='o', markersize=10, label='kNN 50')
    plt.grid(1) # Turns the grid on
    plt.title('Plot of Knn with FDA Error rates')
    plt.legend(loc='upper right')  # Puts a legend on the plot
    plt.show()
    print(confusion_matrix(digits.test_labels,labels_Full[5]))
    print(confusion_matrix(digits.test_labels,fda[5]))
    print(confusion_matrix(digits.test_labels,labels_50[5]))
    """
Example #18
    # remove axis spines
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["bottom"].set_visible(False)
    ax.spines["left"].set_visible(False)
    plt.tight_layout()
    plt.grid()
    plt.show()


plot_pca()

# LDA via scikit-learn
# LDA
sklearn_lda = LDA(n_components=2)
X_lda_sklearn = sklearn_lda.fit_transform(X, y)


def plot_scikit_lda(X, title):
    ax = plt.subplot(111)
    for label, marker, color in zip(range(1, 4), ('^', 's', 'o'),
                                    ('blue', 'red', 'green')):
        plt.scatter(
            x=X[:, 0][y == label],
            y=X[:, 1][y == label] * -1,  # flip the figure
            marker=marker,
            color=color,
            alpha=0.5,
            label=label_dict[label])

    plt.xlabel('LD1')
Example #19
for i, alpha in enumerate([0., 10., 1000.]):
    # Fit and transform data using PLDA
    plda = PLDA(alpha=alpha, n_components=2)
    X_plda = plda.fit_transform(X, y)

    # Compute classification accuracy
    acc = plda.score(X, y)

    # Plot transformed data
    plot_transform(X_plda, y, ax[0, i])
    ax[0, i].set_title("PLDA $\\alpha$={:.1f}\nacc={:.3f}".format(alpha, acc))

# For comparison, perform LDA
# Note: This should be the same as PLDA with alpha=0
lda = LinearDiscriminantAnalysis()
X_lda = lda.fit_transform(X, y)
acc = lda.score(X, y)
plot_transform(X_lda, y, ax[1, 0])
ax[1, 0].set_title("LDA\nacc={:.3f}".format(acc))

# For comparison, perform PCA
# Note: This should be the same as PLDA with very large alpha
pca = PCA()
X_pca = pca.fit_transform(X)
plot_transform(X_pca, y, ax[1, -1])
ax[1, -1].set_title("PCA\nacc=N/A")

# Ignore the middle subplot
ax[1, 1].axis('off')

plt.tight_layout()
Example #20
# In[13]:

clf = KNeighborsClassifier(n_neighbors=7)
clf.fit(train_df[['sepel_len', 'sepel_width', 'pedal_len', 'pedal_width']], train_df['class'])
clf.score(test_df[['sepel_len', 'sepel_width', 'pedal_len', 'pedal_width']], test_df['class'])


# In[14]:

train_df.iloc[:, 0:4].head()


# In[15]:

sklearn_LDA = LDA(n_components=2)
train_r = sklearn_LDA.fit_transform(train_df.iloc[:, 0:4],train_df['class'])


# In[16]:

train_df_r=pd.DataFrame(train_r,columns=['feature1', 'feature2'])


# In[17]:

train_df_r = pd.concat([train_df_r,train_df['class'].reset_index(drop=True)], axis=1 )


# In[18]:

train_df_r.head()
Example #21
sc = StandardScaler()
Xtrain_sc = sc.fit_transform(Xtrain)
Xtest_sc = sc.transform(Xtest)

# Principal components
from sklearn.decomposition import PCA
## since I only have 2 variables I choose 1
pca = PCA(n_components=1)
Xtrain_pca = pca.fit_transform(Xtrain_sc)
Xtest_pca = pca.transform(Xtest_sc)

# Linear discriminant analysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

lda = LDA(n_components=1)
Xtrain_lda = lda.fit_transform(Xtrain_sc, ytrain)
Xtest_lda = lda.transform(Xtest_sc)

#Kernel PCA
from sklearn.decomposition import KernelPCA

kpca = KernelPCA(n_components=1, kernel='rbf')
Xtrain_kpca = kpca.fit_transform(Xtrain_sc)
Xtest_kpca = kpca.transform(Xtest_sc)

# Logistic regression
#######################
from sklearn.linear_model import LogisticRegression
## logistic regression is an iterative algorithm, so we use random_state to get
## approximately the same results each run
logistic = LogisticRegression(random_state=4)
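# Hedged continuation (the original snippet stops here): fit the classifier on
# the LDA features and score it on the projected test set; ytest is assumed to
# come from the same split as ytrain.
logistic.fit(Xtrain_lda, ytrain)
print(logistic.score(Xtest_lda, ytest))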
Example #22
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
import kutuphane
 
giris, cikis, CustomerID =  kutuphane.dosya_oku('data/Credit_Card_Applications.csv')
#kisi_bilgisi['Country'] = kisi_bilgisi['Country'].replace([":",','],"").astype(int)

scaler = StandardScaler()
X = scaler.fit_transform(giris)

pca = PCA(n_components=50)
pca_x = pca.fit_transform(X)


accuracy, f1_skor = kutuphane.basari_hesaplaCV(pca_x, cikis,CustomerID)
print("pca basarisi = "+ str(accuracy) )


lda = LDA(n_components=2)
lda_x =lda.fit_transform(X,cikis)


accuracy, f1_skor = kutuphane.basari_hesapla(lda_x, cikis, CustomerID)
print("LDA basarisi = "+ str(accuracy) )
Example #23
print(train_y.shape,test_y.shape)
from sklearn.preprocessing import LabelEncoder , OneHotEncoder
from sklearn.compose import ColumnTransformer

le=LabelEncoder()
train_y=le.fit_transform(train_y).reshape(-1,1)

le2=LabelEncoder()
test_y=le2.fit_transform(test_y).reshape(-1,1)
# # ---------------------------------------------------------------------------------

# DIMENSIONALITY REDUCTION USING LDA FOR VISUALIZATION
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda=LDA(n_components=2)
train_x=lda.fit_transform(train_x,train_y)
test_x=lda.transform(test_x)
# -----------------------------------------------------------------------------------
# pdb.set_trace()

# COMPARING MODEL
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.svm import SVC

models=[] #--- models will contain tuples whose first element will be name and second element will be model
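# Hedged continuation (illustrative): a typical way this models list is filled
# and compared with stratified k-fold cross-validation, using only the imports
# already present above.
models.append(('LR', LogisticRegression()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('LDA', LDA()))
models.append(('SVM', SVC()))
for name, model in models:
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
    results = cross_val_score(model, train_x, train_y.ravel(), cv=kfold, scoring='accuracy')
    print('%s: %.3f (%.3f)' % (name, results.mean(), results.std()))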
class Assignment(tk.Frame):
    def Apply(self):
        self.label_imgtype.destroy()
        plt.close('all')
        self.original_image, self.result_image, self.binary_image, self.filtered_image = feature_extractor_GUI.extract_features_prediction(
            self.filename)
        self.original_image = cv2.cvtColor(self.original_image,
                                           cv2.COLOR_BGR2RGB)
        feature_extractor_GUI.DEV_drawGui(self.original_image,
                                          self.result_image, self.binary_image)
        plt.figure(2)
        plt.plot(self.filtered_image)
        plt.xlabel('Angles (deg)')
        plt.ylabel('Frequency')
        plt.title('Histogram of Filtered Image with Angle Calculation')
        plt.show()
        self.label_imgtype = tk.Label(root,
                                      text="Features Extracted Successfully")
        self.label_imgtype.config(font=("Times New Roman", 14))
        self.label_imgtype.grid(row=5, columnspan=2)
        self.feature_extraction_flag = True

    def browse(self):
        self.label_imgtype.destroy()
        self.filename = filedialog.askopenfilename()
        self.img = cv2.imread(self.filename, 1)
        self.b, self.g, self.r = cv2.split(self.img)
        self.img1 = cv2.merge((self.r, self.g, self.b))
        self.img2 = Image.fromarray(self.img1)
        self.img = self.img2.resize((512, 512))
        self.canvas.image = ImageTk.PhotoImage(self.img)
        self.canvas.create_image(0, 0, image=self.canvas.image, anchor='nw')
        self.label_imgtype = tk.Label(
            root, text="The Image Loaded, Features not Extracted")
        self.label_imgtype.config(font=("Times New Roman", 14))
        self.label_imgtype.grid(row=5, columnspan=2)

    def classify(self):
        #Check if feature extraction has been done, if not run the function
        if self.feature_extraction_flag is False:
            self.Apply()
        #Check if the model has been trained or not, if not train the model
        self.result_label.destroy()
        if self.train_flag is False:
            self.model_train()
        filename = 'features_test.csv'
        data, predict_features, _ = data_file_reader.file_reader(
            filename, 'test')
        lda_test_set = self.lda.transform(predict_features)
        prediction = self.clf.predict(lda_test_set)
        if prediction == 0:
            self.result_label = tk.Label(
                root,
                text="The Image Contains a Natural Scene",
                wraplength=200)
            print('The Image Contains a Natural Scene')
        else:
            self.result_label = tk.Label(
                root,
                text="The Image Contains Man Made Objects in the Scene",
                wraplength=200)
            print('The Image Contains Man Made Objects in the Scene')
        self.result_label.config(font=("Times New Roman", 14))
        self.result_label.place(relx=0.7, rely=0.45, anchor='sw')

    def model_train(self):
        self.label_training_state.destroy()
        filename = 'features_train.csv'
        data, features, labels = data_file_reader.file_reader(
            filename, 'train')
        self.svc = SVC(kernel='linear', C=1)
        self.rf = RandomForestClassifier(n_estimators=50, random_state=1)
        self.knn = KNeighborsClassifier(n_neighbors=3)
        self.mv = VotingClassifier(estimators=[('rf', self.rf),
                                               ('knn', self.knn),
                                               ('svc', self.svc)],
                                   voting='hard')
        self.lda = LDA(n_components=1)  # a binary task allows at most n_classes - 1 = 1 component
        lda_train_set = self.lda.fit_transform(features, np.ravel(labels))
        if self.comboExample.get() == "Majority Voting":
            self.clf = self.mv.fit(lda_train_set, np.ravel(labels))
            classifier_label = "Model Trained Successfully on Majority Voting"
        elif self.comboExample.get() == "SVC":
            self.clf = self.svc.fit(lda_train_set, np.ravel(labels))
            classifier_label = "Model Trained Successfully on SVC"
        elif self.comboExample.get() == "KNN":
            self.clf = self.knn.fit(lda_train_set, np.ravel(labels))
            classifier_label = "Model Trained Successfully on KNN"
        else:
            self.clf = self.rf.fit(lda_train_set, np.ravel(labels))
            classifier_label = "Model Trained Successfully on Random Forest"
        self.label_training_state = tk.Label(root,
                                             text=classifier_label,
                                             wraplength=200)
        self.label_training_state.config(font=("Times New Roman", 12))
        self.label_training_state.grid(row=6, column=8)
        self.train_flag = True

    def __init__(self, root):
        tk.Frame.__init__(self, root)
        self.train_flag = False
        self.feature_extraction_flag = False
        # BROWSE
        self.btn_browse = tk.Button(root, text="Browse", command=self.browse)
        self.btn_browse.grid(row=0, column=0)

        # APPLY
        self.btn_feature_extract = tk.Button(root,
                                             text="Extract Features",
                                             command=self.Apply)
        self.btn_feature_extract.grid(row=0, column=1)

        # Classify Image
        self.btn_classify = tk.Button(root,
                                      text="Classify Image",
                                      command=self.classify)
        self.btn_classify.grid(row=5, column=5)

        # Train Model
        self.model_train_btn = tk.Button(root,
                                         text="Train Model",
                                         command=self.model_train)
        self.model_train_btn.grid(row=5, column=8)

        # CANVAS
        self.canvas = tk.Canvas(root, width=800, height=800)
        self.canvas.grid(row=10, columnspan=10)

        self.label_imgtype = tk.Label(root, text="No Image Loaded")
        self.label_imgtype.config(font=("Times New Roman", 14))
        self.label_imgtype.grid(row=5, columnspan=2)

        # Labels
        self.label_training_state = tk.Label(root, text="Not Trained")
        self.label_training_state.grid(row=6, column=8)
        self.result_label = tk.Label(root, text="No Classification")
        self.result_label.config(font=("Times New Roman", 14))
        self.result_label.place(relx=0.7, rely=0.4, anchor='sw')
        self.course_label = tk.Label(root,
                                     text="Pattern Recognition (LOTI.05.046)")
        self.course_label.config(font=("Times New Roman", 16))
        self.course_label.place(relx=0.34, rely=0.92, anchor='sw')
        self.title_label = tk.Label(
            root, text="Man made object detection in natural scenes")
        self.title_label.config(font=("Times New Roman", 16))
        self.title_label.place(relx=0.3, rely=0.95, anchor='sw')
        self.classifier_label = tk.Label(root, text="Choose Classifier")
        self.classifier_label.grid(row=0, column=8)
        self.comboExample = ttk.Combobox(
            root, values=["SVC", "Random Forest", "KNN", "Majority Voting"])
        self.comboExample.grid(column=8, row=1)
        self.comboExample.current(3)

        self.logo = cv2.imread('tartu_logo.jpg', 1)
        self.b, self.g, self.r = cv2.split(self.logo)
        self.logo1 = cv2.merge((self.r, self.g, self.b))
        self.logo2 = Image.fromarray(self.logo1)
        self.logo = self.logo2.resize((180, 180))
        self.canvas.image_logo = ImageTk.PhotoImage(self.logo)
        self.canvas.create_image(0,
                                 540,
                                 image=self.canvas.image_logo,
                                 anchor='nw')
sc_x = StandardScaler()
sc_y = StandardScaler()

# Scale X
X_train = sc_x.fit_transform(X_train)
X_test = sc_x.transform(X_test)

# Scale y



###################### 3- Training ######################
# LDA
n_components = 2
lda = LDA(n_components = n_components)
_X_train = lda.fit_transform(X_train, y_train)
_X_test = lda.transform(X_test)  # transform with the LDA fitted on the training set


# Logistic Regression
classifier = LogisticRegression()
classifier.fit(_X_train, y_train)


###################### 3- Testing ######################
y_pred = classifier.predict(_X_test)
cm = confusion_matrix(y_test, y_pred)


###################### 3- Visualization ######################
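# Hedged sketch (the original ends at the header above): scatter the two
# discriminants of the training set, assuming numeric class labels in y_train.
import matplotlib.pyplot as plt
plt.scatter(_X_train[:, 0], _X_train[:, 1], c=y_train, cmap='viridis', s=15)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.title('Training data in LDA space')
plt.show()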
Example #26
class LDA(CtrlNode):
    """Linear Discriminant Analysis, uses sklearn"""
    nodeName = "LDA"
    uiTemplate = [('train_data', 'list_widget', {'selection_mode': QtWidgets.QAbstractItemView.ExtendedSelection,
                                                 'toolTip': 'Column containing the training data'}),
                  ('train_labels', 'combo', {'toolTip': 'Column containing training labels'}),
                  ('solver', 'combo', {'items': ['svd', 'lsqr', 'eigen']}),
                  ('shrinkage', 'combo', {'items': ['None', 'auto', 'value']}),
                  ('shrinkage_val', 'doubleSpin', {'min': 0.0, 'max': 1.0, 'step': 0.1, 'value': 0.5}),
                  ('n_components', 'intSpin', {'min': 2, 'max': 1000, 'step': 1, 'value': 2}),
                  ('tol', 'intSpin', {'min': -50, 'max': 0, 'step': 1, 'value': -4}),
                  ('score', 'lineEdit', {}),
                  ('predict_on', 'list_widget', {'selection_mode': QtWidgets.QAbstractItemView.ExtendedSelection,
                                                 'toolTip': 'Data column of the input "predict" Transmission\n'
                                                            'that is used for predicting from the model'}),
                  ('Apply', 'check', {'applyBox': True, 'checked': False})
                  ]

    def __init__(self, name, **kwargs):
        CtrlNode.__init__(self, name, terminals={'train': {'io': 'in'},
                                                 'predict': {'io': 'in'},

                                                 'T': {'io': 'out'},
                                                 'coef': {'io': 'out'},
                                                 'means': {'io': 'out'},
                                                 'predicted': {'io': 'out'}
                                                 },
                          **kwargs)
        self.ctrls['score'].setReadOnly(True)

    def process(self, **kwargs):
        return self.processData(**kwargs)

    def processData(self, train: Transmission, predict: Transmission):
        self.t = train.copy()  #: Transmission instance containing the training data with the labels
        if predict is not None:
            self.to_predict = predict.copy()  #: Transmission instance containing the data to predict after fitting on the training data

        dcols, ccols, ucols = organize_dataframe_columns(self.t.df.columns)

        self.ctrls['train_data'].setItems(dcols)
        self.ctrls['train_labels'].setItems(ccols)

        if predict is not None:
            pdcols, ccols, ucols = organize_dataframe_columns(self.to_predict.df.columns)
            self.ctrls['predict_on'].setItems(pdcols)

        if not self.apply_checked():
            return

        train_columns = self.ctrls['train_data'].getSelectedItems()
        labels = self.ctrls['train_labels'].currentText()

        solver = self.ctrls['solver'].currentText()

        shrinkage = self.ctrls['shrinkage'].currentText()
        if shrinkage == 'value':
            shrinkage = self.ctrls['shrinkage_val'].value()
        elif shrinkage == 'None':
            shrinkage = None

        n_components = self.ctrls['n_components'].value()
        tol = 10 ** self.ctrls['tol'].value()

        store_covariance = True if solver == 'svd' else False

        params = {'train_data': train_columns,
                  'train_labels': labels,
                  'solver': solver,
                  'shrinkage': shrinkage,
                  'n_components': n_components,
                  'tol': tol,
                  'store_covariance': store_covariance
                  }

        kwargs = params.copy()
        kwargs.pop('train_data')
        kwargs.pop('train_labels')
        self.lda = LinearDiscriminantAnalysis(**kwargs)

        # Make an array of all the data from the selected columns
        self.X = np.hstack([np.vstack(self.t.df[train_column]) for train_column in train_columns])
        self.y = self.t.df[labels]

        self.X_ = self.lda.fit_transform(self.X, self.y)

        self.t.df['_LDA_TRANSFORM'] = self.X_.tolist()
        self.t.df['_LDA_TRANSFORM'] = self.t.df['_LDA_TRANSFORM'].apply(np.array)

        params.update({'score': self.lda.score(self.X, self.y),
                       'classes': self.lda.classes_.tolist()
                       })

        self.ctrls['score'].setText(f"{params['score']:.4f}")

        self.t.history_trace.add_operation('all', 'lda', params)

        self.t.df['_LDA_DFUNC'] = self.lda.decision_function(self.X).tolist()

        coef_df = pd.DataFrame({'classes': self.lda.classes_, '_COEF': self.lda.coef_.tolist()})
        t_coef = Transmission(df=coef_df, history_trace=self.t.history_trace)

        means_df = pd.DataFrame({'classes': self.lda.classes_, '_MEANS': self.lda.means_.tolist()})
        t_means = Transmission(df=means_df, history_trace=self.t.history_trace)

        out = {'T': self.t, 'coef': t_coef, 'means': t_means, 'predicted': None}

        # Predict using the trained model
        predict_columns = self.ctrls['predict_on'].getSelectedItems()

        if not predict_columns:
            return out

        if predict_columns != train_columns:
            QtWidgets.QMessageBox.warning(None, 'Predict and Train columns do not match',
                                          'The selected train and predict columns are different')

        predict_data = np.hstack([np.vstack(self.to_predict.df[predict_column]) for predict_column in predict_columns])
        self.to_predict.df['LDA_PREDICTED_LABELS'] = self.lda.predict(predict_data)
        self.to_predict.df['_LDA_TRANSFORM'] = self.lda.transform(predict_data).tolist()
        self.to_predict.df['_LDA_TRANSFORM'] = self.to_predict.df['_LDA_TRANSFORM'].apply(np.array)

        params_predict = params.copy()
        params_predict.update({'predict_columns': predict_columns})

        self.to_predict.history_trace.add_operation('all', 'lda-predict', params_predict)

        out.update({'predicted': self.to_predict})

        return out
Example #27
def lda(X, y, *, solver='svd', shrinkage=None, n_components=None):
    """Linear discriminant analysis.

    This function reduces the dimensionality of the input by projecting it to 
    the most discriminative directions.
    
    Parameters
    ----------

    X : ndarray of shape (n_samples, n_features_pre) 
        Feature matrix. 
    
    y : ndarray of shape (n_samples,)
        Class labels.
    
    solver : string, default='svd'
        'svd' : Singular value decomposition
        'lsqr' : Least squares solution
        'eigen' : Eigenvalue decomposition
        
    shrinkage : string, float, or None, default=None
        Shrinkage parameter.
        None : no shrinkage
        'auto' : automatic shrinkage using the Ledoit-Wolf lemma
        float between 0 and 1: fixed shrinkage parameter
        
    n_components : int or None, default=None
        Number of components for dimensionality reduction. This parameter 
        cannot be larger than min(n_features, n_classes - 1).

    Returns
    -------

    arr :  ndarray of shape (n_samples, n_features_post)
        Array containing the LDA-transformed features.

    Examples
    --------

    >>> import numpy as np
    >>> from protlearn.features import aac, aaindex1, ngram
    >>> from protlearn.dimreduction import lda
    >>> seqs = ['ARKLY', 'EERKPGL', 'PGPGEERNLY']
    >>> labels = [1., 0., 0.]
    >>> comp, _ = aac(seqs)
    >>> aaind, _ = aaindex1(seqs)
    >>> ng, _ = ngram(seqs)
    >>> features = np.concatenate([comp, aaind, ng], axis=1)
    >>> features.shape
    (3, 575)
    >>> reduced = lda(features, labels, n_components=1)
    >>> reduced.shape
    (3, 1)
    
    """
    
    mdl = LinearDiscriminantAnalysis(solver=solver, 
                                     shrinkage=shrinkage,
                                     n_components=n_components)
    arr = mdl.fit_transform(X, y)
    
    return arr
Example #28
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.30,
                                                    random_state=20,
                                                    stratify=y)

#Standardize Features
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

#Linear Discriminant Analysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=6)
X_train_lda = lda.fit_transform(X_train_std, y_train)
X_test_lda = lda.transform(X_test_std)  # transform only; refitting on the test set would leak its labels

#Plot 2-D LDA
markers = ('s', 'x', 'o', '^', 'v', 'P', '*')
#plt.scatter(X_train_lda[:, 0], X_train_lda[:, 1], c=y_train, cmap=plt.cm.Paired, marker=markers[0])
#plt.xlabel('LDA 1')
#plt.ylabel('LDA 2')

#Declare empty arrays for accuracy and number of neighbours
accuracy = np.zeros((6, 1))
accuracy_val = np.zeros((6, 1))
neighbours = np.linspace(2, 7, 6)

for x in neighbours:
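    # hedged completion of the truncated loop (illustrative): fit a KNN per
    # neighbour count and record train and test accuracy
    from sklearn.neighbors import KNeighborsClassifier  # assumed imported once in the full script
    knn = KNeighborsClassifier(n_neighbors=int(x))
    knn.fit(X_train_lda, y_train)
    accuracy[int(x) - 2] = knn.score(X_train_lda, y_train)
    accuracy_val[int(x) - 2] = knn.score(X_test_lda, y_test)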
Example #29
from sklearn.decomposition import PCA

pca = PCA(17)
fit = pca.fit(dataX, dataY)
train2 = pca.transform(dataX)
acuu(train2, dataY)

import warnings

warnings.filterwarnings("ignore")

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

lda = LDA(n_components=17)
fit = lda.fit_transform(dataX, dataY)
train3 = lda.transform(dataX)
# fit = lda.fit(X=dataX, y=dataY)
# train2 = fit.fit_transform(dataX)
print(train3.shape)
acuu(train3, dataY)



#--------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

test = SelectKBest(score_func=chi2, k=11)        # k is number of features
Example #30
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.preprocessing import Normalizer
'''
Feature dimensionality reduction
	PCA
'''
import pandas as pd

train = pd.read_csv(r"G:\比赛分享\data\alltrain.csv")
test = pd.read_csv(r"G:\比赛分享\data\alltest.csv")

y = train['label']
del train['label']
del train['id']
id_a = test['id']
del test['id']

nor = Normalizer()
train = nor.fit_transform(train)
test = nor.transform(test)

lda = LDA(n_components=1)  # n_classes - 1, so only one-dimensional features can be produced
train_lda = lda.fit_transform(train, y)
test_lda = lda.transform(test)

pca = PCA(50)
train_pca = pca.fit_transform(train)
test_pca = pca.transform(test)
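# Hedged aside (illustrative): with two classes LDA can produce at most
# n_classes - 1 = 1 discriminant, regardless of a larger requested n_components.
import numpy as np
demo_X = np.random.randn(10, 5)
demo_y = np.array([0, 1] * 5)
print(LDA(n_components=1).fit_transform(demo_X, demo_y).shape)  # (10, 1)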
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=0)

# Applying LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=2)
X_train = lda.fit_transform(X_train, y_train)
X_test = lda.transform(X_test)

# Training the Logistic Regression model on the Training set
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)
Example #32
def lda_project(spike_times,
                spike_clusters,
                event_times,
                event_groups,
                pre_time=0,
                post_time=0.5,
                cross_validation='kfold',
                num_splits=5,
                prob_left=None,
                custom_validation=None):
    """
    Use linear discriminant analysis to project population vectors to the line that best separates
    the two groups. When cross-validation is used, the LDA projection is fitted on the training
    data after which the test data is projected to this projection.

    spike_times : 1D array
        spike times (in seconds)
    spike_clusters : 1D array
        cluster ids corresponding to each spike in `spike_times`
    event_times : 1D array
        times (in seconds) of the events from the two groups
    event_groups : 1D array
        group identities of the events, can be any number of groups, accepts integers and strings
    cross_validation : string
        which cross-validation method to use, options are:
            'none'              No cross-validation
            'kfold'             K-fold cross-validation
            'leave-one-out'     Leave out the trial that is being decoded
            'block'             Leave out the block the to-be-decoded trial is in
            'custom'            Any custom cross-validation provided by the user
    num_splits : integer
        ** only for 'kfold' cross-validation **
        Number of splits to use for k-fold cross validation, a value of 5 means that the decoder
        will be trained on 4/5th of the data and used to predict the remaining 1/5th. This process
        is repeated five times so that all data has been used as both training and test set.
    prob_left : 1D array
        ** only for 'block' cross-validation **
        the probability of the stimulus appearing on the left for each trial in event_times
    custom_validation : generator
        ** only for 'custom' cross-validation **
        a generator object with the splits to be used for cross validation using this format:
            (
                (split1_train_idxs, split1_test_idxs),
                (split2_train_idxs, split2_test_idxs),
                (split3_train_idxs, split3_test_idxs),
             ...)
    pre_time, post_time : float
        time (in seconds) before and after each event over which spike counts
        are computed for the population vectors
    Returns
    -------
    lda_projection : 1D array
        the position along the LDA projection axis for the population vector of each trial

    """

    # Check input
    assert cross_validation in [
        'none', 'kfold', 'leave-one-out', 'block', 'custom'
    ]
    assert event_times.shape[0] == event_groups.shape[0]
    if cross_validation == 'block':
        assert event_times.shape[0] == prob_left.shape[0]
    if cross_validation == 'custom':
        assert isinstance(custom_validation, types.GeneratorType)

    # Get matrix of all neuronal responses
    times = np.column_stack(
        ((event_times - pre_time), (event_times + post_time)))
    pop_vector, cluster_ids = get_spike_counts_in_bins(spike_times,
                                                       spike_clusters, times)
    pop_vector = pop_vector.T

    # Initialize
    lda = LinearDiscriminantAnalysis()
    lda_projection = np.zeros(event_groups.shape)

    if cross_validation == 'none':
        # Find the best LDA projection on all data and transform those data
        lda_projection = lda.fit_transform(pop_vector, event_groups)

    else:
        # Perform cross-validation
        if cross_validation == 'leave-one-out':
            cv = LeaveOneOut().split(pop_vector)
        elif cross_validation == 'kfold':
            cv = KFold(n_splits=num_splits).split(pop_vector)
        elif cross_validation == 'block':
            block_lengths = [sum(1 for i in g) for k, g in groupby(prob_left)]
            blocks = np.repeat(np.arange(len(block_lengths)), block_lengths)
            cv = LeaveOneGroupOut().split(pop_vector, groups=blocks)
        elif cross_validation == 'custom':
            cv = custom_validation

        # Loop over the splits into train and test
        for train_index, test_index in cv:

            # Find LDA projection on the training data
            lda.fit(pop_vector[train_index],
                    [event_groups[j] for j in train_index])

            # Project the held-out test data to projection
            lda_projection[test_index] = lda.transform(
                pop_vector[test_index]).T[0]

    return lda_projection
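# Hedged usage sketch (illustrative): a custom_validation generator in the
# documented format, here simply splitting the trials into two halves.
import numpy as np

def two_fold_splits(n_trials):
    idx = np.arange(n_trials)
    yield idx[:n_trials // 2], idx[n_trials // 2:]
    yield idx[n_trials // 2:], idx[:n_trials // 2]

# projection = lda_project(spike_times, spike_clusters, event_times, event_groups,
#                          cross_validation='custom',
#                          custom_validation=two_fold_splits(event_times.shape[0]))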
# Perform Leave One Out validation for the LDA - Decision Tree Classifier

total_score=0
for train_index,test_index in LOO.split(feature):
    train_features, test_features = feature[train_index], feature[test_index]
    train_labels, test_labels = labels[train_index], labels[test_index]

    lda  = LDA()
    lda=lda.fit(train_features,train_labels.ravel())
    lda_train_set = lda.transform(train_features)
    lda_test_set = lda.transform(test_features)
    
    clf_lda=d3.fit(lda_train_set,train_labels)
    prediction_lda=clf_lda.predict(lda_test_set)
    total_score+=accuracy_score(test_labels,prediction_lda) 
mean_score = total_score / LOO.get_n_splits(feature)  # one split per sample
score = mean_score
print("LDA Scores + leave one cross_validation:",score)


# Perform Cross Validation for 10 folds for the LDA-Decision Tree Classifier

lda = LDA()
lda_features=lda.fit_transform(feature,labels.ravel()) 

scores = cross_val_score(d3, lda_features, labels, cv=10)
scores = scores.mean()
print("LDA Scores + 10 fold cross_validation:",scores)


Example #34
# ---------
scaler = MinMaxScaler(copy=True, feature_range=(0, 1))
X = scaler.fit_transform(X)
# ---------
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=1 / 3,
                                                    random_state=16,
                                                    shuffle=True)
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)
print("=" * 25)
# ---------
# Applying LDAModel Model
LDAModel = LDA(n_components=2, solver='svd')
X = LDAModel.fit_transform(X, y)
# X_train = LDAModel.fit_transform(X_train,y_train)
# X_test = LDAModel.transform(X_test)
print(X.shape)
print("=" * 10)
# ---------
scaler = MinMaxScaler(copy=True, feature_range=(0, 1))
X = scaler.fit_transform(X)
# ---------
# ----------------------------------------------------
# Calculating Details
print('LDAModel Train Score is : ', LDAModel.score(X_train, y_train))
print('LDAModel Test Score is : ', LDAModel.score(X_test, y_test))
print("=" * 10)
# LDAModel Train Score is :  0.98
# LDAModel Test Score is :  0.98
def run(train_pyramid_descriptors, D, test_pyramid_descriptors,
        feat_des_options):

    train_images_filenames = cPickle.load(
        open('train_images_filenames.dat', 'rb'))
    test_images_filenames = cPickle.load(
        open('test_images_filenames.dat', 'rb'))
    train_labels = cPickle.load(open('train_labels.dat', 'rb'))
    test_labels = cPickle.load(open('test_labels.dat', 'rb'))

    k = feat_des_options['k']
    codebook = MiniBatchKMeans(n_clusters=k,
                               verbose=False,
                               batch_size=k * 20,
                               compute_labels=False,
                               reassignment_ratio=10**-4,
                               random_state=42)
    codebook.fit(D)

    visual_words_pyramid = np.zeros((len(train_pyramid_descriptors),
                                     k * len(train_pyramid_descriptors[0])),
                                    dtype=np.float32)
    for i in range(len(train_pyramid_descriptors)):
        visual_words_pyramid[i, :] = spatial_pyramid_histograms(
            train_pyramid_descriptors[i], codebook, k)

    knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1, metric='euclidean')
    knn.fit(visual_words_pyramid, train_labels)

    # logreg = LogisticRegression(random_state=0,max_iter=300).fit(visual_words_pyramid, train_labels)
    # scores = cross_validate(logreg, visual_words_pyramid, train_labels,scoring = ['precision_macro', 'recall_macro','f1_macro'], cv=5,return_estimator=True)

    scores = cross_validate(
        knn,
        visual_words_pyramid,
        train_labels,
        scoring=['accuracy', 'precision_macro', 'recall_macro', 'f1_macro'],
        cv=8,
        return_estimator=True)
    cross_val_accuracy = scores['test_accuracy'].mean()
    cross_val_precision = scores['test_precision_macro'].mean()
    cross_val_recall = scores['test_recall_macro'].mean()
    cross_val_f1 = scores['test_f1_macro'].mean()
    # print("%0.2f precision with a std dev of %0.2f" % (cross_val_precision, scores['test_precision_macro'].std()))
    # print("%0.2f recall with a std dev of %0.2f" % (cross_val_recall, scores['test_recall_macro'].std()))
    # print("%0.2f F1-score with a std dev of %0.2f" % (cross_val_f1, scores['test_f1_macro'].std()))

    visual_words_test = np.zeros(
        (len(test_images_filenames), visual_words_pyramid.shape[1]),
        dtype=np.float32)
    for i in range(len(test_images_filenames)):
        visual_words_test[i, :] = spatial_pyramid_histograms(
            test_pyramid_descriptors[i], codebook, k)

    test_accuracy = 100 * knn.score(visual_words_test, test_labels)
    # print("Test accuracy: %0.2f" % (test_accuracy))

    test_prediction = knn.predict(visual_words_test)
    # test_prediction = logreg.predict(visual_words_test)
    test_precision, test_recall, test_fscore, _ = precision_recall_fscore_support(
        test_labels, test_prediction, average='macro')
    # print("%0.2f precision" % (test_precision))
    # print("%0.2f recall" % (test_recall))
    # print("%0.2f F1-score" % (test_fscore))

    # pca = PCA(n_components=64)
    pca = PCA(n_components=feat_des_options['pca_perc'], svd_solver='full')
    VWpca = pca.fit_transform(visual_words_pyramid)
    knnpca = KNeighborsClassifier(n_neighbors=5, n_jobs=-1, metric='euclidean')
    knnpca.fit(VWpca, train_labels)
    vwtestpca = pca.transform(visual_words_test)
    pca_test_accuracy = 100 * knnpca.score(vwtestpca, test_labels)
    # print("PCA Test accuracy: %0.2f" % (pca_test_accuracy))
    scores_pca = cross_validate(
        knnpca,
        VWpca,  # cross-validate on the PCA features knnpca was built for
        train_labels,
        scoring=['accuracy', 'precision_macro', 'recall_macro', 'f1_macro'],
        cv=8,
        return_estimator=True)
    cross_val_accuracy_pca = scores_pca['test_accuracy'].mean()
    cross_val_precision_pca = scores_pca['test_precision_macro'].mean()
    cross_val_recall_pca = scores_pca['test_recall_macro'].mean()
    cross_val_f1_pca = scores_pca['test_f1_macro'].mean()

    lda = LinearDiscriminantAnalysis(n_components=7)
    VWlda = lda.fit_transform(visual_words_pyramid, train_labels)
    knnlda = KNeighborsClassifier(n_neighbors=5, n_jobs=-1, metric='euclidean')
    knnlda.fit(VWlda, train_labels)
    vwtestlda = lda.transform(visual_words_test)
    lda_test_accuracy = 100 * knnlda.score(vwtestlda, test_labels)
    # print("LDA Test accuracy: %0.2f" % (lda_test_accuracy))

    return [
        cross_val_accuracy, cross_val_precision, cross_val_recall,
        cross_val_f1, test_precision, test_recall, test_fscore, test_accuracy,
        pca_test_accuracy, cross_val_accuracy_pca, cross_val_precision_pca,
        cross_val_recall_pca, cross_val_f1_pca, lda_test_accuracy
    ]
Example #36
dataset = pd.read_csv('../../data/Wine.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=0)

# Feature Scaling
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)  # reuse the scaler fitted on the training set

# Dimensionality Reduction with LDA, n_components -> number of dimensions we want to get to
lda = LinearDiscriminantAnalysis(n_components=2)
X_train = lda.fit_transform(
    X_train, y_train)  # y_train is necessary since it is supervised algorithm
X_test = lda.transform(X_test)

# Model
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", cm)

print("Accuracy Score:", accuracy_score(y_test, y_pred))
Example #37
         color='blue',
         linewidth=4)
plt.xlabel('Number of components')
plt.ylabel('Cumulative explained variance')
plt.show()

#Applying Kernel PCA #Please Turn Off when applying PCA
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=32, kernel='rbf')
X_train = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)

#Applying LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=35)
X_train = lda.fit_transform(X_train, Y_train)
X_test = lda.transform(X_test)  # transform only; refitting on the test set would leak its labels

#Fitting SVM to the Training Set
from sklearn.svm import SVC
classifier = SVC(
    kernel='rbf',
    random_state=0)  #kernel can be changed to linear for linear SVM
classifier.fit(X_train, Y_train)

#Fitting Decision Tree to the Training Set
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
classifier.fit(X_train, Y_train)

#Predicting the Test Set Results
Beispiel #38
0
"""
使得空间中两类的距离尽可能的远。
"""
print(__doc__)

import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

mean = [0, 0]  # mean
cov = [[1, 0.9], [0.9, 1]]  # covariance
x, y = np.random.multivariate_normal(mean, cov, 1000).T
x = np.reshape(x, [-1, 1])
y = np.reshape(y, [-1, 1])
X = np.concatenate([x, y], axis=1)
label = np.zeros_like(x[:, 0])
label[x[:, 0] > 0] = 1
lda = LinearDiscriminantAnalysis()
X_lda = lda.fit_transform(X, label)

fig = plt.figure()
ax = fig.add_subplot(211)
ax.scatter(X[:, 0], X[:, 1], c=label)
ax.axis("equal")
ax = fig.add_subplot(212)
ax.scatter(X_lda, np.zeros_like(X_lda), c=label)
ax.axis("equal")
plt.show()
Beispiel #39
0
#y_test = y_test[np.newaxis]

print("Shape of Train set features (X_train) :  ", X_train.shape)
print("Shape of Train set labels (y_train) :  ", y_train.shape)
print("Shape of Test set features (X_test) :  ", X_test.shape)
print("Shape of Test set labels (y_test) :  ", y_test.shape)

# ### 5. APPLYING LDA TO TEST AND TRAIN

# In[5]:

###  LDA

lda = LinearDiscriminantAnalysis()

X_train_lda = lda.fit_transform(X_train, y_train)

X_test_lda = lda.transform(X_test)

print("Shape of Feature Test set Before LDA: ", X_train.shape)
print("Shape of Feature Test set After LDA: ", X_train_lda.shape)

print("Shape of Feature Test set Before LDA: ", X_test.shape)
print("Shape of Feature Test set After LDA: ", X_test_lda.shape)

# ### 5.1 Applying LDA to Linear SVM

# In[6]:

###SVM - Linear
Beispiel #40
0
def show_scatter(ax, title, A, A_axis, mask, features, args):
    '''
    show the scatter plot (mainly A matrix)
    '''
    if args.factor is not None:
        cmap = matplotlib.cm.get_cmap('rainbow')

        # display index of each point in gray style
        # for i, (x, y) in enumerate( A ):
        # ax.text( x, y, str(i+1),
        # color='gray',
        # fontsize=4,
        # alpha=0.4,
        # horizontalalignment='center',
        # verticalalignment='center' )

        # scatter with markers/colors
        C, Cnames, M, Mnames, y = parse_color_marker(args.factor, args.color,
                                                     args.marker)
        if A.shape[1] > 2:
            from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
            lda = LinearDiscriminantAnalysis(n_components=2)
            A = lda.fit_transform(A, y)
            print(lda.explained_variance_ratio_,
                  lda.explained_variance_ratio_.sum())

        for marker in set(M):
            idx = np.array([_m == marker for _m in M])
            ax.scatter(A[idx, 0],
                       A[idx, 1],
                       c=C[idx],
                       marker=marker,
                       cmap=cmap,
                       alpha=.5,
                       s=7)

        # generate the legend
        handles = []
        for c, name in Cnames:
            handles.append(mpatches.Patch(color=cmap(c), label=name))
        for m, name in Mnames:
            handles.append(
                mlines.Line2D([], [], c='k', lw=0.5, marker=m, label=name))
        #ax.legend( handles=handles, loc='best', prop={'size': 6} )

    else:
        cmap = matplotlib.cm.get_cmap('Spectral')
        for i, (x, y) in enumerate(A):
            if not mask[i]:
                ax.text(x,
                        y,
                        str(i + 1),
                        color=cmap(i / A.shape[0]),
                        fontsize=4,
                        fontweight='black',
                        alpha=0.8,
                        horizontalalignment='center',
                        verticalalignment='center')

        ax.scatter(A[mask, 0], A[mask, 1], color='black', s=8, alpha=0.8)

    # show the axis
    if A_axis is not None:
        cmap = matplotlib.cm.get_cmap('ocean')
        score_a = []
        # NOTE: 'axis' and 'axis_curves' are assumed to come from the enclosing
        # scope; only A_axis is checked for None above.
        for i, a in enumerate(axis):
            _xy = axis_curves[i * 20:(i + 1) * 20] - A.mean(0)
            score_a.append((a, np.linalg.norm(_xy)))
        score_a.sort(key=lambda _: _[1], reverse=True)
        a_list = [score_a[i][0] for i in range(min(7, len(score_a)))]  # top axes by distance, at most 7

        c = 0
        for i, a in enumerate(axis):
            if not (a in a_list): continue

            color = c / (len(a_list) + 1)
            c += 1

            _x = axis_curves[i * 20:(i + 1) * 20, 0]
            _y = axis_curves[i * 20:(i + 1) * 20, 1]

            ax.plot(_x, _y, c=cmap(color))
            ax.arrow(_x[-2],
                     _y[-2],
                     _x[-1] - _x[-2],
                     _y[-1] - _y[-2],
                     color=cmap(color),
                     head_width=.3)
            ax.text(_x[-1],
                    _y[-1],
                    features[a],
                    fontsize=7,
                    color=cmap(color),
                    alpha=.7)
            print(features[a])

    # set the plotting axis
    xmin = A[:, 0].min()
    xmax = A[:, 0].max()
    margin = 0.05 * (xmax - xmin)
    xmin -= margin
    xmax += margin
    ax.set_xlim(xmin, xmax)
    ax.set_xticks([xmin, xmax])

    ymin = A[:, 1].min()
    ymax = A[:, 1].max()
    margin = 0.05 * (ymax - ymin)
    ymin -= margin
    ymax += margin
    ax.set_ylim(ymin, ymax)
    ax.set_yticks([ymin, ymax])

    ax.set_xlabel(r'\#comp1', labelpad=-10)
    ax.set_ylabel(r'\#comp2', labelpad=-25)
    ax.set_title(title)
Beispiel #41
0
def main():
    attrs, classes = prepare_ds('cov_data.csv')

    for cls, color in zip(range(1, 4), ('red', 'green', 'blue')):
        attr_one = attrs[:, 0][classes == cls]
        attr_two = attrs[:, 1][classes == cls]
        p = pearsonr(attr_one, attr_two)
        plt.scatter(x=attr_one,
                    y=attr_two,
                    marker='o',
                    color=color,
                    label='cls: {:}, pearsonr={:.2f}'.format(cls, p[0]))

    plt.title('Pearson correlation')
    plt.xlabel('Elevation, m')
    plt.ylabel('Slope, num')
    plt.legend(loc='upper right')
    plt.show()

    data_train, data_test, class_train, class_test = train_test_split(
        attrs,
        classes,
        test_size=.3,
        random_state=123,
    )

    lda = LDA(n_components=2)
    lda_transform = lda.fit_transform(data_train, class_train)

    plt.figure(figsize=(10, 8))
    for cls, color in zip(range(1, 4), ('red', 'green', 'blue')):
        attr_one = lda_transform[:, 0][class_train == cls]
        attr_two = lda_transform[:, 1][class_train == cls]
        plt.scatter(x=attr_one,
                    y=attr_two,
                    marker='o',
                    color=color,
                    label='cls: {:}'.format(cls))

    plt.xlabel('vec 1')
    plt.ylabel('vec 2')
    plt.legend()
    plt.show()

    lda_clf = LDA()
    lda_clf.fit(data_train, class_train)

    pred_train_lda = lda_clf.predict(data_train)
    print('Classification accuracy on the training set (LDA): {:.2%}'.format(
        metrics.accuracy_score(class_train, pred_train_lda)))

    pred_test_lda = lda_clf.predict(data_test)
    print('Classification accuracy on the test set (LDA): {:.2%}'.format(
        metrics.accuracy_score(class_test, pred_test_lda)))

    qda_clf = QuadraticDiscriminantAnalysis()
    qda_clf.fit(data_train, class_train)

    pred_train_qda = qda_clf.predict(data_train)
    print('Classification accuracy on the training set (QDA): {:.2%}'.format(
        metrics.accuracy_score(class_train, pred_train_qda)))

    pred_test_qda = qda_clf.predict(data_test)
    print('Classification accuracy on the test set (QDA): {:.2%}'.format(
        metrics.accuracy_score(class_test, pred_test_qda)))
Beispiel #42
0
def discriminatePlot(X, y, cVal, titleStr='', figdir='.', Xcolname=None):
    # Frederic's robust wrapper for discriminant analysis.  Performs LDA, QDA and RF after error checking,
    # generates nice plots and returns cross-validated
    # performance, stderr and baseline.
    # X np array n rows x p parameters
    # y group labels n rows
    # rgb color code for each data point - should be the same for each data point belonging to the same group
    # titleStr title for plots
    # figdir is a directory name (folder name) for eps figures
    # Xcolname is a np.array or list of strings with column names for printout display
    # returns: ldaScore, ldaScoreSE, qdaScore, qdaScoreSE, rfScore, rfScoreSE, nClasses

    # Global Parameters
    CVFOLDS = 10
    MINCOUNT = 10
    MINCOUNTTRAINING = 5
    # figdir = '/Users/frederictheunissen/Documents/Data/Julie/Acoustical Analysis/Figures Voice'

    # Initialize Variables and clean up data
    classes, classesCount = np.unique(
        y, return_counts=True
    )  # Classes to be discriminated should be same as ldaMod.classes_
    goodIndClasses = np.array([n >= MINCOUNT for n in classesCount])
    goodInd = np.array([b in classes[goodIndClasses] for b in y])
    yGood = y[goodInd]
    XGood = X[goodInd]
    cValGood = cVal[goodInd]

    classes, classesCount = np.unique(yGood, return_counts=True)
    nClasses = classes.size  # Number of classes or groups

    # Do we have enough data?
    if (nClasses < 2):
        print(
            'Error in ldaPlot: Insufficient classes with minimum data (%d) for discrimination analysis'
            % (MINCOUNT))
        return -1, -1, -1, -1, -1, -1, -1
    cvFolds = min(min(classesCount), CVFOLDS)
    if (cvFolds < CVFOLDS):
        print(
            'Warning in ldaPlot: Cross-validation performed with %d folds (instead of %d)'
            % (cvFolds, CVFOLDS))

    # Data size and color values
    nD = XGood.shape[1]  # number of features in X
    nX = XGood.shape[0]  # number of data points in X
    cClasses = []  # Color code for each class
    for cl in classes:
        icl = (yGood == cl).nonzero()[0][0]
        cClasses.append(np.append(cValGood[icl], 1.0))
    cClasses = np.asarray(cClasses)

    # Use a uniform prior
    myPrior = np.ones(nClasses) * (1.0 / nClasses)

    # Perform a PCA for dimensionality reduction so that the covariance matrix can be fitted.
    nDmax = int(np.fix(np.sqrt(nX / 5)))
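    # i.e. pick nDmax so that nX is about 5 * nDmax**2: roughly five samples
    # per estimated covariance entry, which keeps the class covariances estimable.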
    if nDmax < nD:
        print('Warning: Insufficient data for', nD,
              'parameters. PCA projection to', nDmax, 'dimensions.')
    nDmax = min(nD, nDmax)
    pca = PCA(n_components=nDmax)
    Xr = pca.fit_transform(XGood)
    print('Variance explained is %.2f%%' %
          (sum(pca.explained_variance_ratio_) * 100.0))

    # Initialise Classifiers
    ldaMod = LDA(n_components=min(nDmax, nClasses - 1),
                 priors=myPrior,
                 shrinkage=None,
                 solver='svd')
    qdaMod = QDA(priors=myPrior)
    rfMod = RF()  # by default assumes equal weights

    # Perform CVFOLDS fold cross-validation to get performance of classifiers.
    ldaScores = np.zeros(cvFolds)
    qdaScores = np.zeros(cvFolds)
    rfScores = np.zeros(cvFolds)
    # legacy (<0.18) sklearn API; modern equivalent: model_selection.StratifiedKFold(n_splits=cvFolds).split(Xr, yGood)
    skf = cross_validation.StratifiedKFold(yGood, cvFolds)
    iskf = 0

    for train, test in skf:

        # Enforce the MINCOUNT in each class for Training
        trainClasses, trainCount = np.unique(yGood[train], return_counts=True)
        goodIndClasses = np.array([n >= MINCOUNTTRAINING for n in trainCount])
        goodIndTrain = np.array(
            [b in trainClasses[goodIndClasses] for b in yGood[train]])

        # Specify the training data set, the number of groups and priors
        yTrain = yGood[train[goodIndTrain]]
        XrTrain = Xr[train[goodIndTrain]]

        trainClasses, trainCount = np.unique(yTrain, return_counts=True)
        ntrainClasses = trainClasses.size

        # Skip this cross-validation fold because of insufficient data
        if ntrainClasses < 2:
            continue
        goodInd = np.array([b in trainClasses for b in yGood[test]])
        if not goodInd.any():  # no test points from classes seen in training
            continue

        # Fit the data
        trainPriors = np.ones(ntrainClasses) * (1.0 / ntrainClasses)
        ldaMod.priors = trainPriors
        qdaMod.priors = trainPriors
        ldaMod.fit(XrTrain, yTrain)
        qdaMod.fit(XrTrain, yTrain)
        rfMod.fit(XrTrain, yTrain)

        ldaScores[iskf] = ldaMod.score(Xr[test[goodInd]], yGood[test[goodInd]])
        qdaScores[iskf] = qdaMod.score(Xr[test[goodInd]], yGood[test[goodInd]])
        rfScores[iskf] = rfMod.score(Xr[test[goodInd]], yGood[test[goodInd]])

        iskf += 1

    if (iskf != cvFolds):
        cvFolds = iskf
        # reshape() returned a copy and was discarded; truncate to the folds actually run
        ldaScores = ldaScores[:cvFolds]
        qdaScores = qdaScores[:cvFolds]
        rfScores = rfScores[:cvFolds]

    # Refit with all the data for the plots

    ldaMod.priors = myPrior
    qdaMod.priors = myPrior
    Xrr = ldaMod.fit_transform(Xr, yGood)
    # Check labels
    for a, b in zip(classes, ldaMod.classes_):
        if a != b:
            print('Error in ldaPlot: labels do not match')

    # Print the five largest coefficients of first 3 DFA
    MAXCOMP = 3  # Maximum number of DFA components
    MAXWEIGHT = 5  # Maximum number of weights printed for each component

    ncomp = min(MAXCOMP, nClasses)
    nweight = min(MAXWEIGHT, nD)
    weights = np.dot(ldaMod.coef_[0:ncomp, :], pca.components_)

    print('LDA Weights:')
    for ic in range(ncomp):
        idmax = np.argsort(np.abs(weights[ic, :]))[::-1]
        print('DFA %d: ' % ic, end='')
        for iw in range(nweight):
            if Xcolname is None:
                colstr = 'C%d' % idmax[iw]
            else:
                colstr = Xcolname[idmax[iw]]
            print('%s %.3f; ' % (colstr, float(weights[ic, idmax[iw]])),
                  end='')
        print()

    # Obtain fits in this rotated space for display purposes
    ldaMod.fit(Xrr, yGood)
    qdaMod.fit(Xrr, yGood)
    rfMod.fit(Xrr, yGood)

    XrrMean = Xrr.mean(0)

    # Make a mesh for plotting
    x1, x2 = np.meshgrid(np.arange(-6.0, 6.0, 0.1), np.arange(-6.0, 6.0, 0.1))
    xm1 = np.reshape(x1, -1)
    xm2 = np.reshape(x2, -1)
    nxm = np.size(xm1)
    Xm = np.zeros((nxm, Xrr.shape[1]))
    Xm[:, 0] = xm1
    if Xrr.shape[1] > 1:
        Xm[:, 1] = xm2

    for ix in range(2, Xrr.shape[1]):
        Xm[:, ix] = np.squeeze(np.ones((nxm, 1))) * XrrMean[ix]

    XmcLDA = np.zeros((nxm, 4))  # RGBA values for color for LDA
    XmcQDA = np.zeros((nxm, 4))  # RGBA values for color for QDA
    XmcRF = np.zeros((nxm, 4))  # RGBA values for color for RF

    # Predict values on mesh for plotting based on the first two DFs
    yPredLDA = ldaMod.predict_proba(Xm)
    yPredQDA = qdaMod.predict_proba(Xm)
    yPredRF = rfMod.predict_proba(Xm)

    # Transform the predictions in color codes
    maxLDA = yPredLDA.max()
    for ix in range(nxm):
        cWeight = yPredLDA[ix, :]  # Prob for all classes
        cWinner = (
            (cWeight == cWeight.max()).astype('float'))  # Winner takes all
        # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses
        XmcLDA[ix, :] = np.dot(cWinner, cClasses)
        XmcLDA[ix, 3] = cWeight.max() / maxLDA

    # Plot the surface of probability
    plt.figure(facecolor='white', figsize=(10, 3))
    plt.subplot(131)
    Zplot = XmcLDA.reshape(np.shape(x1)[0], np.shape(x1)[1], 4)
    plt.imshow(Zplot,
               zorder=0,
               extent=[-6, 6, -6, 6],
               origin='lower',
               interpolation='none',
               aspect='auto')
    if nClasses > 2:
        plt.scatter(Xrr[:, 0], Xrr[:, 1], c=cValGood, s=40, zorder=1)
    else:
        plt.scatter(Xrr, (np.random.rand(Xrr.size) - 0.5) * 12.0,
                    c=cValGood,
                    s=40,
                    zorder=1)
    plt.title('%s: LDA pC %.0f %%' % (titleStr, (ldaScores.mean() * 100.0)))
    plt.axis('square')
    plt.xlim((-6, 6))
    plt.ylim((-6, 6))
    plt.xlabel('DFA 1')
    plt.ylabel('DFA 2')

    # Transform the predictions in color codes
    maxQDA = yPredQDA.max()
    for ix in range(nxm):
        cWeight = yPredQDA[ix, :]  # Prob for all classes
        cWinner = (
            (cWeight == cWeight.max()).astype('float'))  # Winner takes all
        # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses
        XmcQDA[ix, :] = np.dot(cWinner, cClasses)
        XmcQDA[ix, 3] = cWeight.max() / maxQDA

    # Plot the surface of probability
    plt.subplot(132)
    Zplot = XmcQDA.reshape(np.shape(x1)[0], np.shape(x1)[1], 4)
    plt.imshow(Zplot,
               zorder=0,
               extent=[-6, 6, -6, 6],
               origin='lower',
               interpolation='none',
               aspect='auto')
    if nClasses > 2:
        plt.scatter(Xrr[:, 0], Xrr[:, 1], c=cValGood, s=40, zorder=1)
    else:
        plt.scatter(Xrr, (np.random.rand(Xrr.size) - 0.5) * 12.0,
                    c=cValGood,
                    s=40,
                    zorder=1)
    plt.title('%s: QDA pC %.0f %%' % (titleStr, (qdaScores.mean() * 100.0)))
    plt.xlabel('DFA 1')
    plt.ylabel('DFA 2')
    plt.axis('square')
    plt.xlim((-6, 6))
    plt.ylim((-6, 6))
    plt.savefig('%s/%s.eps' % (figdir, titleStr))

    # Transform the predictions in color codes
    maxRF = yPredRF.max()
    for ix in range(nxm):
        cWeight = yPredRF[ix, :]  # Prob for all classes
        cWinner = (
            (cWeight == cWeight.max()).astype('float'))  # Winner takes all
        # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses  # Weighted colors does not work
        XmcRF[ix, :] = np.dot(cWinner, cClasses)
        XmcRF[ix, 3] = cWeight.max() / maxRF

    # Plot the surface of probability
    plt.subplot(133)
    Zplot = XmcRF.reshape(np.shape(x1)[0], np.shape(x1)[1], 4)
    plt.imshow(Zplot,
               zorder=0,
               extent=[-6, 6, -6, 6],
               origin='lower',
               interpolation='none',
               aspect='auto')
    if nClasses > 2:
        plt.scatter(Xrr[:, 0], Xrr[:, 1], c=cValGood, s=40, zorder=1)
    else:
        plt.scatter(Xrr, (np.random.rand(Xrr.size) - 0.5) * 12.0,
                    c=cValGood,
                    s=40,
                    zorder=1)
    plt.title('%s: RF pC %.0f %%' % (titleStr, (rfScores.mean() * 100.0)))
    plt.xlabel('DFA 1')
    plt.ylabel('DFA 2')
    plt.axis('square')
    plt.xlim((-6, 6))
    plt.ylim((-6, 6))

    plt.show()

    # Results
    ldaScore = ldaScores.mean() * 100.0
    qdaScore = qdaScores.mean() * 100.0
    rfScore = rfScores.mean() * 100.0
    ldaScoreSE = ldaScores.std() * 100.0  # std across folds; divide by sqrt(cvFolds) for a true standard error
    qdaScoreSE = qdaScores.std() * 100.0
    rfScoreSE = rfScores.std() * 100.0

    print("Number of classes %d. Chance level %.2f %%" %
          (nClasses, 100.0 / nClasses))
    print("%s LDA: %.2f (+/- %0.2f) %%" % (titleStr, ldaScore, ldaScoreSE))
    print("%s QDA: %.2f (+/- %0.2f) %%" % (titleStr, qdaScore, qdaScoreSE))
    print("%s RF: %.2f (+/- %0.2f) %%" % (titleStr, rfScore, rfScoreSE))
    return ldaScore, ldaScoreSE, qdaScore, qdaScoreSE, rfScore, rfScoreSE, nClasses
Beispiel #43
0
from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()  # the encoder was not defined in this excerpt
features[:, 13] = labelencoder.fit_transform(features[:, 13])

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
features = scaler.fit_transform(features)

from sklearn.model_selection import train_test_split
f_train, f_test, t_train, t_test = train_test_split(features,
                                                    target,
                                                    test_size=0.15,
                                                    random_state=0)

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis(n_components=2)
# project onto 2 linear discriminants
f_train = lda.fit_transform(f_train, t_train)  # LDA is supervised, so the labels are required
f_test = lda.transform(f_test)

from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=40,
                                    criterion='entropy',
                                    random_state=0)
classifier.fit(f_train, t_train)
predictions = classifier.predict(f_test)

from sklearn.metrics import confusion_matrix, accuracy_score
accuracy = accuracy_score(t_test, predictions)  # renamed: accuracy_score returns accuracy, not precision
matrix = confusion_matrix(t_test, predictions)

# Baseline = helps to evaluate the models
# Computed as most_common / rest
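# A minimal sketch of the baseline described above (hedged; reuses t_test,
# implements the stated most_common/rest ratio plus the usual majority-class accuracy):
import numpy as np
values, counts = np.unique(t_test, return_counts=True)
ratio = counts.max() / (counts.sum() - counts.max())  # most_common / rest
majority_acc = counts.max() / counts.sum()            # accuracy of always predicting the mode
print('baseline ratio: %.3f, majority accuracy: %.2f%%' % (ratio, 100.0 * majority_acc))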
Beispiel #44
0
# (opening of this snippet lost in extraction; the dangling fragment closed a
#  ggplot(...) + geom_line() + labs(title='Word Usage Change over Time in
#  First Presidency and the 12') call analogous to the 'youth' plot below)

youth = (dfwords.groupby(dfwords['Date'].map(lambda x: x.year)).mean()[[
    'young men', 'young women'
]].unstack().reset_index())
youth.columns = ['Word', 'Date', 'Mean TF-IDF Score']
(ggplot(youth, aes(x='Date', y='Mean TF-IDF Score', color='Word')) +
 geom_line() +
 labs(title='Word Usage Change over Time in First Presidency and the 12'))

pca = PCA(n_components=3)
pca_df = pca.fit_transform(tfidf_X_train.toarray())  # toarray(): newer sklearn rejects the np.matrix from todense()

lda = LinearDiscriminantAnalysis(n_components=3)
lda_df = lda.fit_transform(tfidf_X_train.toarray(), y_train)

principalDf = pd.DataFrame(data=pca_df, columns=['pc1', 'pc2', 'pc3'])
principalDf['Speaker_num'] = y_train
recent_Oaks = list(
    np.where([
        X_train_all.Date[i] > datetime.datetime(2020, 1, 1)
        and X_train_all.Speaker[i] == 'Dallin H. Oaks'
        for i in X_train_all.index
    ])[0])
principalDf['Speaker'] = [to_speaker_dict[y_val] for y_val in y_train]
principalDf.loc[recent_Oaks, 'Speaker'] = '2020 Dallin H. Oaks'
principalDf.loc[recent_Oaks, 'Speaker_num'] = 15

linearDF = pd.DataFrame(data=lda_df, columns=['lda1', 'lda2', 'lda3'])
linearDF['Speaker_num'] = y_train
Beispiel #45
0
y = dataset.iloc[:, 13].values

xtrain, xtest, ytrain, ytest = train_test_split(x,
                                                y,
                                                test_size=0.2,
                                                random_state=0)
# Scaling
scaler = StandardScaler()
xtrain = scaler.fit_transform(xtrain)
xtest = scaler.transform(xtest)  # reuse the training fit; refitting on the test set leaks its statistics

# Dimensionality Reduction
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

lda = LDA(n_components=2)
xtrain = lda.fit_transform(xtrain, ytrain)
xtest = lda.transform(xtest)

from sklearn.naive_bayes import GaussianNB

classifier = GaussianNB()
classifier.fit(xtrain, ytrain)
classifiedvalue = classifier.predict(xtest)

from sklearn.metrics import confusion_matrix

confusionmatrix = confusion_matrix(ytest, classifiedvalue)
from matplotlib.colors import ListedColormap

# test Set Visualisation
xset, yset = xtest, ytest
Beispiel #46
0
y = dat['Class']  # Split off classifications
X = dat.loc[:, '0':]  # Split off features (.ix was removed from pandas; .loc does the same label slice)
X_norm = (X - X.min()) / (X.max() - X.min())

# print(cols)
# print(X_norm)
# print(y)

# PCA plotting
# plot_method = sklearnPCA(n_components=2) #2-dimensional PCA

# LDA plotting
plot_method = LDA(n_components=2)  #2-dimensional LDA

transformed = pd.DataFrame(plot_method.fit_transform(X_norm, y))

plt.scatter(transformed[y == 0][0],
            transformed[y == 0][1],
            label='0=neutral',
            c='black')
plt.scatter(transformed[y == 1][0],
            transformed[y == 1][1],
            label='1=anger',
            c='red')
plt.scatter(transformed[y == 3][0],
            transformed[y == 3][1],
            label='3=disgust',
            c='orange')
plt.scatter(transformed[y == 4][0],
            transformed[y == 4][1])  # (remaining arguments, e.g. the class-4 label and color, lost in extraction)
Beispiel #47
0
##PCA (approximated with TruncatedSVD, which skips mean-centering)
print("Computing PCA projection")
t0 = time()
X_pca = decomposition.TruncatedSVD(n_components=3).fit_transform(X_test)
plot_embedding_2d(X_pca[:, 0:2], y_test, "PCA 2D")
plot_embedding_3d(X_pca, y_test, "PCA 3D (time %.2fs)" % (time() - t0))

#%%
#LDA
print("Computing LDA projection")
X2 = X_test.copy()
X2.flat[::X_test.shape[1] + 1] += 0.01  # Make X invertible
t0 = time()
lda = LinearDiscriminantAnalysis(n_components=3)
X_lda = lda.fit_transform(X2, y_test)
plot_embedding_2d(X_lda[:, 0:2], y_test, "LDA 2D")
plot_embedding_3d(X_lda, y_test, "LDA 3D (time %.2fs)" % (time() - t0))
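# Hedged alternative to the diagonal jitter above: LDA can regularize a
# near-singular within-class covariance directly via shrinkage (supported by
# the 'lsqr' and 'eigen' solvers; 'eigen' also provides transform()).
lda_shrunk = LinearDiscriminantAnalysis(solver='eigen', shrinkage='auto', n_components=3)
X_lda_shrunk = lda_shrunk.fit_transform(X_test, y_test)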

# MDS
print("Computing MDS embedding")
clf = manifold.MDS(n_components=3, n_init=1, max_iter=100)
t0 = time()
X_mds = clf.fit_transform(X_test)
print("Done. Stress: %f" % clf.stress_)
plot_embedding_2d(X_mds, y_test, "MDS (time %.2fs)" % (time() - t0))
plot_embedding_3d(X_mds, y_test, "MDS (time %.2fs)" % (time() - t0))

lable = trainlable
c = (lable == 4)
c2 = (lable == 9)
Beispiel #48
0
print('Between-class scatter matrix: %sx%s' % (S_B.shape[0], S_B.shape[1]))
    

eigen_vals, eigen_vecs = np.linalg.eig(np.linalg.inv(S_W).dot(S_B))
eigen_pairs = [(np.abs(eigen_vals[i]), eigen_vecs[:, i])
               for i in range(len(eigen_vals))]
eigen_pairs = sorted(eigen_pairs, key=lambda k: k[0], reverse=True)

print('Eigenvalues in decreasing order:\n')
for ev in eigen_pairs:
    print(ev[0])
'''
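# A self-contained sketch (synthetic data, fresh names) finishing what the
# disabled block above outlines: take the leading eigenvectors of
# inv(S_W) @ S_B as the projection matrix W and project the data.
import numpy as np

rng = np.random.RandomState(0)
X_demo = np.vstack([rng.randn(50, 4) + m
                    for m in ([0, 0, 0, 0], [3, 1, 0, 0], [0, 3, 1, 0])])
y_demo = np.repeat([0, 1, 2], 50)

mean_all = X_demo.mean(axis=0)
S_W = np.zeros((4, 4))
S_B = np.zeros((4, 4))
for cls in np.unique(y_demo):
    Xc = X_demo[y_demo == cls]
    mc = Xc.mean(axis=0)
    S_W += (Xc - mc).T @ (Xc - mc)        # within-class scatter
    d = (mc - mean_all).reshape(-1, 1)
    S_B += len(Xc) * (d @ d.T)            # between-class scatter

eigen_vals, eigen_vecs = np.linalg.eig(np.linalg.inv(S_W) @ S_B)
order = np.argsort(np.abs(eigen_vals))[::-1]
W = np.real(eigen_vecs[:, order[:2]])     # top-2 discriminant directions
X_demo_lda = X_demo @ W                   # projected data, shape (150, 2)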

# LDA in sklearn
lda = LDA(n_components = 2)
X_train_lda = lda.fit_transform(X_train_std, y_train)

lr = LogisticRegression()
lr = lr.fit(X_train_lda, y_train)

plot_decision_regions(X_train_lda, y_train, classifier = lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc = 'lower left')
plt.show()

# On test set:
X_test_lda = lda.transform(X_test_std)

plot_decision_regions(X_test_lda, y_test, classifier = lr)
plt.xlabel('LD 1')
Beispiel #49
0
    def lda(self):
        lda = LDA(n_components=1)
        self.xlda_train = lda.fit_transform(self.x_train, self.y_train)
        self.xlda_test = lda.transform(self.x_test)
        return self.xlda_test, self.xlda_train