Example #1
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix


def main():
    # Importing the dataset
    dataset = pd.read_csv('Wine.csv')
    X = dataset.iloc[:, 0:13].values
    y = dataset.iloc[:, 13].values

    # Splitting the dataset into the Training set and Test set
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    # Feature Scaling
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    # Applying LDA
    lda = LDA(n_components=2)
    X_train = lda.fit_transform(X_train, y_train)
    X_test = lda.transform(X_test)

    # Fitting Logistic Regression to the Training set
    classifier = LogisticRegression(random_state=0)
    classifier.fit(X_train, y_train)

    # Predicting the Test set results
    y_pred = classifier.predict(X_test)

    # Making the Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)

    # Visualising the Training set results
    X_set, y_set = X_train, y_train
    X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
                         np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01))
    plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
                 alpha=0.75, cmap=ListedColormap(('red', 'green', 'blue')))
    plt.xlim(X1.min(), X1.max())
    plt.ylim(X2.min(), X2.max())
    for i, j in enumerate(np.unique(y_set)):
        plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                    c=ListedColormap(('red', 'green', 'blue'))(i), label=j)
    plt.title('Logistic Regression (Training set)')
    plt.xlabel('LD1')
    plt.ylabel('LD2')
    plt.legend()
    plt.show()

    # Visualising the Test set results
    X_set, y_set = X_test, y_test
    X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
                         np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01))
    plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
                 alpha=0.75, cmap=ListedColormap(('red', 'green', 'blue')))
    plt.xlim(X1.min(), X1.max())
    plt.ylim(X2.min(), X2.max())
    for i, j in enumerate(np.unique(y_set)):
        plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                    c=ListedColormap(('red', 'green', 'blue'))(i), label=j)
    plt.title('Logistic Regression (Test set)')
    plt.xlabel('LD1')
    plt.ylabel('LD2')
    plt.legend()
    plt.show()
Example #2
    ind[0::2] = np.arange(c - 1, c // 2 - 1, -1)
    ind[1::2] = np.arange(0, c // 2)
    W = W[:, ind]
    csp_filters.append(W.T[:ncomp])

XT_CSP, XV_CSP = [], []
for i in range(nbands):
    YT = np.asarray([np.dot(csp_filters[i], ep) for ep in XT[i]])
    XT_CSP.append(np.log(np.mean(YT**2, axis=2)))  # Feature extraction
    # XT_CSP.append( np.log( np.var( YT, axis=2 ) ) )

#%% LDA
SCORE_T = np.zeros((len(ZT), nbands))
lda_list = []
for i in range(nbands):
    lda = LDA()
    lda.fit(XT_CSP[i], tT)
    SCORE_T[:, i] = np.ravel(
        lda.transform(XT_CSP[i])
    )  # per-epoch classification scores in each of the N sub-bands (self-validation)
    lda_list.append(lda)

#%% Bayesian Meta-Classifier
SCORE_T0 = SCORE_T[tT == class_ids[0], :]
SCORE_T1 = SCORE_T[tT == class_ids[1], :]
p0 = norm(np.mean(SCORE_T0, axis=0), np.std(SCORE_T0, axis=0))
p1 = norm(np.mean(SCORE_T1, axis=0), np.std(SCORE_T1, axis=0))
META_SCORE_T = np.log(p0.pdf(SCORE_T) / p1.pdf(SCORE_T))  # log-likelihood ratio log(p0/p1) per sub-band

#%% Final classification
clf_final = SVC(kernel='linear', C=10**(-4), probability=True)
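The snippet ends with clf_final defined but never trained. A minimal continuation (a sketch, assuming the rows of META_SCORE_T line up with the training labels tT) would be:

# Train the final SVM on the per-sub-band meta-scores (log-likelihood ratios)
clf_final.fit(META_SCORE_T, tT)
# At test time the same CSP -> LDA -> Gaussian-likelihood chain would be applied
# to validation epochs before calling clf_final.predict(...)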
Example #3
    def fit(self, X, y):

        lda = LDA(store_covariance=self.cov)
        self.fit_ = lda.fit(X, y)
        return self
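The wrapper only implements fit; a matching predict, assuming the fitted estimator stored in self.fit_ is the one to delegate to, could look like:

    def predict(self, X):
        # Delegate to the LDA estimator stored by fit() (hypothetical helper)
        return self.fit_.predict(X)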
Example #4
def train(workDir, classifier="LinearSvm", ldaDim=-1):
    print("Loading embeddings.")
    fname = "{}/labels.csv".format(workDir)
    labels = pd.read_csv(fname, header=None).to_numpy()[:, 1]
    labels = list(
        map(itemgetter(1), map(os.path.split,
                               map(os.path.dirname,
                                   labels))))  # Get the directory.
    fname = "{}/reps.csv".format(workDir)
    embeddings = pd.read_csv(fname, header=None).to_numpy()
    le = LabelEncoder().fit(labels)
    labelsNum = le.transform(labels)

    nClasses = len(le.classes_)
    print("Training for {} classes.".format(nClasses))

    print(type(embeddings[0]))

    if classifier == 'LinearSvm':
        clf = SVC(C=1, kernel='linear', probability=True)
    elif classifier == 'GridSearchSvm':
        print("""
        Warning: In our experience, using a grid search over SVM hyper-parameters only
        gives marginally better performance than a linear SVM with C=1 and
        is not worth the extra computation of performing a grid search.
        """)
        param_grid = [{
            'C': [1, 10, 100, 1000],
            'kernel': ['linear']
        }, {
            'C': [1, 10, 100, 1000],
            'gamma': [0.001, 0.0001],
            'kernel': ['rbf']
        }]
        clf = GridSearchCV(SVC(C=1, probability=True), param_grid, cv=5)
    elif classifier == 'GMM':  # Doesn't work well
        clf = GMM(n_components=nClasses)

    # ref:
    # http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html#example-classification-plot-classifier-comparison-py
    elif classifier == 'RadialSvm':  # Radial Basis Function kernel
        # works better with C = 1 and gamma = 2
        clf = SVC(C=1, kernel='rbf', probability=True, gamma=2)
    elif classifier == 'DecisionTree':  # Doesn't work well
        clf = DecisionTreeClassifier(max_depth=20)
    elif classifier == 'GaussianNB':
        clf = GaussianNB()

    # ref: https://jessesw.com/Deep-Learning/
    elif classifier == 'DBN':
        from nolearn.dbn import DBN
        clf = DBN(
            [embeddings.shape[1], 500, labelsNum[-1:][0] + 1],  # input, hidden, output layer sizes
            learn_rates=0.3,
            # Smaller steps mean a possibly more accurate result, but the
            # training will take longer
            learn_rate_decays=0.9,
            # a factor the initial learning rate will be multiplied by
            # after each iteration of the training
            epochs=300,  # number of iterations
            # dropouts = 0.25, # Express the percentage of nodes that
            # will be randomly dropped as a decimal.
            verbose=1)

    if ldaDim > 0:
        clf_final = clf
        clf = Pipeline([('lda', LDA(n_components=ldaDim)), ('clf', clf_final)])

    clf.fit(embeddings, labelsNum)

    fName = "{}/classifier.pkl".format(workDir)
    print("Saving classifier to '{}'".format(fName))
    with open(fName, 'wb') as f:
        pickle.dump((le, clf), f)
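For completeness, the saved (le, clf) pair would typically be loaded back for inference, roughly as sketched below; `rep` stands for a single embedding vector and is an assumption, not part of the original.

import pickle

with open("{}/classifier.pkl".format(workDir), "rb") as f:
    le, clf = pickle.load(f)

probs = clf.predict_proba(rep.reshape(1, -1))  # rep: one embedding (assumption)
maxI = np.argmax(probs)
person = le.inverse_transform([maxI])[0]
print("Predicted {} with {:.2f} confidence.".format(person, probs[0, maxI]))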
Example #5
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Applying LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=2)
X_train = lda.fit_transform(X_train, y_train)
X_test = lda.transform(X_test)

# Fitting Logistic Regression to the Training set
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
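The example stops at the confusion matrix; a quick accuracy readout from it, assuming the cm computed above, is:

import numpy as np

accuracy = np.trace(cm) / cm.sum()  # correct predictions over all predictions
print(cm)
print('Accuracy: {:.3f}'.format(accuracy))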
Example #6
                # train_indexes = flatten_repetitions(cross_validation_indexes[0])
                test_indexes = flatten_repetitions(cross_validation_indexes[1])

                # train_data_all_subject.append(np.asarray(all_data_per_char_as_matrix[train_indexes]).astype(np.float32))
                test_data_all_subject.append(
                    np.asarray(
                        all_data_per_char_as_matrix[test_indexes]).astype(
                            np.float32))

                # train_tags_all_subject.append(target_per_char_as_matrix[train_indexes])
                test_tags_all_subject.append(
                    target_per_char_as_matrix[test_indexes])

            break

    model = LDA()

    from scipy import stats
    # train_data = stats.zscore(np.vstack(train_data_all_subject),axis=1)
    # train_tags = np.vstack(train_tags_all_subject).flatten()

    test_data = stats.zscore(np.vstack(test_data_all_subject), axis=1)
    test_tags = np.vstack(test_tags_all_subject).flatten()
    import pandas as pd

    # final_train_matrix_with_tagging = np.hstack([train_data.reshape(train_data.shape[0] * train_data.shape[1], -1).astype(np.float32),train_tags.reshape(-1,1)])
    final_test_matrix_with_tagging = np.hstack([
        test_data.reshape(test_data.shape[0] * test_data.shape[1],
                          -1).astype(np.float32),
        test_tags.reshape(-1, 1)
    ])
Example #7
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=0)

from sklearn.preprocessing import StandardScaler

scx = StandardScaler()
X_train = scx.fit_transform(X_train)
X_test = scx.transform(X_test)

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

lda = LDA(n_components=None)
X_train = lda.fit_transform(X_train, y_train)
X_test = lda.transform(X_test)
explained_variance = lda.explained_variance_ratio_

from sklearn.linear_model import LogisticRegression

classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)
print(cm)
Example #8
# Use the prior two days of returns as predictor values,
# with direction as the response
X = snpret[['Lag1', 'Lag2']]
y = snpret['Direction']

# The dataset is split into two parts: before and after 2005-01-01
start_test = datetime.datetime(2005, 1, 1)

# Create the training and test sets
X_train = X[X.index < start_test]
X_test = X[X.index >= start_test]
y_train = y[y.index < start_test]
y_test = y[y.index >= start_test]

#create parametrised models
models = [('LR', LogisticRegression()), ('LDA', LDA()), ('QDA', QDA()),
          ('LSVC', LinearSVC()),
          ('RSVM',
           SVC(C=1000000.0,
               cache_size=200,
               class_weight=None,
               coef0=0.0,
               degree=3,
               gamma=0.0001,
               kernel='rbf',
               max_iter=-1,
               probability=False,
               random_state=None,
               shrinking=True,
               tol=0.001,
               verbose=False)),
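# The snippet above is cut off mid-list. A typical continuation (a sketch, not
# the original file) closes the list and fits each model on the pre-2005
# training window, reporting its out-of-sample hit rate:
]

for name, model in models:
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    print("%s hit rate: %.3f" % (name, (pred == y_test).mean()))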
Example #9
    plt.figure()
    plot_points(x)
    plt.axis('square')
    plt.tight_layout()
    save_fig('gda_2d_data.pdf')
    plt.show()

    plt.figure()
    plot_points(x)
    plot_contours(xx, yy, x_range, y_range, u, sigma)
    plt.axis('square')
    plt.tight_layout()
    save_fig('gda_2d_contours.pdf')
    plt.show()

    for k, clf in enumerate((LDA(), QDA())):
        clf.fit(X, Y)
        z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
        z = z.reshape(ngrid, ngrid)
        z_p = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])
        yhat = clf.predict(np.c_[xx.ravel(), yy.ravel()])
        Yhat = make_one_hot(yhat)

        plt.figure()
        #plot_dboundaries(xx, yy, z, z_p)
        plot_dboundaries(xx, yy, z, Yhat)
        plot_points(x)
        plot_contours(xx, yy, x_range, y_range, u, sigma)
        plt.title(model_names[k])
        plt.axis('square')
        plt.tight_layout()
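make_one_hot is not defined in the snippet; a minimal version consistent with how it is used above would be:

import numpy as np

def make_one_hot(y):
    # Map integer class labels to one-hot rows (hypothetical helper)
    y = np.asarray(y, dtype=int)
    onehot = np.zeros((y.size, y.max() + 1))
    onehot[np.arange(y.size), y] = 1
    return onehot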
Example #10
from collections import OrderedDict
from moabb.datasets.bnci import BNCI2014001

from moabb.datasets.alex_mi import AlexMI
from moabb.datasets.physionet_mi import PhysionetMI

datasets = [
    AlexMI(with_rest=True),
    BNCI2014001(),
    PhysionetMI(with_rest=True, feets=False)
]

pipelines = OrderedDict()
pipelines['MDM'] = make_pipeline(Covariances('oas'), MDM())
pipelines['TS'] = make_pipeline(Covariances('oas'), TSclassifier())
pipelines['CSP+LDA'] = make_pipeline(Covariances('oas'), CSP(8), LDA())

context = MotorImageryMultiClasses(datasets=datasets, pipelines=pipelines)

results = context.evaluate(verbose=True)

for p in results.keys():
    results[p].to_csv('../../results/MotorImagery/MultiClass/%s.csv' % p)

results = pd.concat(results.values())
print(results.groupby('Pipeline').mean())

res = results.pivot(values='Score', columns='Pipeline')
sns.lmplot(data=res, x='CSP+LDA', y='TS', fit_reg=False)
plt.xlim(0.25, 1)
plt.ylim(0.25, 1)
Example #11
        py.xlabel("False Positive Rate")
        py.ylabel("True Positive Rate")
        py.legend()
    py.show()
    confusion_matrix = ConfusionMatrix(testinglabel, predicted_test)
    sns.heatmap(confusion_matrix, annot=True)
    Accuracy = accuracy(testinglabel, predicted_test)
    print(Accuracy)


Kfold(train_fold, newlabel, reduced_test, Test['label'])

# In[160]:

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=2)
inbuilt_lda = lda.fit_transform(originaldata, Data['label'])

# In[161]:

inbuilt_lda = pd.DataFrame(inbuilt_lda)
inbuilt_lda = pd.concat([inbuilt_lda, Data['label']], axis=1)

# In[163]:

from numpy.linalg import inv


def LDA(lda_data, k):  # note: this redefines LDA, shadowing the sklearn import above
    data = lda_data.drop(['label'], axis=1)
    LDA = pd.DataFrame()
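The custom LDA function is cut off after creating an empty DataFrame. A compact from-scratch version consistent with the `inv` import above (a sketch, assuming a 'label' column and numeric features; named fisher_lda here to avoid shadowing) could be:

def fisher_lda(lda_data, k):
    # Project onto the top-k eigenvectors of inv(Sw) @ Sb (classic Fisher LDA)
    X = lda_data.drop(['label'], axis=1).values
    y = lda_data['label'].values
    mean_all = X.mean(axis=0)
    Sw = np.zeros((X.shape[1], X.shape[1]))  # within-class scatter
    Sb = np.zeros_like(Sw)                   # between-class scatter
    for c in np.unique(y):
        Xc = X[y == c]
        mc = Xc.mean(axis=0)
        Sw += (Xc - mc).T @ (Xc - mc)
        diff = (mc - mean_all).reshape(-1, 1)
        Sb += len(Xc) * diff @ diff.T
    eigvals, eigvecs = np.linalg.eig(inv(Sw) @ Sb)
    order = np.argsort(eigvals.real)[::-1][:k]
    return X @ eigvecs[:, order].real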
Example #12
plot_svc_decision_function(model, label = 'SVM', ax = ax)
w_svm = np.hstack((model.coef_[0],model.intercept_))
plot_data.plot_lines(X,y, w_svm, ax, label = 'SVM', linestyle = 'dashed')


# Applying LinearSVC
from sklearn.svm import LinearSVC
linearSVC = LinearSVC(C = 1e6)
linearSVC.fit(X,y)
linearSVC.coef_
linearSVC.intercept_
w_l_SVC = np.hstack((linearSVC.coef_[0],linearSVC.intercept_))
plot_data.plot_lines( X, y, w_l_SVC, ax, color = 'cyan', label = 'Lin SVC', linestyle = 'dashed')

# Applying LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
for solver, color, linestyle in zip(['svd', 'eigen', 'lsqr'], ['red', 'green', 'blue'], ['dotted', 'dotted', 'dotted']):
  if (solver == 'svd'):
    lda = LDA(solver = solver, store_covariance=True)
  else:
    lda = LDA(solver = solver)
  lda.fit(X, y)
  lda.coef_
  lda.intercept_
  w_lda = np.hstack((lda.coef_[0],lda.intercept_))
  plot_data.plot_lines( X, y, w_lda, ax, color = color, label = 'LDA_' + solver, linestyle = linestyle)


plot_lda_decision_function(lda, label = 'lda')
lda.predict_proba(X)
Example #13
    data[X] = data[X].apply(lambda x: x + np.random.rand())

    data[X] = data[X].apply(lambda x: x + 1)
    data[X], _ = boxcox(data[X])


for i in skewed:
    normalizing(i)

fe = False
if fe:
    # Feature engineering
    print("Feature engineering...")
    y = train.iloc[:, -1]
    Severity = ['mvar3', 'mvar4', 'mvar5']
    lda_Severity = LDA(n_components=5)  # note: LDA supports at most min(n_classes - 1, n_features) components
    lda_Severity = lda_Severity.fit(train[Severity], y)
    data['Severity'] = lda_Severity.transform(data[Severity])

    No_of_active = ['mvar16', 'mvar17', 'mvar19', 'mvar20', 'mvar18']
    lda_No_of_active = LDA(n_components=5)
    lda_No_of_active = lda_No_of_active.fit(train[No_of_active], y)
    data['No_of_active'] = lda_No_of_active.transform(data[No_of_active])

    Average_utilization = ['mvar21', 'mvar22', 'mvar23', 'mvar24']
    lda_Average_utilization = LDA(n_components=5)
    lda_Average_utilization = lda_Average_utilization.fit(
        train[Average_utilization], y)
    data['Average_utilization'] = lda_Average_utilization.transform(
        data[Average_utilization])
Example #14
l1 = pd.DataFrame(l1)
l1 = pd.concat([l1, test["Pclass"]], join='outer', axis=1)
l1

x = l
y = titanic["Survived"]
x_test = l1

#lda analysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.metrics import accuracy_score, confusion_matrix
ldatemp, rfcdepth, acc = 0, 0, 0
var = []
for i in range(1, 2):  # note: only i = 1 is tried; a wider range would re-reduce x on every pass
    lda = LDA(n_components=i)
    x = lda.fit_transform(x, y)
    x_test = lda.transform(x_test)
    for j in range(1, 20):
        cl = RFC(max_depth=j, random_state=0)
        cl.fit(x, y)
        y_pred = cl.predict(x)
        if (acc < accuracy_score(y_pred, y)):
            acc = accuracy_score(y_pred, y)
            ldatemp = i
            rfcdepth = j
            var = y_pred
print(confusion_matrix(var, titanic["Survived"]))
print(str(rfcdepth) + " " + str(ldatemp) + " " + str(acc))
lda = LDA(n_components=ldatemp)
x = lda.fit_transform(x, y)
Example #15
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

old_paper_embedding = np.empty((1216315, 50), dtype=np.float32)
with open('data/arxiv_fasttext_vector.txt') as f:
    for i, line in enumerate(f):
        for j, x in enumerate(line.split()):
            x = float(x)
            old_paper_embedding[i, j] = x

labels = []
with open('save/arxiv_label.txt') as f:
    for line in f:
        label = line.strip()
        labels.append(label)

clf = LDA(n_components=2)
clf.fit(old_paper_embedding, labels)
paper_embedding = clf.transform(old_paper_embedding)

paper_embedding = tf.Variable(paper_embedding, trainable=False, name='paper_embedding', dtype=tf.float32)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
tf.train.Saver().save(sess, 'save/best')

config = projector.ProjectorConfig()

paper_projector = config.embeddings.add()
paper_projector.tensor_name = paper_embedding.name
paper_projector.metadata_path = 'label.txt'
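The snippet stops before the projector configuration is written to disk; with the TF1-era API it is typically finalized like this (a sketch under that assumption):

# Write the projector config next to the checkpoint so TensorBoard picks it up
summary_writer = tf.summary.FileWriter('save')
projector.visualize_embeddings(summary_writer, config)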
Example #16
# SVC : sklearn.svm.SVC
# NSVC : sklearn.svm.NuSVC
# OCSVM : sklearn.svm.OneClassSVM
from sklearn.svm import LinearSVC as LSVC, SVC, NuSVC as NSVC, OneClassSVM as OCSVM
# ABC : sklearn.ensemble.AdaBoostClassifier
from sklearn.ensemble import AdaBoostClassifier as ABC
# GBC : sklearn.ensemble.GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingClassifier as GBC
# RFC : sklearn.ensemble.RandomForestClassifier
# ETsC : sklearn.ensemble.ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.ensemble import ExtraTreesClassifier as ETsC
_Models = {
    "LR": LR(),
    "LRCV": LRCV(),
    "LDA": LDA(),
    "QDA": QDA(),
    "KNC": KNC(),
    # "RNC" : RNC(),
    "DTC": DTC(),
    "ETC": ETC(),
    "GNB": GNB(),
    # "BDNB" : BDNB(),
    "MNB": MNB(),
    "BNB": BNB(),
    "LSVC": LSVC(),
    "SVC": SVC(),
    "NSVC": NSVC(),
    # "OCSVM" : OCSVM()
}
# Compare the evaluation results
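The closing comment suggests a model-comparison step follows. A conventional spot-check loop over _Models, assuming a feature matrix X and labels y are already defined, would be:

from sklearn.model_selection import cross_val_score

for name, model in _Models.items():
    scores = cross_val_score(model, X, y, cv=10, scoring='accuracy')
    print('%s: %.3f (%.3f)' % (name, scores.mean(), scores.std()))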
Example #17
        fig.show()

        # sklearn_LDA_cell_line = LDA(n_components=2)
        # sklearn_LDA_cell_line.fit(X, y)

    else:
        print("unknown index for an example!")

else:
    # -----------------------------------------------------
    # 6. use sklearn LDA
    # -----------------------------------------------------
    # apply sklearn LDA to iris data
    iris = load_iris()

    sklearn_LDA = LDA(n_components=2)
    sklearn_LDA_projection = sklearn_LDA.fit_transform(iris.data, iris.target)
    sklearn_LDA_projection = -sklearn_LDA_projection  # flip the sign purely for plot orientation

    # plot the projections
    fig = plt.figure()

    ax = fig.add_subplot(1, 1, 1)
    ax.set_title('Results from applying sklearn LDA to iris')
    # ax.set_xlabel(r'$W_1$')
    # ax.set_ylabel(r'$W_2$')
    ax.scatter(sklearn_LDA_projection[0:50, 0], sklearn_LDA_projection[0:50, 1],
               marker='o', s=marker_size, color='blue', label='setosa')
    ax.scatter(sklearn_LDA_projection[50:100, 0], sklearn_LDA_projection[50:100, 1],
               marker='o', s=marker_size, color='red', label='versicolor')
    ax.scatter(sklearn_LDA_projection[100:150, 0], sklearn_LDA_projection[100:150, 1],
               marker='o', s=marker_size, color='green', label='virginica')  # truncated line completed; the color is an assumption
Example #18
def run_trainer(cfg, ftrain, interactive=False):
    # feature selection?
    datadir= cfg.DATADIR
    feat_picks= None
    txt= 'all'

    do_balance= False

    # preprocessing, epoching and PSD computation
    n_epochs= {}

    spfilter= cfg.SP_FILTER
    tpfilter= cfg.TP_FILTER

    # Load multiple files
    multiplier= 1
    raw, events= pu.load_multi(ftrain, spfilter=spfilter, multiplier=multiplier)
    #print(raw._data.shape)  #(17L, 2457888L)
    triggers= { cfg.tdef.by_value[c]:c for c in set(cfg.TRIGGER_DEF) }

    # Pick channels
    if cfg.CHANNEL_PICKS is None:
        picks= pick_types(raw.info, meg=False, eeg=True, stim=False, eog=False, exclude='bads') 
        #print (picks) # [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16]
    else:
        picks= []
        for c in cfg.CHANNEL_PICKS:
            if type(c)==int:
                picks.append(c)
            elif type(c)==str:
                picks.append( raw.ch_names.index(c) )
            else:
                raise RuntimeError('CHANNEL_PICKS is unknown format.\nCHANNEL_PICKS=%s' % cfg.CHANNEL_PICKS)
 
    if max(picks) > len(raw.info['ch_names']):
        print('ERROR: "picks" has a channel index %d while there are only %d channels.'%\
            ( max(picks),len(raw.info['ch_names']) ) )
        sys.exit(-1)
# 
    # Spatial filter
    if cfg.SP_CHANNELS is None:
        spchannels= pick_types(raw.info, meg=False, eeg=True, stim=False, eog=False, exclude='bads')
    else:
        spchannels= []
        for c in cfg.SP_CHANNELS:
            if type(c)==int:
                spchannels.append(c)
            elif type(c)==str:
                spchannels.append( raw.ch_names.index(c) )
            else:
                raise RuntimeError('SP_CHANNELS is unknown format.\nSP_CHANNELS=%s' % cfg.SP_CHANNELS)
# 
    # Spectral filter
    if tpfilter is not None:
        raw= raw.filter( tpfilter[0], tpfilter[1], picks=picks, n_jobs= mp.cpu_count() )
    if cfg.NOTCH_FILTER is not None:
        raw= raw.notch_filter( cfg.NOTCH_FILTER, picks=picks, n_jobs= mp.cpu_count() )
    
    # Read epochs
    try:
        
        epochs_train= Epochs(raw, events, triggers, tmin=cfg.EPOCH[0], tmax=cfg.EPOCH[1], proj=False,\
            picks=picks, baseline=None, preload=True, add_eeg_ref=False, verbose=False, detrend=None)
        #print (epochs_train)# <Epochs  |  n_events : 422 (all good), tmin : 1.0 (s), tmax : 2.0 (s), baseline : None, ~26.5 MB, data loaded,'LEFT_GO': 212, 'RIGHT_GO': 210>
    except:
        print('\n*** (trainer.py) ERROR OCCURRED WHILE EPOCHING ***\n')
        traceback.print_exc()
        if interactive:
            print('Dropping into a shell.\n')
            pdb.set_trace()
        raise RuntimeError
    '''
    epochs_data= epochs_train.get_data()
    print (epochs_data.shape)  #(422L, 16L, 513L)  trials*channels*samples
    
    #Visualize raw data for some channel in some trial
    ptrial=1
    trail=np.zeros((len(spchannels),epochs_data.shape[2]))
    print(trail)
    for pch in range(len(spchannels)):
        print(pch)
        trail[pch,::] =epochs_data[ptrial,pch,::]
    color=["b","g","r",'c','m','y','k','w',"b","g","r",'c','m','y','k','w']
    linstyle=['-','-','-','-','-','-','-','-','--','--','--','--','--','--','--','--',]
    for pch in range(len(spchannels)):
        print(color[pch])
        print(linstyle[pch])
        plt.plot(np.linspace(cfg.EPOCH[0], cfg.EPOCH[1], epochs_data.shape[2]), trail[pch,::],c=color[pch],ls=linstyle[pch],
                 label='channel %d'%(pch+1),lw=0.5)  
        
    plt.xlabel('time/s')  
    plt.ylabel('voltage/uV')  
    plt.title('Viewer')  
    plt.legend(loc="lower right")  
    plt.show()
    '''
    
    
    label_set= np.unique(list(triggers.values()))
    sfreq= raw.info['sfreq']
  
    # Compute features
    res= get_psd_feature(epochs_train, cfg.EPOCH, cfg.PSD, feat_picks)
    X_data= res['X_data'] 
    Y_data= res['Y_data']
    wlen= res['wlen']
    w_frames= res['w_frames']
    psde= res['psde']
    psdfile= '%s/psd/psd-train.pcl'% datadir
    plot_pca_componet(X_data, Y_data)
    
    
    
  
    psdparams= cfg.PSD
#     print (events)
    for ev in triggers:
        print (ev) 
        n_epochs[ev]= len( np.where(events[:,-1]==triggers[ev])[0] )  #{'RIGHT_GO': 150, 'LEFT_GO': 150} total trials
  
    # Init a classifier
    if cfg.CLASSIFIER=='RF':
        # Make sure to set n_jobs=cpu_count() for training and n_jobs=1 for testing.
        cls= RandomForestClassifier(n_estimators=cfg.RF['trees'], max_features='auto',\
            max_depth=cfg.RF['maxdepth'], n_jobs=mp.cpu_count(), class_weight='balanced' )
    elif cfg.CLASSIFIER=='LDA':
        cls= LDA()
#     elif cfg.CLASSIFIER=='rLDA':
#         cls= rLDA(cfg.RLDA_REGULARIZE_COEFF)
    else:
        raise RuntimeError('*** Unknown classifier %s' % cfg.CLASSIFIER)
  
    # Cross-validation
    if cfg.CV_PERFORM is not None:
        ntrials, nsamples, fsize= X_data.shape
  
        if cfg.CV_PERFORM=='LeaveOneOut':
            print('\n>> %d-fold leave-one-out cross-validation'% ntrials)
            cv= LeaveOneOut(len(Y_data))
        elif cfg.CV_PERFORM=='StratifiedShuffleSplit':
            print('\n>> %d-fold stratified cross-validation with test set ratio %.2f'% (cfg.CV_FOLDS, cfg.CV_TEST_RATIO))
            cv= StratifiedShuffleSplit(Y_data[:,0], cfg.CV_FOLDS, test_size=cfg.CV_TEST_RATIO, random_state=0)
        else:
            print('>> ERROR: Unsupported CV method yet.')
            sys.exit(-1)
        print('%d trials, %d samples per trial, %d feature dimension'% (ntrials, nsamples, fsize) )
  
        # Do it!
        scores= crossval_epochs(cv, X_data, Y_data, cls, cfg.tdef.by_value, do_balance)
         
         
        '''
        #learning curve        
        train_sizes,train_loss,test_loss=learning_curve(cls,X_data.reshape(X_data.shape[0]*X_data.shape[1],X_data.shape[2]),Y_data.reshape(Y_data.shape[0]*Y_data.shape[1]),train_sizes=[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0])
        print(X_data.shape)
        print(Y_data.shape)
        train_loss_mean=np.mean(train_loss,axis=1)
        test_loss_mean=np.mean(test_loss,axis=1)
        plt.plot(train_sizes,train_loss_mean,label='training')
        plt.plot(train_sizes,test_loss_mean,label='Cross-validation')
        plt.xlabel('training examples')
        plt.ylabel('loss')
        plt.legend(loc='best')
        plt.show()  
        ''' 
           
 
        # Results
        print('\n>> Class information')
        for ev in np.unique(Y_data):
            print('%s: %d trials'% (cfg.tdef.by_value[ev], len(np.where(Y_data[:,0]==ev)[0])) )
        if do_balance:
            print('The number of samples was balanced across classes. Method:', do_balance)
  
        print('\n>> Experiment conditions')
        print('Spatial filter: %s (channels: %s)'% (spfilter, spchannels) )
        print('Spectral filter: %s'% tpfilter)
        print('Notch filter: %s'% cfg.NOTCH_FILTER)
        print('Channels: %s'% picks)
        print('PSD range: %.1f - %.1f Hz'% (psdparams['fmin'], psdparams['fmax']) )
        print('Window step: %.1f msec'% (1000.0 * psdparams['wstep'] / sfreq) )
        if type(wlen) is list:
            for i, w in enumerate(wlen):
                print('Window size: %.1f sec'% (w) )
                print('Epoch range: %s sec'% (cfg.EPOCH[i]))
        else:
            print('Window size: %.1f sec'% (psdparams['wlen']) )
            print('Epoch range: %s sec'% (cfg.EPOCH))
  
        #chance= 1.0 / len(np.unique(Y_data))
        cv_mean, cv_std= np.mean(scores), np.std(scores)
        print('\n>> Average CV accuracy over %d epochs'% ntrials)
        if cfg.CV_PERFORM in ['LeaveOneOut','StratifiedShuffleSplit']:
            print("mean %.3f, std: %.3f" % (cv_mean, cv_std) )
        print('Classifier: %s'% cfg.CLASSIFIER)
        if cfg.CLASSIFIER=='RF':
            print('            %d trees, %d max depth'% (cfg.RF['trees'], cfg.RF['maxdepth']) )
  
        if cfg.USE_LOG:
            logfile= '%s/result_%s_%s.txt'% (datadir, cfg.CLASSIFIER, txt)
            logout= open(logfile, 'a')
            logout.write('%s\t%.3f\t%.3f\n'% (ftrain[0], np.mean(scores), np.var(scores)) )
            logout.close()
  
    # Train classifier
    archtype= platform.architecture()[0]  # ('64bit', 'Windows7')
  
    clsfile= '%s/classifier/classifier-%s.pcl'% (datadir,archtype)
    print('\n>> Training classifier')
    X_data_merged= np.concatenate( X_data )
    Y_data_merged= np.concatenate( Y_data ) 
    timer= qc.Timer()
    cls.fit( X_data_merged, Y_data_merged)
    print('Trained %d samples x %d dimension in %.1f sec'% \
        (X_data_merged.shape[0], X_data_merged.shape[1], timer.sec()))
    # set n_jobs = 1 for testing
    cls.n_jobs= 1
  
 
    classes= { c:cfg.tdef.by_value[c] for c in np.unique(Y_data) }
    #save FEATURES'PSD':
    data= dict( cls=cls, psde=psde, sfreq=sfreq, picks=picks, classes=classes,
        epochs=cfg.EPOCH, w_frames=w_frames, w_seconds=psdparams['wlen'],
        wstep=psdparams['wstep'], spfilter=spfilter, spchannels=spchannels, refchannel=None,
        tpfilter=tpfilter, notch=cfg.NOTCH_FILTER, triggers=cfg.tdef )  
    qc.make_dirs('%s/classifier'% datadir)
    qc.save_obj(clsfile, data)
  
    # Show top distinctive features
    if cfg.CLASSIFIER=='RF':
        print('\n>> Good features ordered by importance')
        keys, _= qc.sort_by_value( list(cls.feature_importances_), rev=True )
        if cfg.EXPORT_GOOD_FEATURES:
            gfout= open('%s/good_features.txt'% datadir, 'w')
  
        # reverse-lookup frequency from fft
        if type(wlen) is not list:
            fq= 0
            fq_res= 1.0 / psdparams['wlen']
            fqlist= []
            while fq <= psdparams['fmax']:
                if fq >= psdparams['fmin']: fqlist.append(fq)
                fq += fq_res
  
            for k in keys[:cfg.FEAT_TOPN]:
                ch,hz= qc.feature2chz(k, fqlist, picks, ch_names=raw.ch_names)
                print('%s, %.1f Hz  (feature %d)'% (ch,hz,k) )
                if cfg.EXPORT_GOOD_FEATURES:
                    gfout.write( '%s\t%.1f\n'% (ch, hz) )
              
            if cfg.EXPORT_GOOD_FEATURES:
                if cfg.CV_PERFORM is not None:
                    gfout.write('\nCross-validation performance: mean %.2f, std %.2f\n'%(cv_mean, cv_std) )
                gfout.close()
            print()
        else:
            print('Ignoring good features because of multiple epochs.')
 
    
    # Test file
    if len(cfg.ftest) > 0:
        raw_test, events_test= pu.load_raw('%s'%(cfg.ftest), spfilter)
 
        '''
        TODO: implement multi-segment epochs
        '''
        if type(cfg.EPOCH[0]) is list:
            print('MULTI-SEGMENT EPOCH IS NOT SUPPORTED YET.')
            sys.exit(-1)
 
        epochs_test= Epochs(raw_test, events_test, triggers, tmin=cfg.EPOCH[0], tmax=cfg.EPOCH[1],\
            proj=False, picks=picks, baseline=None, preload=True, add_eeg_ref=False)
 
        
        psdfile= 'psd-test.pcl'
        if not os.path.exists(psdfile):
            print('\n>> Computing PSD for test set')
            X_test, y_test= pu.get_psd(epochs_test, psde, w_frames, int(sfreq/8))
            qc.save_obj(psdfile, {'X':X_test, 'y':y_test})
        else:
            print('\n>> Loading %s'% psdfile)
            data= qc.load_obj(psdfile)
            X_test, y_test= data['X'], data['y']
        
 
        score_test= cls.score( np.concatenate(X_test), np.concatenate(y_test) )
        print('Testing score', score_test)
 
        # running performance
        print('\nRunning performance over time')
        scores_windows= []
        timer= qc.Timer()
        for ep in range( y_test.shape[0] ):
            scores= []
            frames= X_test[ep].shape[0]
            timer.reset()
            for t in range(frames):
                X= X_test[ep][t,:]
                y= [y_test[ep][t]]
                scores.append( cls.score(X, y) )
                #print('%d /%d   %.1f msec'% (t,X_test[ep].shape[0],1000*timer.sec()) )
            print('Tested epoch %d, %.3f msec per window'%(ep, timer.sec()*1000.0/frames) )
            scores_windows.append(scores)
        scores_windows= np.array(scores_windows)  
Example #19
def LDA_classification_aggregate_activity_scores(Data, labels):
    # Activity score features are sorted as label 0 then label 1, so rearrange the labels (0s first, then 1s)
    Labels = sorted(labels)  # sorted() avoids mutating the caller's list in place
    scores = cross_val_score(LDA(solver='svd'), Data, Labels, cv=5)
    return scores.mean()
Example #20
num2class = dict(enumerate(label_names))

class_nums = labels.sum(axis=0)
for i in range(len(label_names)):
    print(num2class[i], ":", class_nums[i], "of samples.")

features = pd.read_csv('features.csv', index_col=0, header=[0, 1, 2])
print("Features shape:", features.shape)
assert features.shape[0] == len(file_names) == class_nums.sum()

simple_labels = labels[:, :2] + labels[:, 2:4]
simple_labels = np.column_stack((simple_labels, labels[:, -1]))

y = np.nonzero(labels)[1]
y_weights = dict(enumerate(compute_class_weight('balanced', classes=np.unique(y), y=y)))
X = LDA(n_components=2).fit_transform(features, y)

figsize = (8, 6)
plt.figure(figsize=figsize)
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w']
ax = []
for c, i in zip(colors[:y.max() + 1], range(y.max() + 1)):
    idx = np.where(y == i)
    ax.append(plt.scatter(X[idx, 0], X[idx, 1], c=c, alpha=0.5))
plt.legend(ax, [num2class[i] for i in range(y.max() + 1)])
plt.show()

sy = np.nonzero(simple_labels)[1]
sy_weights = dict(
    enumerate(compute_class_weight('balanced', classes=np.unique(sy), y=sy)))
sX = LDA(n_components=2).fit_transform(features, sy)
Example #21
if n_components_pca == False:
    n_components_pca = min(5*(cell_cluster_number-1), int(0.01*gene_number))

n_components_lda = parameters.nComponentsLDA
if n_components_lda == False:
    n_components_lda = min(cell_cluster_number - 1, n_components_pca)

if n_components_pca < n_components_lda:
    print("--nComponentsPCA(-np) should not be less than --nComponentsLDA(-nl)")
    sys.exit(1)

expression_matrix = PCA(n_components=n_components_pca, svd_solver="full").fit_transform(expression_matrix)
print("Matrix shape after PCA: ", expression_matrix.shape)
oa = OAS(store_precision=False, assume_centered=False)
expression_matrix = LDA(n_components=n_components_lda,
                        covariance_estimator=oa,
                        solver='eigen').fit_transform(expression_matrix, cell_type_array)
print("Matrix shape after LDA: ", expression_matrix.shape)

average_size_subclusters = parameters.sizeSubcluster
celltype2subtype = {}
for celltype in all_cell_types:
    idx = np.where(cell_type_array == celltype)[0]
    n_clu = int(len(idx) / average_size_subclusters) + 1
    cells_of_this_celltype = expression_matrix[idx]
    predict_of_subtype = kmeans(cells_of_this_celltype, n_clusters=n_clu)
    subcelltype = np.array(["{}*SustechJinLab*{}".format(celltype, p) for p in predict_of_subtype.labels_])
    celltype2subtype[celltype] = sorted(set(subcelltype), key=sort_key)
    cell_subtype_array[idx] = subcelltype

all_cell_subtypes = sorted(set(cell_subtype_array), key=sort_key)
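sort_key is not defined in the snippet; a helper consistent with the "{celltype}*SustechJinLab*{label}" naming above (an assumption) might be:

def sort_key(name):
    # Order subtype names by cell type, then by numeric k-means label (hypothetical)
    celltype, _, label = name.partition('*SustechJinLab*')
    return (celltype, int(label))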
Example #22
# Model trained with all pokemon sightings

trainFeatures = train[featureSelected]
trainLabels = train['pokemonId'].to_numpy()
testFeatures = test[featureSelected]
testLabels = test['pokemonId'].to_numpy()
    
#PCA
pca = PCA(n_components=5)
pca.fit(trainFeatures)
pcaTrainFeatures = pca.transform(trainFeatures)
pcaTestFeatures = pca.transform(testFeatures)

#LDA
lda = LDA(n_components=5)
lda.fit(trainFeatures, trainLabels)
ldaTrainFeatures = lda.transform(trainFeatures)
ldaTestFeatures = lda.transform(testFeatures)

#Logistic Regression
#lrModel = LogisticRegression()
#lrModel.fit(trainFeatures, trainLabels)
#acc = lrModel.score(testFeatures, testLabels)
#
#lrModel = LogisticRegression()
#lrModel.fit(pcaTrainFeatures, trainLabels)
#acc1 = lrModel.score(pcaTestFeatures, testLabels)
#
#lrModel = LogisticRegression()
#lrModel.fit(ldaTrainFeatures, trainLabels)
Example #23
##############################################################################
# Create Pipelines
# ----------------
#
# Pipelines must be a dict of sklearn Pipeline objects.
#
# The CSP implementation is based on the MNE implementation. We selected 8 CSP
# components, as usually done in the literature.
#
# The Riemannian geometry pipeline consists of covariance estimation, tangent
# space mapping and, finally, a logistic regression for the classification.

pipelines = {}

pipelines["CSP+LDA"] = make_pipeline(CSP(n_components=8), LDA())

pipelines["RG+LR"] = make_pipeline(
    Covariances(), TangentSpace(), LogisticRegression(solver="lbfgs")
)

##############################################################################
# Evaluation
# ----------
#
# We define the paradigm (LeftRightImagery) and the dataset (BNCI2014001).
# The evaluation will return a DataFrame containing a single AUC score for
# each subject / session of the dataset, and for each pipeline.
#
# Results are saved into the database, so that if you add a new pipeline, it
# will not run again the evaluation unless a parameter has changed. Results can
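The example is cut off before the evaluation code; with moabb this step conventionally looks like the sketch below, which mirrors the comment above rather than the original file.

from moabb.datasets import BNCI2014001
from moabb.evaluations import CrossSessionEvaluation
from moabb.paradigms import LeftRightImagery

paradigm = LeftRightImagery()
evaluation = CrossSessionEvaluation(
    paradigm=paradigm, datasets=[BNCI2014001()], overwrite=False
)
results = evaluation.process(pipelines)  # one AUC score per subject/session/pipeline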
Example #24
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=0)

# Feature scaling must be applied when you use dimensionality-reduction techniques!
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Applying LDA to extract new independent variables (linear discriminants) that best separate the classes of the dependent variable
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=2)  # called linear discriminants
X_train = lda.fit_transform(
    X_train, y_train
)  # LDA is a supervised model, so the dependent variable must be included
X_test = lda.transform(X_test)

# Fitting Logistic Regression to the Training set
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
Example #25
datasets = [Zhou2016(), BNCI2014001()]
subj = [1, 2, 3]
for d in datasets:
    d.subject_list = subj

##############################################################################
# The following lines go exactly as in the previous example, and we end up
# with a pandas dataframe containing the results of the evaluation. We set
# `overwrite` to False to cache the results, which avoids restarting the
# whole evaluation from scratch if a problem occurs.
paradigm = LeftRightImagery()
evaluation = WithinSessionEvaluation(paradigm=paradigm,
                                     datasets=datasets,
                                     overwrite=False)
pipeline = make_pipeline(CSP(n_components=8), LDA())
results = evaluation.process({"csp+lda": pipeline})

##############################################################################
# Plotting Results
# ----------------
#
# We plot the results using the seaborn library. Note how easy it
# is to plot the results from several datasets with just one line.

results["subj"] = [str(resi).zfill(2) for resi in results["subject"]]
g = sns.catplot(
    kind="bar",
    x="score",
    y="subj",
    col="dataset",
Example #26
    def btnConvert_click(self):
        msgBox = QMessageBox()
        try:
            FoldFrom = np.int32(ui.txtFoldFrom.text())
            FoldTo = np.int32(ui.txtFoldTo.text())
        except:
            print("Please check fold parameters!")
            return

        if FoldTo < FoldFrom:
            print("Please check fold parameters!")
            return

        for fold in range(FoldFrom, FoldTo + 1):
            # Tol
            try:
                Tol = float(ui.txtTole.text())
            except:
                msgBox.setText("Tolerance is wrong!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            # Solver
            Solver = ui.cbSolver.currentData()
            # OutFile
            OutFile = ui.txtOutFile.text()
            OutFile = OutFile.replace("$FOLD$", str(fold))
            if not len(OutFile):
                msgBox.setText("Please enter out file!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            # InFile
            InFile = ui.txtInFile.text()
            InFile = InFile.replace("$FOLD$", str(fold))
            if not len(InFile):
                msgBox.setText("Please enter input file!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not os.path.isfile(InFile):
                msgBox.setText("Input file not found!\n" + InFile)
                print(InFile + " - not found!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            InData = mainIO_load(InFile)
            OutData = dict()
            OutData["imgShape"] = reshape_1Dvector(InData["imgShape"])
            # Data
            if not len(ui.txtITrData.currentText()):
                msgBox.setText("Please enter Input Train Data variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeData.currentText()):
                msgBox.setText("Please enter Input Test Data variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrData.text()):
                msgBox.setText("Please enter Output Train Data variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeData.text()):
                msgBox.setText("Please enter Output Test Data variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                XTr = InData[ui.txtITrData.currentText()]
                XTe = InData[ui.txtITeData.currentText()]
                if ui.cbScale.isChecked():
                    XTr = preprocessing.scale(XTr)
                    XTe = preprocessing.scale(XTe)
                    print("Whole of data is scaled X~N(0,1).")
            except:
                print("Cannot load data")
                return
            # NComponent
            try:
                NumFea = np.int32(ui.txtNumFea.text())
            except:
                msgBox.setText("Number of features is wrong!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if NumFea < 1:
                msgBox.setText("Number of features must be greater than zero!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if NumFea > np.shape(XTr)[1]:
                msgBox.setText("Number of features is wrong!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            # Label
            if not len(ui.txtITrLabel.currentText()):
                msgBox.setText("Please enter Train Input Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeLabel.currentText()):
                msgBox.setText("Please enter Test Input Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrLabel.text()):
                msgBox.setText(
                    "Please enter Train Output Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeLabel.text()):
                msgBox.setText("Please enter Test Output Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                YTr = InData[ui.txtITrLabel.currentText()][0]
                YTe = InData[ui.txtITeLabel.currentText()][0]
                OutData[ui.txtOTrLabel.text()] = reshape_1Dvector(YTr)
                OutData[ui.txtOTeLabel.text()] = reshape_1Dvector(YTe)
            except:
                print("Cannot load labels!")
            # Subject
            if ui.cbSubject.isChecked():
                if not len(ui.txtITrSubject.currentText()):
                    msgBox.setText(
                        "Please enter Train Input Subject variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeSubject.currentText()):
                    msgBox.setText(
                        "Please enter Test Input Subject variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrSubject.text()):
                    msgBox.setText(
                        "Please enter Train Output Subject variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeSubject.text()):
                    msgBox.setText(
                        "Please enter Test Output Subject variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOTrSubject.text()] = reshape_1Dvector(
                        InData[ui.txtITrSubject.currentText()])
                    OutData[ui.txtOTeSubject.text()] = reshape_1Dvector(
                        InData[ui.txtITeSubject.currentText()])
                except:
                    print("Cannot load Subject IDs")
                    return
            # Task
            if ui.cbTask.isChecked():
                if not len(ui.txtITrTask.currentText()):
                    msgBox.setText(
                        "Please enter Input Train Task variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeTask.currentText()):
                    msgBox.setText(
                        "Please enter Input Test Task variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrTask.text()):
                    msgBox.setText(
                        "Please enter Output Train Task variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeTask.text()):
                    msgBox.setText(
                        "Please enter Output Test Task variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOTrTask.text()] = reshape_1Dvector(
                        InData[ui.txtITrTask.currentText()])
                    OutData[ui.txtOTeTask.text()] = reshape_1Dvector(
                        InData[ui.txtITeTask.currentText()])
                except:
                    print("Cannot load Tasks!")
                    return
            # Run
            if ui.cbRun.isChecked():
                if not len(ui.txtITrRun.currentText()):
                    msgBox.setText(
                        "Please enter Train Input Run variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeRun.currentText()):
                    msgBox.setText(
                        "Please enter Test Input Run variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrRun.text()):
                    msgBox.setText(
                        "Please enter Train Output Run variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeRun.text()):
                    msgBox.setText(
                        "Please enter Test Output Run variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOTrRun.text()] = reshape_1Dvector(
                        InData[ui.txtITrRun.currentText()])
                    OutData[ui.txtOTeRun.text()] = reshape_1Dvector(
                        InData[ui.txtITeRun.currentText()])
                except:
                    print("Cannot load Runs!")
                    return
            # Counter
            if ui.cbCounter.isChecked():
                if not len(ui.txtITrCounter.currentText()):
                    msgBox.setText(
                        "Please enter Train Input Counter variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeCounter.currentText()):
                    msgBox.setText(
                        "Please enter Test Input Counter variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrCounter.text()):
                    msgBox.setText(
                        "Please enter Train Output Counter variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeCounter.text()):
                    msgBox.setText(
                        "Please enter Test Output Counter variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOTrCounter.text()] = reshape_1Dvector(
                        InData[ui.txtITrCounter.currentText()])
                    OutData[ui.txtOTeCounter.text()] = reshape_1Dvector(
                        InData[ui.txtITeCounter.currentText()])
                except:
                    print("Cannot load Counters!")
                    return
            # Matrix Label
            if ui.cbmLabel.isChecked():
                if not len(ui.txtITrmLabel.currentText()):
                    msgBox.setText(
                        "Please enter Train Input Matrix Label variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITemLabel.currentText()):
                    msgBox.setText(
                        "Please enter Test Input Matrix Label variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrmLabel.text()):
                    msgBox.setText(
                        "Please enter Train Output Matrix Label variable name!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTemLabel.text()):
                    msgBox.setText(
                        "Please enter Test Output Matrix Label variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOTrmLabel.text()] = InData[
                        ui.txtITrmLabel.currentText()]
                    OutData[ui.txtOTemLabel.text()] = InData[
                        ui.txtITemLabel.currentText()]
                except:
                    print("Cannot load matrix lables!")
                    return
            # Design
            if ui.cbDM.isChecked():
                if not len(ui.txtITrDM.currentText()):
                    msgBox.setText(
                        "Please enter Train Input Design Matrix variable name!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeDM.currentText()):
                    msgBox.setText(
                        "Please enter Test Input Design Matrix variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrDM.text()):
                    msgBox.setText(
                        "Please enter Train Output Design Matrix variable name!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeDM.text()):
                    msgBox.setText(
                        "Please enter Test Output Design Matrix variable name!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOTrDM.text()] = InData[
                        ui.txtITrDM.currentText()]
                    OutData[ui.txtOTeDM.text()] = InData[
                        ui.txtITeDM.currentText()]
                except:
                    print("Cannot load design matrices!")
                    return
            # Coordinate
            if ui.cbCol.isChecked():
                if not len(ui.txtCol.currentText()):
                    msgBox.setText("Please enter Coordinator variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOCol.text()):
                    msgBox.setText("Please enter Coordinator variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOCol.text()] = InData[
                        ui.txtCol.currentText()]
                except:
                    print("Cannot load coordinator!")
                    return
            # Condition
            if ui.cbCond.isChecked():
                if not len(ui.txtCond.currentText()):
                    msgBox.setText("Please enter Condition variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOCond.text()):
                    msgBox.setText("Please enter Condition variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOCond.text()] = InData[
                        ui.txtCond.currentText()]
                except:
                    print("Cannot load conditions!")
                    return
            # FoldID
            if ui.cbFoldID.isChecked():
                if not len(ui.txtFoldID.currentText()):
                    msgBox.setText("Please enter FoldID variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOFoldID.text()):
                    msgBox.setText("Please enter FoldID variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOFoldID.text()] = reshape_1Dvector(
                        InData[ui.txtFoldID.currentText()])
                except:
                    print("Cannot load Fold ID!")
                    return
            # FoldInfo
            if ui.cbFoldInfo.isChecked():
                if not len(ui.txtFoldInfo.currentText()):
                    msgBox.setText("Please enter FoldInfo variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOFoldInfo.text()):
                    msgBox.setText("Please enter FoldInfo variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOFoldInfo.text()] = InData[
                        ui.txtFoldInfo.currentText()]
                except:
                    print("Cannot load Fold Info!")
                    return
            # Number of Scan
            if ui.cbNScan.isChecked():
                if not len(ui.txtITrScan.currentText()):
                    msgBox.setText(
                        "Please enter Number of Scan variable name for Input Train!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeScan.currentText()):
                    msgBox.setText(
                        "Please enter Number of Scan variable name for Input Test!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrScan.text()):
                    msgBox.setText(
                        "Please enter Number of Scan variable name for Output Train!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeScan.text()):
                    msgBox.setText(
                        "Please enter Number of Scan variable name for Output Test!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOTrScan.text()] = reshape_1Dvector(
                        InData[ui.txtITrScan.currentText()])
                    OutData[ui.txtOTeScan.text()] = reshape_1Dvector(
                        InData[ui.txtITeScan.currentText()])
                except:
                    print("Cannot load NScan!")
                    return
            print("Running LDA:")
            model = LDA(n_components=NumFea, solver=Solver, tol=Tol)
            print("Training ...")
            XTr_New = model.fit_transform(XTr, YTr)
            OutData[ui.txtOTrData.text()] = XTr_New
            print("Testing ...")
            XTe_New = model.transform(XTe)
            OutData[ui.txtOTeData.text()] = XTe_New
            print("Saving ...")
            mainIO_save(OutData, OutFile)
            print("Fold " + str(fold) + " is DONE: " + OutFile)
        print("LDA is done.")
        msgBox.setText("LDA is done.")
        msgBox.setIcon(QMessageBox.Information)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
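
# Note on the LDA call above: scikit-learn caps n_components at
# min(n_classes - 1, n_features). A tiny self-contained sketch of that rule
# (synthetic data; not part of the original tool):
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X_demo = np.random.RandomState(0).randn(30, 5)
y_demo = np.repeat([0, 1, 2], 10)  # 3 classes
max_comp = min(len(np.unique(y_demo)) - 1, X_demo.shape[1])
print(max_comp)  # -> 2; asking for more raises a ValueError
LinearDiscriminantAnalysis(n_components=max_comp).fit(X_demo, y_demo)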
Example no. 27
0
# Create Pipelines
# ----------------
#
# Pipelines must be a dict of scikit-learn pipelines.
processing_sampling_rate = 128
pipelines = {}

# we have to do this because the classes are called 'Target' and 'NonTarget'
# but the evaluation function uses a LabelEncoder, transforming them
# to 0 and 1
labels_dict = {"Target": 1, "NonTarget": 0}

# Riemannian geometry based classification
pipelines["RG+LDA"] = make_pipeline(
    XdawnCovariances(nfilter=5, estimator="lwf", xdawn_estimator="scm"),
    TangentSpace(),
    LDA(solver="lsqr", shrinkage="auto"),
)

pipelines["Xdw+LDA"] = make_pipeline(Xdawn(nfilter=2, estimator="scm"),
                                     Vectorizer(),
                                     LDA(solver="lsqr", shrinkage="auto"))

##############################################################################
# Evaluation
# ----------
#
# We define the paradigm (P300) and use all three datasets available for it.
# The evaluation will return a dataframe containing AUCs for each permutation
# and dataset size.

paradigm = P300(resample=processing_sampling_rate)
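
# A sketch of running the evaluation described above (an assumption: moabb's
# WithinSessionEvaluation, which is not shown in this excerpt, is used here):
from moabb.evaluations import WithinSessionEvaluation

datasets = paradigm.datasets  # every dataset compatible with the P300 paradigm
evaluation = WithinSessionEvaluation(paradigm=paradigm, datasets=datasets,
                                     overwrite=False)
results = evaluation.process(pipelines)  # one ROC-AUC row per pipeline/session
print(results.head())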
Example no. 28
0
X_test_raw = np.concatenate(
    (np.array(hog_test), np.array(rgb_test), np.array(hsv_test)), axis=1)

X_train1, X_val, y_train1, y_val = train_test_split(X_train_raw,
                                                    y_train_raw,
                                                    test_size=0.2,
                                                    random_state=1)
print("yay")

#rf_class= RandomForestClassifier(n_estimators=200,max_depth=30, random_state=0,class_weight='balanced')
clf = LDA()
#print(cross_val_score(rf_class, X_train1, y_train1, scoring='accuracy', cv = 10))
accuracy = cross_val_score(clf, X_train1, y_train1,
                           scoring='accuracy').mean() * 100
print("Accuracy of Random Forests is: ", accuracy)
if (accuracy > 0.36):
    print("YAY")
    clf.fit(X_train1, y_train1)
    s = clf.score(X_val, y_val)
    print(s)
    if s > 0.36:
        clf.fit(X_train_raw, y_train_raw)
        predicted = clf.predict(X_test_raw)

        pickle.dump(clf, open(model_label, 'wb'))
        with open(test_label, 'w+') as csvfile:
Example no. 29
0
# Available kernels for KernelPCA: 'linear', 'poly', 'rbf', 'sigmoid'
# Applying Kernel PCA
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=2,
                 kernel='rbf',
                 n_jobs=-1)  # note: KernelPCA has no 'shrinkage' parameter
X_train = kpca.fit_transform(X_train)
X_task = kpca.transform(X_task)


# Available solvers for LDA: 'svd', 'lsqr', 'eigen'
# Applying LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=2,
          solver='svd')  # shrinkage is only valid with the 'lsqr' or 'eigen' solver
X_train = lda.fit_transform(X_train, y_target)
X_task = lda.transform(X_task)




# Candidate values for n_neighbors: 3, 5, 10, 20
# Training the K-NN model on the Training set
# minkowski with p=2 is equivalent to the standard Euclidean metric (these are the defaults)
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors=5, 
                                  metric="minkowski", 
                                  p=2, 
                                  n_jobs=-1)
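
# A minimal usage sketch for the classifier above (the fit/predict lines are an
# addition, reusing the LDA-projected arrays from this example):
classifier.fit(X_train, y_target)
y_task_pred = classifier.predict(X_task)
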
def main():

    full_path = 'data/adult.data'
    X, y = load_dataset(full_path)
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, y, random_state=RANDOM_STATE)

    kmeans_train_time = np.zeros(5)
    kmeans_predict_time = np.zeros(5)
    em_train_time = np.zeros(5)
    em_predict_time = np.zeros(5)
    nn_predict_time = np.zeros(9)
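
    # The neural_network() helper called in sections 4 and 5 below is not defined
    # in this excerpt; this nested stand-in is a hypothetical sketch (assuming
    # sklearn's MLPClassifier and that the helper returns the prediction time):
    def neural_network(name, X_tr, y_tr, X_te, y_te):
        from sklearn.neural_network import MLPClassifier
        from sklearn.metrics import accuracy_score
        mlp = MLPClassifier(hidden_layer_sizes=(50,), max_iter=500,
                            random_state=RANDOM_STATE)
        mlp.fit(X_tr, y_tr)
        t_start = time.time()
        y_pred = mlp.predict(X_te)
        query_time = time.time() - t_start
        print(name, "accuracy:", accuracy_score(y_te, y_pred))
        return query_time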

    # Ref: https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html
    # https://www.edupristine.com/blog/beyond-k-means
    # https://www.linkedin.com/pulse/finding-optimal-number-clusters-k-means-through-elbow-asanka-perera
    wcss = []
    for i in range(1, 11):
        kmeans = KMeans(n_clusters=i,
                        init='k-means++',
                        random_state=RANDOM_STATE)
        kmeans.fit(X_train)
        wcss.append(kmeans.inertia_)
    plt.figure()
    plt.plot(range(1, 11), wcss)
    plt.title('Elbow Method')
    plt.xlabel('Number of clusters')
    plt.ylabel('WCSS')
    plt.savefig("UL2_WCSS.png")

    t_bef = time.time()
    kmeans = KMeans(n_clusters=3, random_state=RANDOM_STATE).fit(X_train)
    t_aft = time.time()
    kmeans_train_time[0] = t_aft - t_bef
    kmeans_label_train = kmeans.labels_
    centroids = kmeans.cluster_centers_

    t_bef = time.time()
    kmeans_label_test = kmeans.predict(X_test)
    t_aft = time.time()
    kmeans_predict_time[0] = t_aft - t_bef

    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 6))
    ax1.set_title("Original")
    ax1.scatter(X_train[:, 0], X_train[:, 1], c=Y_train, alpha=0.5)
    ax2.set_title('K Means')
    ax2.scatter(X_train[:, 0], X_train[:, 1], c=kmeans_label_train, alpha=0.5)
    ax2.scatter(centroids[:, 0], centroids[:, 1], c='red')
    plt.savefig("UL2_kmeans.png")

    display_metrics("Original Kmeans Train", Y_train, kmeans_label_train)
    display_metrics("Original Kmeans Test", Y_test, kmeans_label_test)

    # Reference: https://jakevdp.github.io/PythonDataScienceHandbook/05.09-principal-component-analysis.html#:~:text=Choosing%20the%20number%20of%20components,pca%20%3D%20PCA().
    # PCA -------------------------------------------------------------
    plt.figure()
    pca = PCA().fit(X_train)
    eigenvalues = pca.explained_variance_
    plt.plot(np.cumsum(pca.explained_variance_ratio_))
    plt.xlabel('number of components')
    plt.ylabel('cumulative explained variance')
    plt.savefig("UL2_PCA_variance.png")

    pca = PCA(n_components=50, random_state=RANDOM_STATE)
    pca.fit(X_train)
    pca_trans_train = pca.transform(X_train)
    pca_trans_test = pca.transform(X_test)

    # Run on transformed PCA dataset
    wcss_pca = []
    for i in range(1, 11):
        kmeans_pca = KMeans(n_clusters=i,
                            init='k-means++',
                            random_state=RANDOM_STATE)
        kmeans_pca.fit(pca_trans_train)
        wcss_pca.append(kmeans_pca.inertia_)
    plt.figure()
    plt.plot(range(1, 11), wcss_pca)
    plt.title('Elbow Method')
    plt.xlabel('Number of clusters')
    plt.ylabel('WCSS')
    plt.savefig("UL2_WCSS_After_PCA.png")

    t_bef = time.time()
    kmeans_pca = KMeans(n_clusters=3,
                        random_state=RANDOM_STATE).fit(pca_trans_train)
    t_aft = time.time()
    kmeans_train_time[1] = t_aft - t_bef

    kmeans_pca.predict(pca_trans_train)
    kmeans_pca_label = kmeans_pca.labels_
    centroids_pca = kmeans_pca.cluster_centers_

    t_bef = time.time()
    kmeans_pca_label_test = kmeans_pca.predict(pca_trans_test)
    t_aft = time.time()
    kmeans_predict_time[1] = t_aft - t_bef

    display_metrics("Kmeans Train after PCA", Y_train, kmeans_pca_label)
    display_metrics("Kmeans Test after PCA", Y_test, kmeans_pca_label_test)

    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 6))
    ax1.set_title("K means before PCA")
    ax1.scatter(X_train[:, 0], X_train[:, 1], c=kmeans_label_train, alpha=0.5)
    ax1.scatter(centroids[:, 0], centroids[:, 1], c='red')
    ax2.set_title("K Means after PCA")
    ax2.scatter(pca_trans_train[:, 0],
                pca_trans_train[:, 1],
                c=kmeans_pca_label,
                alpha=0.5)
    ax2.scatter(centroids_pca[:, 0], centroids_pca[:, 1], c='red')
    plt.savefig("UL2_kmeans_aft_PCA.png")

    # ICA -------------------------------------------------------------
    dims = range(1, 106)
    kurt = []
    for dim in dims:
        ica = FastICA(n_components=dim, tol=2.0, random_state=RANDOM_STATE)
        tmp = ica.fit_transform(X_train)
        tmp = pd.DataFrame(tmp)
        tmp = tmp.kurt(axis=0)
        kurt.append(tmp.abs().mean())
    plt.figure()
    plt.title("ICA Kurtosis")
    plt.xlabel("Independent Components")
    plt.ylabel("Avg Kurtosis Across IC")
    plt.plot(dims, kurt)
    plt.savefig("UL2_ICA_kurtosis.png")

    ica = FastICA(n_components=95,
                  algorithm='parallel',
                  whiten=True,
                  random_state=RANDOM_STATE)
    ica.fit(X_train)
    ica_trans_train = ica.transform(X_train)
    ica_trans_test = ica.transform(X_test)

    # Run on transformed ICA dataset
    wcss_ica = []
    for i in range(1, 11):
        kmeans_ica = KMeans(n_clusters=i,
                            init='k-means++',
                            random_state=RANDOM_STATE)
        kmeans_ica.fit(ica_trans_train)
        wcss_ica.append(kmeans_ica.inertia_)
    plt.figure()
    plt.plot(range(1, 11), wcss_ica)
    plt.title('Elbow Method')
    plt.xlabel('Number of clusters')
    plt.ylabel('WCSS')
    plt.savefig("UL2_WCSS_After_ICA.png")

    t_bef = time.time()
    kmeans_ica = KMeans(n_clusters=3,
                        random_state=RANDOM_STATE).fit(ica_trans_train)
    t_aft = time.time()
    kmeans_train_time[2] = t_aft - t_bef

    kmeans_ica.predict(ica_trans_train)
    kmeans_ica_label = kmeans_ica.labels_
    centroids_ica = kmeans_ica.cluster_centers_

    t_bef = time.time()
    kmeans_ica_label_test = kmeans_ica.predict(ica_trans_test)
    t_aft = time.time()
    kmeans_predict_time[2] = t_aft - t_bef

    display_metrics("Kmeans Train after ICA", Y_train, kmeans_ica_label)
    display_metrics("Kmeans Test after ICA", Y_test, kmeans_ica_label_test)

    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 6))
    ax1.set_title("K means before ICA")
    ax1.scatter(X_train[:, 0], X_train[:, 1], c=kmeans_label_train, alpha=0.5)
    ax1.scatter(centroids[:, 0], centroids[:, 1], c='red')
    ax2.set_title('K Means after ICA')
    ax2.scatter(ica_trans_train[:, 0],
                ica_trans_train[:, 1],
                c=kmeans_ica_label,
                alpha=0.5)
    ax2.scatter(centroids_ica[:, 0], centroids_ica[:, 1], c='red')
    plt.savefig("UL2_kmeans_aft_ICA.png")

    # RP -------------------------------------------------------------
    rp = SparseRandomProjection(n_components=106, random_state=RANDOM_STATE)
    rp.fit(X_train)
    rp_trans_train = rp.transform(X_train)
    rp_trans_test = rp.transform(X_test)
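
    # Random projections have no exact inverse; a rough sketch (an addition) of
    # gauging information loss via the pseudo-inverse of the projection matrix:
    rp_components = rp.components_.toarray()  # shape: (n_components, n_features)
    X_rp_rec = rp_trans_train @ np.linalg.pinv(rp_components).T
    print("RP mean squared reconstruction error:",
          np.mean((X_train - X_rp_rec) ** 2))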

    # Run on transformed RP dataset
    wcss_rp = []
    for i in range(1, 11):
        kmeans_rp = KMeans(n_clusters=i,
                           init='k-means++',
                           random_state=RANDOM_STATE)
        kmeans_rp.fit(rp_trans_train)
        wcss_rp.append(kmeans_rp.inertia_)
    plt.figure()
    plt.plot(range(1, 11), wcss_rp)
    plt.title('Elbow Method')
    plt.xlabel('Number of clusters')
    plt.ylabel('WCSS')
    plt.savefig("UL2_WCSS_After_RP.png")

    t_bef = time.time()
    kmeans_rp = KMeans(n_clusters=2,
                       random_state=RANDOM_STATE).fit(rp_trans_train)
    t_aft = time.time()
    kmeans_train_time[3] = t_aft - t_bef

    kmeans_rp_label = kmeans_rp.labels_
    centroids_rp = kmeans_rp.cluster_centers_

    t_bef = time.time()
    kmeans_rp_label_test = kmeans_rp.predict(rp_trans_test)
    t_aft = time.time()
    kmeans_predict_time[3] = t_aft - t_bef

    display_metrics("Kmeans Train after RP", Y_train, kmeans_rp_label)
    display_metrics("Kmeans Test after RP", Y_test, kmeans_rp_label_test)

    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 6))
    ax1.set_title("K means before RP")
    ax1.scatter(X_train[:, 0], X_train[:, 1], c=kmeans_label_train, alpha=0.5)
    ax1.scatter(centroids[:, 0], centroids[:, 1], c='red')
    ax2.set_title("K Means after RP")
    ax2.scatter(rp_trans_train[:, 0],
                rp_trans_train[:, 1],
                c=kmeans_rp_label,
                alpha=0.5)
    ax2.scatter(centroids_rp[:, 0], centroids_rp[:, 1], c='red')
    plt.savefig("UL2_kmeans_aft_RP.png")

    # LDA -------------------------------------------------------------
    lda = LDA(n_components=1)
    lda_trans_train = lda.fit_transform(X_train, Y_train)
    lda_trans_test = lda.transform(X_test)
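
    # Note: LDA yields at most min(n_classes - 1, n_features) discriminants, and
    # this label is binary, so n_components=1 is the ceiling here. That is also
    # why the LDA scatter plots below reuse the same single column on both axes.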

    # Run on transformed LDA dataset
    wcss_lda = []
    for i in range(1, 11):
        kmeans_lda = KMeans(n_clusters=i,
                            init='k-means++',
                            random_state=RANDOM_STATE)
        kmeans_lda.fit(lda_trans_train)
        wcss_lda.append(kmeans_lda.inertia_)
    plt.figure()
    plt.plot(range(1, 11), wcss_lda)
    plt.title('Elbow Method')
    plt.xlabel('Number of clusters')
    plt.ylabel('WCSS')
    plt.savefig("UL2_WCSS_After_LDA.png")

    t_bef = time.time()
    kmeans_lda = KMeans(n_clusters=2,
                        random_state=RANDOM_STATE).fit(lda_trans_train)
    t_aft = time.time()
    kmeans_train_time[4] = t_aft - t_bef

    kmeans_lda_label = kmeans_lda.labels_
    centroids_lda = kmeans_lda.cluster_centers_

    t_bef = time.time()
    kmeans_lda_label_test = kmeans_lda.predict(lda_trans_test)
    t_aft = time.time()
    kmeans_predict_time[4] = t_aft - t_bef

    display_metrics("Kmeans Train after LDA", Y_train, kmeans_lda_label)
    display_metrics("Kmeans Test after LDA", Y_test, kmeans_lda_label_test)

    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 6))
    ax1.set_title("K means before LDA")
    ax1.scatter(X_train[:, 0], X_train[:, 1], c=kmeans_label_train, alpha=0.5)
    ax1.scatter(centroids[:, 0], centroids[:, 1], c='red')
    ax2.set_title("K Means after LDA")
    ax2.scatter(lda_trans_train[:, 0],
                lda_trans_train[:, 0],
                c=kmeans_lda_label,
                alpha=0.5)
    ax2.scatter(centroids_lda[:, 0], centroids_lda[:, 0], c='red')
    plt.savefig("UL2_kmeans_aft_LDA.png")

    # Training time of k-means across the dimensionality-reduction variants
    classifier = [
        'Kmeans', 'Kmeans with PCA', 'Kmeans with ICA', 'Kmeans with RP',
        'Kmeans with LDA'
    ]
    np_classifier = np.array(classifier)
    plt.figure()
    plt.barh(np_classifier, kmeans_train_time, align='center')
    plt.title('Kmeans Train Time')
    plt.ylabel('Name')
    plt.xlabel('Time (seconds)')
    plt.savefig('UL2_Kmeans_Traintime.png', bbox_inches="tight")

    # Prediction (query) time of k-means across the dimensionality-reduction variants
    plt.figure()
    plt.barh(np_classifier, kmeans_predict_time, align='center')
    plt.title('Kmeans Query Time')
    plt.ylabel('Name')
    plt.xlabel('Time (seconds)')
    plt.savefig('UL2_Kmeans_Querytime.png', bbox_inches="tight")

    # Expectation Maximization ---------------------------------------------------
    silhouette_scores = []  # renamed to avoid clashing with metrics.silhouette_score
    for i in range(2, 12):
        gmm = GaussianMixture(n_components=i,
                              n_init=2,
                              random_state=RANDOM_STATE).fit(X_train)
        gmm_predict = gmm.predict(X_train)
        silhouette_scores.append(
            metrics.silhouette_score(X_train, gmm_predict, metric='euclidean'))
    plt.figure()
    plt.plot(range(2, 12), silhouette_scores)
    plt.title('Silhouette Analysis')
    plt.xlabel('Number of clusters')
    plt.ylabel('silhouette_score')
    plt.savefig("UL2_SS.png")

    t_bef = time.time()
    gmm = GaussianMixture(n_components=9).fit(X_train)
    t_aft = time.time()
    em_train_time[0] = t_aft - t_bef

    gmm_label_train = gmm.predict(X_train)

    t_bef = time.time()
    gmm_label_test = gmm.predict(X_test)
    t_aft = time.time()
    em_predict_time[0] = t_aft - t_bef

    display_metrics("Original EM Train", Y_train, gmm_label_train)
    display_metrics("Original EM Test", Y_test, gmm_label_test)

    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 6))
    ax1.set_title("Original")
    ax1.scatter(X_train[:, 0], X_train[:, 1], c=Y_train, alpha=0.5)
    ax2.set_title("Expectation Maximization")
    ax2.scatter(X_train[:, 0], X_train[:, 1], c=gmm_label_train, alpha=0.5)
    plt.savefig("UL2_EM.png")

    # PCA --------------------
    silhouette_score_pca = []
    for i in range(2, 12):
        gmm_pca = GaussianMixture(
            n_components=i, n_init=2,
            random_state=RANDOM_STATE).fit(pca_trans_train)
        gmm_predict_pca = gmm_pca.predict(pca_trans_train)
        silhouette_score_pca.append(
            metrics.silhouette_score(pca_trans_train,
                                     gmm_predict_pca,
                                     metric='euclidean'))
    plt.figure()
    plt.plot(range(2, 12), silhouette_score_pca)
    plt.title('Silhouette Analysis')
    plt.xlabel('Number of clusters')
    plt.ylabel('silhouette_score')
    plt.savefig("UL2_SS_After_PCA.png")

    t_bef = time.time()
    gmm_pca = GaussianMixture(n_components=9).fit(pca_trans_train)
    t_aft = time.time()
    em_train_time[1] = t_aft - t_bef

    gmm_label_pca = gmm_pca.predict(pca_trans_train)

    t_bef = time.time()
    gmm_label_pca_test = gmm_pca.predict(pca_trans_test)
    t_aft = time.time()
    em_predict_time[1] = t_aft - t_bef

    display_metrics("EM Train after PCA", Y_train, gmm_label_pca)
    display_metrics("EM Test after PCA", Y_test, gmm_label_pca_test)

    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 6))
    ax1.set_title("EM before PCA")
    ax1.scatter(X_train[:, 0], X_train[:, 1], c=gmm_label_train, alpha=0.5)
    ax2.set_title("EM after PCA")
    ax2.scatter(pca_trans_train[:, 0],
                pca_trans_train[:, 1],
                c=gmm_label_pca,
                alpha=0.5)
    plt.savefig("UL2_EM_aft_PCA.png")

    # ICA --------------------
    silhouette_score_ica = []
    for i in range(2, 12):
        gmm_ica = GaussianMixture(
            n_components=i, n_init=2,
            random_state=RANDOM_STATE).fit(ica_trans_train)
        gmm_predict_ica = gmm_ica.predict(ica_trans_train)
        silhouette_score_ica.append(
            metrics.silhouette_score(ica_trans_train,
                                     gmm_predict_ica,
                                     metric='euclidean'))
    plt.figure()
    plt.plot(range(2, 12), silhouette_score_ica)
    plt.title('Silhouette Analysis')
    plt.xlabel('Number of clusters')
    plt.ylabel('silhouette_score')
    plt.savefig("UL2_SS_After_ICA.png")

    t_bef = time.time()
    gmm_ica = GaussianMixture(n_components=3).fit(ica_trans_train)
    t_aft = time.time()
    em_train_time[2] = t_aft - t_bef

    gmm_label_ica = gmm_ica.predict(ica_trans_train)

    t_bef = time.time()
    gmm_label_ica_test = gmm_ica.predict(ica_trans_test)
    t_aft = time.time()
    em_predict_time[2] = t_aft - t_bef

    display_metrics("EM Train after ICA", Y_train, gmm_label_ica)
    display_metrics("EM Test after ICA", Y_test, gmm_label_ica_test)

    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 6))
    ax1.set_title("EM before ICA")
    ax1.scatter(X_train[:, 0], X_train[:, 1], c=gmm_label_train, alpha=0.5)
    ax2.set_title("EM after ICA")
    ax2.scatter(ica_trans_train[:, 0],
                ica_trans_train[:, 1],
                c=gmm_label_ica,
                alpha=0.5)
    plt.savefig("UL2_EM_aft_ICA.png")

    # RP --------------------
    silhouette_score_rp = []
    for i in range(2, 12):
        gmm_rp = GaussianMixture(n_components=i,
                                 n_init=2,
                                 random_state=RANDOM_STATE).fit(rp_trans_train)
        gmm_predict_rp = gmm_rp.predict(rp_trans_train)
        silhouette_score_rp.append(
            metrics.silhouette_score(rp_trans_train,
                                     gmm_predict_rp,
                                     metric='euclidean'))
    plt.figure()
    plt.plot(range(2, 12), silhouette_score_rp)
    plt.title('Silhouette Analysis')
    plt.xlabel('Number of clusters')
    plt.ylabel('silhouette_score')
    plt.savefig("UL2_SS_After_RP.png")

    t_bef = time.time()
    gmm_rp = GaussianMixture(n_components=3).fit(rp_trans_train)
    t_aft = time.time()
    em_train_time[3] = t_aft - t_bef

    gmm_label_rp = gmm_rp.predict(rp_trans_train)

    t_bef = time.time()
    gmm_label_rp_test = gmm_rp.predict(rp_trans_test)
    t_aft = time.time()
    em_predict_time[3] = t_aft - t_bef

    display_metrics("EM Train after RP", Y_train, gmm_label_rp)
    display_metrics("EM Test after RP", Y_test, gmm_label_rp_test)

    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 6))
    ax1.set_title("EM before RP")
    ax1.scatter(X_train[:, 0], X_train[:, 1], c=gmm_label_train, alpha=0.5)
    ax2.set_title("EM after RP")
    ax2.scatter(rp_trans_train[:, 0],
                rp_trans_train[:, 1],
                c=gmm_label_rp,
                alpha=0.5)
    plt.savefig("UL2_EM_aft_RP.png")

    # LDA --------------------
    silhouette_score_lda = []
    for i in range(2, 12):
        gmm_lda = GaussianMixture(
            n_components=i, n_init=2,
            random_state=RANDOM_STATE).fit(lda_trans_train)
        gmm_predict_lda = gmm_lda.predict(lda_trans_train)
        silhouette_score_lda.append(
            metrics.silhouette_score(lda_trans_train,
                                     gmm_predict_lda,
                                     metric='euclidean'))
    plt.figure()
    plt.plot(range(2, 12), silhouette_score_lda)
    plt.title('Silhouette Analysis')
    plt.xlabel('Number of clusters')
    plt.ylabel('silhouette_score')
    plt.savefig("UL2_SS_After_LDA.png")

    t_bef = time.time()
    gmm_lda = GaussianMixture(n_components=2).fit(lda_trans_train)
    t_aft = time.time()
    em_train_time[4] = t_aft - t_bef

    gmm_label_lda = gmm_lda.predict(lda_trans_train)

    t_bef = time.time()
    gmm_label_lda_test = gmm_lda.predict(lda_trans_test)
    t_aft = time.time()
    em_predict_time[4] = t_aft - t_bef

    display_metrics("EM Train after LDA", Y_train, gmm_label_lda)
    display_metrics("EM Test after LDA", Y_test, gmm_label_lda_test)

    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 6))
    ax1.set_title("EM before LDA")
    ax1.scatter(X_train[:, 0], X_train[:, 1], c=gmm_label_train, alpha=0.5)
    ax2.set_title("EM after LDA")
    ax2.scatter(lda_trans_train[:, 0],
                lda_trans_train[:, 0],
                c=gmm_label_lda,
                alpha=0.5)
    plt.savefig("UL2_EM_aft_LDA.png")

    # Training time of EM across the dimensionality-reduction variants
    classifier = [
        'EM', 'EM with PCA', 'EM with ICA', 'EM with RP', 'EM with LDA'
    ]
    np_classifier = np.array(classifier)
    plt.figure()
    plt.barh(np_classifier, em_train_time, align='center')
    plt.title('EM Train Time')
    plt.ylabel('Name')
    plt.xlabel('Time (seconds)')
    plt.savefig('UL2_EM_Traintime.png', bbox_inches="tight")

    # Prediction (query) time of EM across the dimensionality-reduction variants
    plt.figure()
    plt.barh(np_classifier, em_predict_time, align='center')
    plt.title('EM Query Time')
    plt.ylabel('Name')
    plt.xlabel('Time (seconds)')
    plt.savefig('UL2_EM_Querytime.png', bbox_inches="tight")

    #4.  Neural Network with projected data ---------------------------------------------------
    # Original run NN
    querytime = neural_network("Original NN", X_train, Y_train, X_test, Y_test)
    print("Original NN", querytime)
    # nn_predict_time = np.append(nn_predict_time, [querytime])
    # NN with PCA
    querytime = neural_network("NN with PCA", pca_trans_train, Y_train,
                               pca_trans_test, Y_test)
    print("NN with PCA", querytime)
    # nn_predict_time = np.append(nn_predict_time, [querytime])
    # NN with ICA
    querytime = neural_network("NN with ICA", ica_trans_train, Y_train,
                               ica_trans_test, Y_test)
    print("NN with ICA", querytime)
    # nn_predict_time = np.append(nn_predict_time, [querytime])
    # NN with RP
    querytime = neural_network("NN with RP", rp_trans_train, Y_train,
                               rp_trans_test, Y_test)
    print("NN with RP", querytime)
    # nn_predict_time = np.append(nn_predict_time, [querytime])
    # NN with LDA
    querytime = neural_network("NN with LDA", lda_trans_train, Y_train,
                               lda_trans_test, Y_test)
    print("NN with LDA", querytime)
    # nn_predict_time = np.append(nn_predict_time, [querytime])

    #5.  Neural Network with projected data and clustering -------------------------
    pca_trans_train_NN = np.column_stack((pca_trans_train, kmeans_pca_label))
    pca_trans_test_NN = np.column_stack(
        (pca_trans_test, kmeans_pca_label_test))
    ica_trans_train_NN = np.column_stack((ica_trans_train, kmeans_ica_label))
    ica_trans_test_NN = np.column_stack(
        (ica_trans_test, kmeans_ica_label_test))
    rp_trans_train_NN = np.column_stack((rp_trans_train, kmeans_rp_label))
    rp_trans_test_NN = np.column_stack((rp_trans_test, kmeans_rp_label_test))
    lda_trans_train_NN = np.column_stack((lda_trans_train, kmeans_lda_label))
    lda_trans_test_NN = np.column_stack(
        (lda_trans_test, kmeans_lda_label_test))
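
    # Design note: the k-means label is appended as a single extra engineered
    # feature. Cluster IDs are arbitrary integers, so one-hot encoding them
    # (e.g. with sklearn's OneHotEncoder) would arguably be cleaner; the stacking
    # above follows the original setup.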

    # NN with PCA
    querytime = neural_network("NN with PCA clustering", pca_trans_train_NN,
                               Y_train, pca_trans_test_NN, Y_test)
    print("NN with PCA clustering", querytime)
    # nn_predict_time = np.append(nn_predict_time, [querytime])
    # NN with ICA
    querytime = neural_network("NN with ICA clustering", ica_trans_train_NN,
                               Y_train, ica_trans_test_NN, Y_test)
    print("NN with ICA clustering", querytime)
    # nn_predict_time = np.append(nn_predict_time, [querytime])
    # NN with RP
    querytime = neural_network("NN with RP clustering", rp_trans_train_NN,
                               Y_train, rp_trans_test_NN, Y_test)
    print("NN with RP clustering", querytime)
    # nn_predict_time = np.append(nn_predict_time, [querytime])
    # NN with LDA
    querytime = neural_network("NN with LDA clustering", lda_trans_train_NN,
                               Y_train, lda_trans_test_NN, Y_test)
    print("NN with LDA clustering", querytime)