Example #1
def yj():
    params['mu0'] = np.random.randn()*0.2
    params['mu1'] = np.random.randn()*0.2
    params['sigma0'] = di.invgamma.rvs(3)
    params['sigma1'] = di.invgamma.rvs(3)
    sel, rawdata, normdata = get_data(data_yj, params)
    norm_trn_data = normdata.loc[sel['trn'], sel['feats']]
    norm_tst_data = normdata.loc[sel['tst'], sel['feats']]

    sklda = LDA()
    sklda.fit(norm_trn_data, sel['trnl'])
    error = (1-sklda.score(norm_tst_data, sel['tstl']))
    print("skLDA error: %f" % error)
    return error
Example #2
def LDA(array, test_labels):
    # The local import shadows this function's name, so the call below
    # resolves to scikit-learn's estimator, not a recursive call.
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

    print("LDA")
    print("Features\tTime")

    for pct in pct_features_list:
        num_features = int(pct * len(array[0]))
        start = time()
        LDA(n_components=num_features).fit(array, test_labels)
        end = time()
        print(num_features, "\t", (end - start))
Example #3
    def _fit_lda(self, X, y, sample_weight=None):
        """Helper to fit LDA."""
        self.classes = numpy.unique(y)
        self._lda = LDA(n_components=len(self.classes) - 1,
                        solver='lsqr',
                        shrinkage='auto')

        ts = self._ts.fit_transform(X, sample_weight=sample_weight)
        self._lda.fit(ts, y)

        W = self._lda.coef_.copy()
        self._W = numpy.dot(
            numpy.dot(W.T, numpy.linalg.pinv(numpy.dot(W, W.T))), W)
        return ts
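The _W computed above is the orthogonal projection onto the row space of the LDA coefficients, W.T (W W.T)^+ W. A minimal sketch checking the projection properties on random data; the shapes are an assumption for illustration (3 classes give 2 discriminant directions):

import numpy
W = numpy.random.randn(2, 5)  # 2 discriminant directions, 5 features
P = numpy.dot(numpy.dot(W.T, numpy.linalg.pinv(numpy.dot(W, W.T))), W)
assert numpy.allclose(numpy.dot(P, P), P)  # idempotent: projecting twice changes nothing
assert numpy.allclose(P, P.T)              # symmetric, as an orthogonal projector must be
assert numpy.allclose(numpy.dot(W, P), W)  # rows of W are unchanged by the projection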
Example #4
def fit(X, y):
    # Do your training here
    #clf = LogisticRegression(penalty="l2")
    #clf = SVC(kernel='linear', probability=True, random_state=0)
    clf1 = LDA()
    #clf = ensemble.RandomForestClassifier(n_estimators=10, max_depth=8, min_samples_leaf=4, n_jobs=4, random_state=0)
    clf1.fit(X, y)
    #pred_y = clf1.predict_proba(X)[:,[1]]
    #pred_y2 = np.vstack([pred_y[0],pred_y[:-1]])
    #pred_y3 = np.vstack([pred_y[0],pred_y[0],pred_y[:-2]])
    #pred_y = np.concatenate((pred_y, pred_y2, pred_y3),axis=1)
    #clf2 = LDA()
    #clf2.fit(pred_y, y)
    return clf1
Example #5
def checkeachClassfier(train_x, train_y, test_x, test_y):
    classifiers = [
        KNeighborsClassifier(3),
        SVC(kernel="linear", C=0.025),
        SVC(class_weight='balanced'),
        SVC(gamma=2, C=1),
        DecisionTreeClassifier(max_depth=5),
        DecisionTreeClassifier(class_weight='balanced'),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        RandomForestClassifier(class_weight='balanced'),
        AdaBoostClassifier(),
        GaussianNB(),
        LDA(),
        QDA()
    ]

    classtitle = [
        "KNeighborsClassifier", "SVC", "SVC weighted", "SVC(gamma=2, C=1)",
        "DecisionTreeClassifier", "DecisionTreeClassifier weighted",
        "RandomForestClassifier", "RandomForestClassifier weighted",
        "AdaBoostClassifier", "GaussianNB", "LDA", "QDA"
    ]

    for ctitle, clf in zip(classtitle, classifiers):
        try:
            clf.fit(train_x, train_y)
            train_pdt = clf.predict(train_x)
            MCC, Acc_p, Acc_n, Acc_all = get_Accs(train_y, train_pdt)
            print(ctitle + ":")
            print("MCC, Acc_p, Acc_n, Acc_all (train):")
            print("%s,%s,%s,%s" % (MCC, Acc_p, Acc_n, Acc_all))
            test_pdt = clf.predict(test_x)
            MCC, Acc_p, Acc_n, Acc_all = get_Accs(test_y, test_pdt)
            print("MCC, Acc_p, Acc_n, Acc_all (test):")
            print("%s,%s,%s,%s" % (MCC, Acc_p, Acc_n, Acc_all))
            fn = "submission_%s.csv" % ctitle
            with open(fn, 'w') as fout:
                fout.write("ID,target\n")
                for index, pred in enumerate(test_pdt):
                    fout.write("%s,%s\n" % (int(test_x[index][0]), pred))
        except Exception:
            print(ctitle + ": error")
        print()
Example #6
def get_fsmethod(fsmethod, n_feats, n_subjs, n_jobs=1):

    if fsmethod == 'stats':
        return 'stats', None

    # Feature selection procedures
    # http://scikit-learn.org/stable/modules/feature_selection.html
    fsmethods = {
                                # recursive feature elimination, with and without cross-validation
                  'rfe'       : RFE(estimator=SVC(kernel="linear"), step=0.05, n_features_to_select=2),
                  'rfecv'     : RFECV(estimator=SVC(kernel="linear"), step=0.05, scoring='accuracy'), #cv=3 by default; cv=StratifiedKFold(n_subjs, 3)
                                # univariate feature selection
                  'univariate': SelectPercentile(f_classif, percentile=5),
                  'fpr'       : SelectFpr(f_classif, alpha=0.05),
                  'fdr'       : SelectFdr(f_classif, alpha=0.05),
                  'extratrees': ExtraTreesClassifier(n_estimators=50, max_features='sqrt', n_jobs=n_jobs, random_state=0),

                  'pca'       : PCA(n_components='mle'),
                  'rpca'      : PCA(svd_solver='randomized', random_state=0),
                  'lda'       : LDA(),
    }

    # feature selection parameter values for grid search
    max_feats = ['sqrt']
    if n_feats < 10:
        feats_to_sel = list(range(2, n_feats, 2))
        n_comps = list(range(1, n_feats, 2))
    else:
        feats_to_sel = list(range(2, 20, 4))
        n_comps = list(range(1, 30, 4))
    max_feats.extend(feats_to_sel)

    n_comps_pca = list(n_comps)
    n_comps_pca.append('mle')

    fsgrid =    { 'rfe'       : dict(estimator__C=[0.1, 1, 10], n_features_to_select=feats_to_sel),
                  'rfecv'     : dict(estimator__C=[0.1, 1, 10]),
                  'univariate': dict(percentile=[1, 3, 5, 10]),
                  'fpr'       : dict(alpha=[1, 3, 5, 10]),
                  'fdr'       : dict(alpha=[1, 3, 5, 10]),
                  'extratrees': dict(n_estimators=[1, 3, 5, 10, 30, 50], max_features=max_feats),
                  'pca'       : dict(n_components=n_comps_pca, whiten=[True, False]),
                  'rpca'      : dict(n_components=n_comps, iterated_power=[3, 4, 5], whiten=[True, False]),
                  'lda'       : dict(n_components=n_comps)
    }

    return fsmethods[fsmethod], fsgrid[fsmethod]
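A minimal sketch of how the selector and grid returned above might be wired into a grid search; the pipeline, step names, and toy data below are assumptions for illustration, not part of the original module:

from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

X, y = make_classification(n_samples=60, n_features=100, random_state=0)
fs, fs_grid = get_fsmethod('univariate', n_feats=X.shape[1], n_subjs=X.shape[0])
pipe = Pipeline([('fs', fs), ('clf', SVC(kernel='linear'))])
param_grid = {'fs__' + k: v for k, v in fs_grid.items()}  # prefix params with the step name
GridSearchCV(pipe, param_grid, cv=3).fit(X, y)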
Example #7
    def fit(self, X, y):
        """ Fit the data """
        n_len = len(X[0])
        if n_len > 30:
            n_cut = 13
        elif n_len > 15:
            n_cut = 7
        else:
            n_cut = 3
        uni = FeatureUnion([('lda', LDA(n_components=n_cut - 1)),
                            ('pca', PCA(n_components=n_cut))])
        pipe = Pipeline([('scaler', MinMaxScaler()), ('union', uni)])
        self.pipe = pipe
        self.pipe.fit(X, y)
        return self
Example #8
    def __init__(self, classifier=None, debug=False):
        np.random.seed(10)
        if classifier == "LDA":
            self.classifier = LDA()
        else:
            self.classifier = DecisionTreeClassifier(random_state=0)
        self.dim_reducer = PCA()
        self.trainDataMatrix = None
        self.labels = None
        self.trained = False

        # debug mode caps training at 10000 and testing at 100 data points
        self.debug = debug
        self.debug_training_len = 10000
        self.debug_len = 100
Example #9
def dim_reduction_LDA(X, Y, n_dim):
    """ Reduce the dimension by LDA.

    :param X: matrix data (n*k), n is the number of samples, k is the dimension of each sample
    :param Y: reference or labels
    :param n_dim: number of dimensions we want to reduce to
    :return reduced_X: matrix data (n*n_dim)
    """
    try:
        reduced_X = LDA(n_components=n_dim).fit_transform(X, Y)
    except Exception:
        print("dimension error")
        reduced_X = X
    finally:
        return np.array(reduced_X)
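LDA can project onto at most n_classes - 1 dimensions, which is the usual reason the except branch above fires. A small usage sketch on scikit-learn's iris data, an assumption for illustration (3 classes, so n_dim must be 1 or 2):

from sklearn.datasets import load_iris
iris = load_iris()
reduced = dim_reduction_LDA(iris.data, iris.target, n_dim=2)   # 2 <= 3 - 1, works
print(reduced.shape)                                           # (150, 2)
fallback = dim_reduction_LDA(iris.data, iris.target, n_dim=3)  # prints "dimension error", returns X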
Example #10
def lda(arr0, target, n_components):
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    matrix = np.array(arr0)
    target = np.array(target)
    temp = LDA(n_components=n_components).fit(matrix, target)
    coef = temp.coef_
    # covariance = temp.covariance_
    mean = temp.means_
    priors = temp.priors_
    scalings = temp.scalings_
    xbar = temp.xbar_
    # label = data_utility.retrieve_nan_index(temp.transform(matrix).tolist(), index)
    label = temp.transform(matrix).tolist()
    return (label, coef.tolist(), mean.tolist(), priors.tolist(),
            scalings.tolist(), xbar.tolist())
Example #11
    def features(self, pixels, gt=None):
        # grab feature stack
        fullFeatures = naive_features(pixels)
        print(fullFeatures.shape)

        # if the LDA from ground truth exists already, transform new features
        if gt is None and self.lda is not None:
            print(self.lda)
            return self.lda.transform(fullFeatures)
        assert gt is not None

        # otherwise, train LDA
        self.lda = LDA(n_components=self.n_comp).fit(fullFeatures, gt)
        print(self.lda)
        return self.lda.transform(fullFeatures)
Example #12
def lda_data(X, y, n_components=2, num_data_points=-1):

    lda = LDA(n_components=n_components)

    if num_data_points > 0:
        X = X[:num_data_points, :]
        y = y[:num_data_points]

    print("Performing mapping")
    start = timeit.default_timer()
    mapped = lda.fit_transform(X, y)
    end = timeit.default_timer()
    print("Mapping completed in %f seconds" % (end - start))

    return mapped, lda
Example #13
    def drawLDA(X_true, X_false, X_test, suffix=""):
        X = X_true + X_false
        Y = [1] * len(X_true) + [0] * len(X_false)
        plc = 0
        lda = LDA(solver="eigen", n_components=2)
        canfit = False
        hred = False
        try:
            lda.fit(X, Y)
            canfit = True
        except Exception:
            try:
                print("fit error")
                X = np.array(X)
                X = X[:, :140]
                lda.fit(X, Y)
                canfit = True
                hred = True
            except Exception:
                print("cannot visualize")
        if not canfit:
            return
        if hred:
            Xlda_true = lda.transform(np.array(X_true)[:, :140])
            Xlda_false = lda.transform(np.array(X_false)[:, :140])
        else:
            Xlda_true = lda.transform(X_true)
            Xlda_false = lda.transform(X_false)
        plt.scatter(Xlda_true[:, 0], Xlda_true[:, 1],
                    color=plp[plc][0], marker=plp[plc][1], label="thbgm")
        plc += 1
        plt.scatter(Xlda_false[:, 0], Xlda_false[:, 1],
                    color=plp[plc][0], marker=plp[plc][1], label="not thbgm")
        plc += 1
        if len(X_test) > 0:
            if hred:
                Xlda_test = lda.transform(np.array(X_test)[:, :140])
            else:
                Xlda_test = lda.transform(np.array(X_test))
            plt.scatter(Xlda_test[:, 0], Xlda_test[:, 1],
                        color=plp[plc][0], marker=plp[plc][1], label="test")
            plc += 1

        print(lda.coef_.shape)

        plt.xlabel("feature1")
        plt.ylabel("feature2")
        plt.title("Classification with " + useFeature)
        plt.legend()
        plt.savefig("./learn/visualize/lda_" + useFeature + suffix + ".png")
        plt.clf()
Example #14
def learners(clf=None, kwds=None):
    "Return dict of available classifiers"
    models = {}

    # common classifiers
    models['LinearSVC'] = svm.LinearSVC()
    models['SVC'] = svm.SVC()
    models['KNeighborsClassifier'] = KNeighborsClassifier()
    models['KNeighborsClassifier'].n_jobs = 8
    models['RandomForestClassifier'] = RandomForestClassifier()
    models['ExtraTreesClassifier'] = ExtraTreesClassifier()
    models['GaussianNB'] = GaussianNB()
    models['BernoulliNB'] = BernoulliNB()
    models['SGDClassifier'] = SGDClassifier()
    models['RidgeClassifier'] = RidgeClassifier(solver='lsqr')
    models['GradientBoostingClassifier'] = GradientBoostingClassifier()
    models['DecisionTreeClassifier'] = DecisionTreeClassifier()
    models['PCA'] = PCA()
    models['XGBClassifier'] = XGBClassifier()

    # common ensemble classifiers
    models['AdaBoostClassifier'] = AdaBoostClassifier()
    models['BaggingClassifier'] = BaggingClassifier()

    # examples how to construct pipelines
    steps = [('PCA', PCA(n_components='mle', whiten=True)),
             ('clf', models['RandomForestClassifier'])]
    models['pca_rfc'] = Pipeline(steps=steps)
    steps = [('PCA', PCA(n_components='mle', whiten=True)),
             ('clf', models['KNeighborsClassifier'])]
    models['pca_knc'] = Pipeline(steps=steps)
    steps = [('PCA', PCA(n_components='mle', whiten=True)),
             ('clf', models['SVC'])]
    models['pca_svc'] = Pipeline(steps=steps)
    steps = [('LDA', LDA()), ('clf', models['RandomForestClassifier'])]
    models['lda_rfc'] = Pipeline(steps=steps)

    # common regressors
    models['RandomForestRegressor'] = RandomForestRegressor()
    models['ExtraTreesRegressor'] = ExtraTreesRegressor()
    models['DecisionTreeRegressor'] = DecisionTreeRegressor()
    models['SVR'] = SVR()
    models['SGDRegressor'] = SGDRegressor()
    models['GradientBoostingRegressor'] = GradientBoostingRegressor()
    models['AdaBoostRegressor'] = AdaBoostRegressor()
    models['BaggingRegressor'] = BaggingRegressor()

    return models
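A minimal usage sketch for the dict returned above; the toy data is an assumption:

from sklearn.datasets import make_classification

X, y = make_classification(n_samples=100, n_features=20, random_state=0)
models = learners()
models['pca_rfc'].fit(X, y)  # PCA('mle') feeding a random forest
print(models['pca_rfc'].score(X, y))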
Example #15
def fit_LDA_from_codes_file(codes_file,
                            clique_idx,
                            lda_components=[50, 100, 200],
                            outlda="LDAs.pk"):
    """Fits an LDA from a codes file and saves it into a new pickle file."""

    clique_idx = np.asarray(load_pickle(clique_idx))
    codes = np.asarray(load_pickle(codes_file))

    # Remove Nones
    none_idx = np.where(np.equal(codes, None))[0]
    codes = np.delete(codes, none_idx, axis=0)
    clique_idx = np.delete(clique_idx, none_idx, axis=0)
    # Hack to make it the right shape
    C = np.zeros((codes.shape[0], codes[0].shape[0]))
    k = 0
    for code in codes:
        C[k] = code
        k += 1
    codes = C

    # Remove nans
    nan_idx = np.where(np.isnan(codes))[0]
    codes = np.delete(codes, nan_idx, axis=0)
    clique_idx = np.delete(clique_idx, nan_idx, axis=0)
    print(codes.shape)

    # Remove infs
    inf_idx = np.where(np.isinf(codes))[0]
    codes = np.delete(codes, inf_idx, axis=0)
    clique_idx = np.delete(clique_idx, inf_idx, axis=0)
    print(codes.shape)

    print "LDA components: ", lda_components
    #return codes, clique_idx

    res = []
    k = 0
    while k < len(lda_components):
        c = lda_components[k]
        lda = LDA(n_components=c)
        try:
            lda.fit(codes, clique_idx)
            res.append(lda)
            k += 1
        except Exception:
            print("LDA error, trying again")
    save_pickle(res, outlda)
Example #16
def train_predict(X, y, Xt, yt=[], c=1):
    if c == 1:
        #clf=xgb_classifier(num_round=45,eta=0.1,min_child_weight=5,depth=10, subsample=0.5,col=1)
        clf = xgb_classifier(num_round=45, eta=0.1, min_child_weight=20,
                             depth=20, subsample=0.1, col=0.7)
        #clf=xgb_classifier(num_round=300,eta=0.01,min_child_weight=20,depth=8, subsample=0.1,col=0.7)
        return clf.train_predict(X, y, Xt, yt)
    elif c == 2:
        clf = LDA()
        clf.fit(X, y)
        preds = clf.predict_proba(Xt)[:, 1]
        return preds
    elif c == 3:
        clf = LogisticRegression()
        clf.fit(X, y)
        preds = clf.predict_proba(Xt)[:, 1]
        return preds
Example #17
    def optimize(self, X, y):
        clf = LDA()
        scores = []
        train_times = []
        for train, test in StratifiedKFold(n_splits=10).split(X, y):
            X_train, X_test, y_train, y_test = (X[train], X[test], y[train],
                                                y[test])
            t0 = self._timer()
            clf.fit(X_train.toarray(), y_train)
            train_times.append(self._timer() - t0)
            scores.append(clf.score(X_test.toarray(), y_test))

        self._mean_score = np.mean(scores)
        self._score_std = np.std(scores)
        self._mean_train_time = np.mean(train_times)
        self._train_time_std = np.std(train_times)
Example #18
def lda_model(x_train, y_train, x_test, y_test):
    global get_test

    print("LDA model learning...")

    start_time = time.time()
    # LDA assumes a common covariance matrix among classes, while QDA doesn't
    clf = LDA()
    #clf = QDA()
    clf.fit(x_train, y_train)

    learning_time = time.time() - start_time
    print("training time is: {:.5f} seconds.".format(learning_time))
    '''
    #use LDA to do dimensionality reduction, reduce to n_class-1 dimensions
    x_t = clf.transform(x_train)
    print(x_train.shape)
    print(x_t.shape)
    print(x_train[:3])
    print(x_t[:3])
    '''

    print("Model Prediction...")
    #y_predict = clf.predict(x_test)

    start_time = time.time()
    # get probability predictions
    y_prob = clf.predict_proba(x_test)

    prediction_time = time.time() - start_time
    print("prediction time is: {:.5f} seconds.".format(prediction_time))

    if get_test:
        # the data is from the real test set; output to file
        output_result(y_prob)
    else:
        # the test set is split from the train set; compute the loss function value
        encoder = LabelEncoder()
        # encode string labels 'Class_1', 'Class_2', ... to [0, 1, ..., 8]
        y_true = encoder.fit_transform(y_test)
        # the class labels in the encoder are consistent with those in the classifier
        assert (encoder.classes_ == clf.classes_).all()
        # compute the value of the loss function
        score = logloss_mc(y_true, y_prob)
        print(
            " -- Multiclass logloss on validation set: {:.5f}.".format(score))
Example #19
    def train(self, workDir, classifier, ldaDim):
        fname = "{}labels.csv".format(workDir)  # labels of faces
        print("Loading labels " + fname + " csv size: " + str(os.path.getsize(fname)))
        if os.path.getsize(fname) > 0:
            print(fname + " file is not empty")
            labels = pd.read_csv(fname, header=None).values[:, 1]
            print(labels)
        else:
            print(fname + " file is empty")
            labels = ["1:mini/dummy/1.png"]  # dummy entry to start the process
        logger.debug(list(map(os.path.dirname, labels)))
        logger.debug(list(map(os.path.split, map(os.path.dirname, labels))))
        logger.debug(list(map(itemgetter(1), map(os.path.split, map(os.path.dirname, labels)))))
        labels = list(map(itemgetter(1), map(os.path.split, map(os.path.dirname, labels))))

        fname = "{}reps.csv".format(workDir)  # representations of faces
        print("Loading embedding " + fname + " csv size: " + str(os.path.getsize(fname)))
        if os.path.getsize(fname) > 0:
            print(fname + " file is not empty")
            embeddings = pd.read_csv(fname, header=None).values  # embeddings matrix from reps.csv
        else:
            print(fname + " file is empty")
            embeddings = np.zeros((2, 150))  # empty array since the csv is empty

        # LabelEncoder normalizes labels so they contain only values between 0 and n_classes - 1
        self.le = LabelEncoder().fit(labels)
        labelsNum = self.le.transform(labels)
        nClasses = len(self.le.classes_)
        print("Training for {} classes.".format(nClasses))

        if classifier == 'LinearSvm':
            self.clf = SVC(C=1, kernel='linear', probability=True)
        elif classifier == 'GMM':
            self.clf = GMM(n_components=nClasses)

        if ldaDim > 0:
            clf_final = self.clf
            self.clf = Pipeline([('lda', LDA(n_components=ldaDim)),
                                 ('clf', clf_final)])

        self.clf.fit(embeddings, labelsNum)  # link embeddings to labels

        fName = "{}classifier.pkl".format(workDir)
        print("Saving classifier to '{}'".format(fName))
        with open(fName, 'wb') as f:
            # serialize the encoder and classifier for later recognition
            pickle.dump((self.le, self.clf), f)
Example #20
def lda(data_matrix, target, n_components):
    """
    Linear Discriminant Analysis (LDA)
    Adapted from: http://scikit-learn.org/stable/_downloads/plot_pca_vs_lda.py
    Args:
        - data_matrix: a matrix-like object containing the data, columns
            for features and rows for an item (developer in this case)
        - target: an array-like object containing the class of the data item
            (developer) at the respective row
        - n_components: the number of discriminant components to be extracted
            (0 < n_components <= min(#features, n_classes - 1))
    Return:
        - a matrix-like object containing the transformed data with n_components
            columns
    """
    lda_obj = LDA(n_components=n_components)
    return lda_obj.fit(data_matrix, target).transform(data_matrix)
Example #21
def dimensionalReduction(df_list, method, methods_list):
    '''
    Apply one dimension-reduction method to the data.

    :param df_list: the data, converted to a DataFrame below
    :param method: name of the dimension-reduction method to apply
    :param methods_list: dict mapping method names to their n_components,
        e.g. {'pca': n_components, 'lda': n_components, 'tSNE': n_components}
    :return: the transformed data
    '''
    df_list = pd.DataFrame(df_list)
    print(df_list.shape)
    if method not in methods_list.keys():
        raise ValueError("please use a method in methods_list")
    if method == 'pca':
        args = methods_list[method]
        model = PCA(n_components=args)
        x_pca = model.fit_transform(df_list)
        return x_pca

    if method == 'lda':
        args = methods_list[method]
        model = LDA(n_components=args)
        y = df_list['target']
        x_lda = model.fit_transform(df_list.drop('target', axis=1), y)
        return x_lda

    if method == 'tSNE':
        args = methods_list[method]
        tsne = manifold.TSNE(n_components=args)
        X_tsne = tsne.fit_transform(df_list)
        return X_tsne

    if method == 'lle':
        args = methods_list[method]
        lle = manifold.LocallyLinearEmbedding(n_components=args)
        x_lle = lle.fit_transform(df_list)
        return x_lle

    if method == 'isomap':
        args = methods_list[method]
        x_iso = manifold.Isomap(n_components=args).fit_transform(df_list)
        return x_iso

    if method == 'mds':
        args = methods_list[method]
        x_mds = manifold.MDS(n_components=args).fit_transform(df_list)
        return x_mds
Example #22
def LDA(data, label, pred_data, pred_last):
    '''not good; no normalization needed
    '''
    data = np.array(data)
    pred_data = np.array(pred_data)
    label = np.array(label)
    pred_last = np.array(pred_last)
    # the local import shadows this function's name for the call below
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    gnb = LDA()
    gnb.fit(data, label)

    print(gnb.score(data, label))
    pred_result = gnb.predict(pred_data)
    print("Number of mislabeled points out of a total %d points : %d" %
          (pred_data.shape[0], (pred_last != pred_result).sum()))
    print(gnb.score(pred_data, pred_last))
    return pred_result
Example #23
def train(subject, data_path, plot=False):
    d = load_train_data(data_path, subject)
    x, y = d['x'], d['y']
    print('n_preictal', np.sum(y))
    print('n_interictal', np.sum(y - 1))
    n_channels = x.shape[1]
    n_fbins = x.shape[2]

    x, y = reshape_data(x, y)
    data_scaler = StandardScaler()
    x = data_scaler.fit_transform(x)

    lda = LDA()
    lda.fit(x, y)
    coef = lda.scalings_ * lda.coef_[:1].T
    channels = []
    fbins = []
    for c in range(n_channels):
        fbins.extend(range(n_fbins))  # 0- delta, 1- theta ...
        channels.extend([c] * n_fbins)

    if plot:
        fig = plt.figure()
        for i in range(n_channels):
            if n_channels == 24:
                fig.add_subplot(4, 6, i + 1)  # subplot indices are 1-based
            else:
                fig.add_subplot(4, 4, i + 1)
            ax = plt.gca()
            ax.set_xlim([0, n_fbins])
            ax.set_xticks(np.arange(0.5, n_fbins + 0.5, 1))
            ax.set_xticklabels(np.arange(0, n_fbins))
            max_y = max(abs(coef)) + 0.01
            ax.set_ylim([0, max_y])
            ax.set_yticks(
                np.around(np.arange(0, max_y, max_y / 4.0), decimals=1))
            for label in (ax.get_xticklabels() + ax.get_yticklabels()):
                label.set_fontsize(15)
            plt.bar(range(0, n_fbins),
                    abs(coef[i * n_fbins:i * n_fbins + n_fbins]))
        fig.suptitle(subject, fontsize=20)
        plt.show()

    coefs = np.reshape(coef, (n_channels, n_fbins))
    return lda, data_scaler, coefs
Example #24
def classifier_comparison(X, y):
    """
    Classifier comparison

    Args:
        X: training samples, size=[n_samples, n_features]
        y: class labels, size=[n_samples, 1]
    Returns:
        None
    """
    from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
    from sklearn.svm import SVC
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
    import scipy.stats

    # Exhaustive Grid Search
    exhaustive_parameters = {'kernel': ['rbf'], 'C': [1, 10, 100, 1000], 'gamma': [1e-3, 1e-4]}
    clf_SVC_exhaustive = GridSearchCV(SVC(), exhaustive_parameters)
    # Randomized Parameter Optimization
    randomized_parameter = {'kernel': ['rbf'], 'C': scipy.stats.expon(scale=100), 'gamma': scipy.stats.expon(scale=.1)}
    clf_SVC_randomized = RandomizedSearchCV(SVC(), randomized_parameter)

    names = ["Linear SVM", "RBF SVM",
             "RBF SVM with Grid Search", "RBF SVM with Random Grid Search", 
             "Decision Tree", "Random Forest", 
             "AdaBoost", "Naive Bayes", "LDA", "QDA"]
    classifiers = [
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        clf_SVC_exhaustive,
        clf_SVC_randomized,
        DecisionTreeClassifier(max_depth=5),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        AdaBoostClassifier(),
        GaussianNB(),
        LDA(),
        QDA()]

    for name, clf in zip(names, classifiers):
        logger.info('Use %s:' % (name))
        train_classifier(clf, X, y)
Example #25
def with_lda(X_train_std, y_train, X_test_std, y_test):
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    lda = LDA(n_components=2)
    X_train_lda = lda.fit_transform(X_train_std, y_train)
    lr = LogisticRegression()
    lr = lr.fit(X_train_lda, y_train)
    plot_decision_regions(X_train_lda, y_train, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.show()

    X_test_lda = lda.transform(X_test_std)
    plot_decision_regions(X_test_lda, y_test, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.show()
Example #26
def get_LDA(X_std, y):
    sklearn_lda = LDA(n_components=2)
    Xred_lda = sklearn_lda.fit_transform(X_std, y)
    cmap = plt.cm.get_cmap('Accent')
    mclasses = (1, 2, 3, 4, 5, 6, 7, 8, 9)
    mcolors = [cmap(i) for i in np.linspace(0, 1, 10)]
    plt.figure(figsize=(12, 8))

    for lab, col in zip(mclasses, mcolors):
        plt.scatter(Xred_lda[y == lab, 0],
                    Xred_lda[y == lab, 1],
                    label=lab,
                    c=col)

    plt.xlabel('LDA/Fisher Direction 1')
    plt.ylabel('LDA/Fisher Direction 2')
    leg = plt.legend(loc='upper right', fancybox=True)
    plt.show()
Example #27
def LDA10Fold(X, y):
    acc = []
    kf = KFold(n_splits=10, shuffle=True)
    i = 0
    for train_index, test_index in kf.split(X):
        yTest = y[test_index]
        yTrain = y[train_index]
        clf = LDA()
        clf.fit(X[train_index], yTrain)
        newRepTrain = clf.transform(X[train_index])
        newRepTest = clf.transform(X[test_index])
        nclf = neighbors.KNeighborsClassifier(n_neighbors=2)
        nclf.fit(newRepTrain, yTrain)
        XPred = nclf.predict(newRepTest)
        acc.append(np.sum(XPred == yTest) * 1.0 / yTest.shape[0])
        # print(i, ":", acc[i])
        i += 1
    return np.mean(acc), np.std(acc)
Example #28
def runTestPairs(e):
    x = e[0]
    y = e[1]
    trainX = labelsmaptra[x] + labelsmaptra[y]
    labelsX = [x] * len(labelsmaptra[x]) + [y] * len(labelsmaptra[y])

    clf = LDA()
    clf.fit(trainX, labelsX)

    testX = labelsmaptes[x] + labelsmaptes[y]
    labelsX = [x] * len(labelsmaptes[x]) + [y] * len(labelsmaptes[y])
    error = 0
    for lab, test in zip(labelsX, testX):
        pred = clf.predict([test])[0]  # predict expects a 2D array of samples
        if lab != pred:
            error += 1
    print(e, error, error / float(len(testX)))
    return (e, error, error / float(len(testX)))
Example #29
def multi_classifier():
    classifiers = [
        KNeighborsClassifier(4),
        SVC(kernel="linear", C=0.025),
        SVC(),
        #####GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        #DecisionTreeClassifier(max_depth=7),
        #RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        #RandomForestClassifier(),
        LDA(),
        AdaBoostClassifier(),
        #GaussianNB(),
        #QuadraticDiscriminantAnalysis()
    ]
    for clf in classifiers:
        clf.fit(sx, sy)
        py = clf.predict(tx)
        print(accuracy_score(ty, py))
Example #30
    def trainDayNightClassifier(self):
        """
        Trains the model classifier, given that a histogram feature matrix was
        created for day and night.

        The method trains an LDA. Have a look at the diary of 07/05/2013 for an
        experiment showing that LDA is more robust to wrong labels than SVM.
        """
        if self.histNight is None or self.histDay is None:
            raise RuntimeError("day or night histogram was not computed " + \
                                "before calling this function")

        hist = np.concatenate((self.histDay, self.histNight))
        lbl = np.concatenate((np.zeros(
            (len(self.histDay))), np.ones((len(self.histNight)))))

        self.modelClassifier = LDA()
        self.modelClassifier.fit(hist, lbl)
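A self-contained sketch of the same day/night training logic outside the class, on synthetic histograms; all data below is an assumption for illustration:

import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

rng = np.random.RandomState(0)
hist_day = rng.rand(50, 16) + 0.5  # synthetic day histograms, shifted brighter
hist_night = rng.rand(50, 16)      # synthetic night histograms

hist = np.concatenate((hist_day, hist_night))
lbl = np.concatenate((np.zeros(len(hist_day)), np.ones(len(hist_night))))

clf = LDA()
clf.fit(hist, lbl)
print(clf.score(hist, lbl))  # training accuracy of the day/night separator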