Example #1
def test():
    X, y = load_data(return_X_y=True)

    nb = GaussianNB()
    nb.fit(X, y)
    probas = nb.predict_proba(X)
    plot_precision_recall_curve_with_cv(nb, X, y)
    plt.show()
Example #2
 def test_two_classes(self):
     np.random.seed(0)
     # Iris has 3 classes, so plot_lift_curve (binary only) should raise ValueError
     X, y = load_data(return_X_y=True)
     clf = LogisticRegression()
     clf.fit(X, y)
     probas = clf.predict_proba(X)
     self.assertRaises(ValueError, plot_lift_curve, y, probas)
Example #3
 def test_biplot(self):
     np.random.seed(0)
     clf = PCA()
     clf.fit(self.X)
     ax = plot_pca_2d_projection(clf,
                                 self.X,
                                 self.y,
                                 biplot=True,
                                 feature_labels=load_data().feature_names)
Example #4
def run_example(depth: int = 2):
    features, label = load_data(return_X_y=True)
    p = features.shape[1]
    column_names = ["x{0}".format(i) for i in range(p)]
    data = pd.DataFrame(data=features, columns=column_names)
    data["label"] = label

    # sample 20% of the row indices (without replacement) for the test set;
    # np.random.random_integers is deprecated in NumPy, and sampling with
    # replacement would duplicate test rows
    test_indices = np.random.choice(data.shape[0],
                                    size=int(data.shape[0] * 0.2),
                                    replace=False)
    train_indices = [
        i for i in range(0, data.shape[0]) if i not in test_indices
    ]
    train = data.iloc[train_indices].reset_index()
    test = data.iloc[test_indices].reset_index()

    print(train.shape)

    # Use sklearn
    train_features_sklearn = features[train_indices, :]
    train_label_sklearn = label[train_indices]
    test_features_sklearn = features[test_indices, :]
    test_label_sklearn = label[test_indices]
    cart_model = DecisionTreeClassifier(max_depth=depth, min_samples_leaf=1)
    clf = cart_model.fit(train_features_sklearn, train_label_sklearn)
    predicted_y = clf.predict(test_features_sklearn)

    # Use PyOptree: train_method="mio" fits the tree by solving a
    # mixed-integer optimisation problem
    model = OptimalHyperTreeModel(column_names,
                                  "label",
                                  tree_depth=depth,
                                  N_min=1)
    model.train(train, train_method="mio")

    test = model.predict(test)

    print(model.a)

    print("PyOptree Library Tree Prediction Accuracy: {}".format(
        sum(test["prediction"] == test["label"]) / len(test["label"])))

    print("SKLearn Library Tree Prediction Accuracy: {}".format(
        sum(predicted_y == test_label_sklearn) / len(test_label_sklearn)))
Example #5
"""
An example showing the plot_roc_curve method
used by a scikit-learn classifier
"""
from __future__ import absolute_import
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_digits as load_data
import scikitplot as skplt


X, y = load_data(return_X_y=True)
nb = GaussianNB()
nb.fit(X, y)
probas = nb.predict_proba(X)
skplt.metrics.plot_roc(y_true=y, y_probas=probas)
plt.show()
Example #6
 def setUp(self):
     np.random.seed(0)
     self.X, self.y = load_data(return_X_y=True)
     p = np.random.permutation(len(self.X))
     self.X, self.y = self.X[p], self.y[p]
Example #7
 def test_two_classes(self):
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     X, y = load_data(return_X_y=True)
     self.assertRaises(ValueError, clf.plot_ks_statistic, X, y)
Example #10
"""
Correlation heatmap for the wine data set; all pairs of
variables (based on columns) are considered.

To better display the values of the correlation coefficient, the colors
used for the annotation of the values in the plot can be selected
with the parameter ``textcolors`` of the
:py:meth:`psynlig.heatmap.plot_heatmap` method (please see the
:ref:`documentation <api-heatmap>` for more information).
"""
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.datasets import load_wine as load_data
from psynlig import plot_correlation_heatmap

plt.style.use('seaborn-talk')

data_set = load_data()
data = pd.DataFrame(data_set['data'], columns=data_set['feature_names'])

kwargs = {
    'text': {
        'fontsize': 'large',
    },
    'heatmap': {
        'vmin': -1,
        'vmax': 1,
        'cmap': 'viridis',
    },
    'figure': {
        'figsize': (14, 10)
    },
}
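# The snippet is truncated in the original; a call along these lines
# presumably follows (the textcolors value here is an assumed example,
# two colors switched on a threshold as in matplotlib's annotated heatmaps):
plot_correlation_heatmap(data, textcolors=['white', 'black'], **kwargs)
plt.show()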
Example #11

# visualization

# run benchmark

# t-SNE and PCA with variance
# run clustering on t-SNE
# check feature importance against the other 50


# feature importance
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris as load_data
import matplotlib.pyplot as plt
from scikitplot import classifier_factory

X, y = load_data(return_X_y=True)
print(X.shape, y.shape)  # inspect the data dimensions
rf = classifier_factory(RandomForestClassifier(random_state=1))
rf.fit(X, y)
# iris has only four features, so generate one label per column
rf.plot_feature_importances(
    feature_names=["feature" + str(i) for i in range(X.shape[1])])
plt.show()

# Using the more flexible functions API
from scikitplot import plotters as skplt
rf = RandomForestClassifier()
rf = rf.fit(X, y)
# feature_names in the dataset's column order: sepal before petal
skplt.plot_feature_importances(rf, feature_names=['sepal length', 'sepal width',
                                                  'petal length', 'petal width'])
plt.show()
Example #13
def getData():
    data_map = load_data()
    X = data_map['data']
    y = data_map['target']
    y[np.where(y == 0)] = -1  # relabel class 0 as -1 so the targets are {-1, +1}
    return scale(X), y
Example #14
import numpy as np
from sklearn.datasets import fetch_20newsgroups as load_data
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

dataset = load_data(
    categories=["alt.atheism", "soc.religion.christian", "talk.politics.guns"],
    shuffle=True,
)
X, y = dataset.data, dataset.target
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

# fit the TF-IDF vectorizer on the training split only, then transform both splits
bow = TfidfVectorizer().fit(X_train)
X_train_bow = bow.transform(X_train)
X_test_bow = bow.transform(X_test)

logreg = LogisticRegression(multi_class="auto").fit(X_train_bow, y_train)
y_test_hat = logreg.predict_proba(X_test_bow)
# argmax over the class probabilities is equivalent to logreg.predict here
print(accuracy_score(y_test, np.argmax(y_test_hat, axis=1)))
Example #15
import time

from sklearn.datasets import load_iris as load_data
from sklearn.kernel_approximation import RBFSampler
from sklearn.utils import shuffle

from libifbtsvm import iFBTSVM
from libifbtsvm.models.ifbtsvm import Hyperparameters


if __name__ == '__main__':

    dataset = load_data()
    dataset.data, dataset.target = shuffle(dataset.data, dataset.target)

    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.01,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=500,
        phi=1e-9,
        kernel=RBFSampler(gamma=1, n_components=20),
        forget_score=5,
    )

    # Initialise the iFBTSVM model
    ifbtsvm = iFBTSVM(parameters=params, n_jobs=1)
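    # The snippet ends after initialisation in the original; training would
    # presumably follow via the estimator's sklearn-style fit method, e.g.
    # ifbtsvm.fit(X=dataset.data, y=dataset.target)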
Example #16
from sklearn.datasets import load_iris as load_data
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import MachineLearning.DecisionTree.decisionTree as decisionTree
import MachineLearning.DecisionTree.tool as tool
from MachineLearning.DecisionTree.drawTree import *
if __name__ == '__main__':
    print('load_data......')
    dataSet = load_data()
    data = dataSet.data
    target = dataSet.target
    dataframe = pd.DataFrame(data=data, dtype=np.float32)
    dataframe.insert(4, 'label', target)
    dataMat = np.mat(dataframe)

    # split the data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(dataMat[:, 0:-1], dataMat[:, -1], test_size=0.3, random_state=0)
    data_train = np.hstack((X_train, y_train))
    data_train = data_train.tolist()
    X_test = X_test.tolist()
    tree = decisionTree.decision_tree()
    tree_root = tree.build_tree(data_train)
    predictions = tree.predcit_samples(X_test, tree_root)  # method name as defined in the decisionTree module
    pres = []
    for i in predictions:
        pres.append(list(i.keys()))

    y_test = y_test.tolist()
    accuracy = 0
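    # Truncated in the original; the accuracy computation presumably finishes
    # along these lines (each prediction is a dict, so pres[i] holds its keys,
    # and the rows of y_test are one-element lists):
    for i in range(len(pres)):
        if pres[i][0] == y_test[i][0]:
            accuracy += 1
    print('accuracy: {}'.format(accuracy / len(y_test)))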