def test():
    X, y = load_data(return_X_y=True)
    nb = GaussianNB()
    nb.fit(X, y)
    probas = nb.predict_proba(X)
    plot_precision_recall_curve_with_cv(nb, X, y)
    plt.show()

def test_two_classes(self):
    np.random.seed(0)
    # Test this one on Iris (3 classes): lift curves are only defined for
    # binary labels, so a three-class target should raise ValueError.
    X, y = load_data(return_X_y=True)
    clf = LogisticRegression()
    clf.fit(X, y)
    probas = clf.predict_proba(X)
    self.assertRaises(ValueError, plot_lift_curve, y, probas)

def test_biplot(self):
    np.random.seed(0)
    clf = PCA()
    clf.fit(self.X)
    ax = plot_pca_2d_projection(clf, self.X, self.y, biplot=True,
                                feature_labels=load_data().feature_names)

def run_example(depth: int = 2):
    features, label = load_data(return_X_y=True)
    p = features.shape[1]
    column_names = ["x{0}".format(i) for i in range(p)]
    data = pd.DataFrame(data=features, columns=column_names)
    data["label"] = label

    # Sample roughly 20% of the rows (possibly with duplicates) as the test
    # set; np.random.randint replaces the deprecated np.random.random_integers
    # and covers the same [0, n) index range here.
    test_indices = np.random.randint(0, data.shape[0],
                                     size=int(data.shape[0] * 0.2))
    train_indices = [i for i in range(0, data.shape[0]) if i not in test_indices]
    train = data.iloc[train_indices].reset_index()
    test = data.iloc[test_indices].reset_index()
    print(train.shape)

    # Use sklearn
    train_features_sklearn = features[train_indices]
    train_label_sklearn = label[train_indices]
    test_features_sklearn = features[test_indices]
    test_label_sklearn = label[test_indices]
    cart_model = DecisionTreeClassifier(max_depth=depth, min_samples_leaf=1)
    clf = cart_model.fit(train_features_sklearn, train_label_sklearn)
    predicted_y = clf.predict(test_features_sklearn)

    # Use PyOptree
    model = OptimalHyperTreeModel(column_names, "label", tree_depth=depth, N_min=1)
    model.train(train, train_method="mio")
    test = model.predict(test)
    print(model.a)
    print("PyOptree Library Tree Prediction Accuracy: {}".format(
        sum(test["prediction"] == test["label"]) / len(test["label"])))
    print("SKLearn Library Tree Prediction Accuracy: {}".format(
        sum(predicted_y == test_label_sklearn) / len(test_label_sklearn)))

""" An example showing the plot_roc_curve method used by a scikit-learn classifier """ from __future__ import absolute_import import matplotlib.pyplot as plt from sklearn.naive_bayes import GaussianNB from sklearn.datasets import load_digits as load_data import scikitplot as skplt X, y = load_data(return_X_y=True) nb = GaussianNB() nb.fit(X, y) probas = nb.predict_proba(X) skplt.metrics.plot_roc(y_true=y, y_probas=probas) plt.show()
def setUp(self):
    np.random.seed(0)
    self.X, self.y = load_data(return_X_y=True)
    p = np.random.permutation(len(self.X))
    self.X, self.y = self.X[p], self.y[p]

def test_two_classes(self):
    clf = LogisticRegression()
    scikitplot.classifier_factory(clf)
    X, y = load_data(return_X_y=True)
    self.assertRaises(ValueError, clf.plot_ks_statistic, X, y)

and all pairs of variables (based on columns) are considered.

To better display the values of the correlation coefficient, the colors
used to annotate the values in the plot can be selected with the
``textcolors`` parameter of the :py:meth:`psynlig.heatmap.plot_heatmap`
method (please see the :ref:`documentation <api-heatmap>` for more
information).
"""
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.datasets import load_wine as load_data
from psynlig import plot_correlation_heatmap

plt.style.use('seaborn-talk')

data_set = load_data()
data = pd.DataFrame(data_set['data'], columns=data_set['feature_names'])

kwargs = {
    'text': {
        'fontsize': 'large',
    },
    'heatmap': {
        'vmin': -1,
        'vmax': 1,
        'cmap': 'viridis',
    },
    'figure': {
        'figsize': (14, 10)
    },
}
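# Hypothetical completion (not part of the original snippet, which stops
# after building `kwargs`): a sketch of how the plotting call might look,
# assuming plot_correlation_heatmap accepts the data frame, a `textcolors`
# list for the low/high annotation colors described above, and the keyword
# groups defined in `kwargs`. Check the psynlig documentation before
# relying on this.
plot_correlation_heatmap(data, textcolors=['white', 'black'], **kwargs)
plt.show()
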
# Visualisation
# Run the benchmark
# t-SNE and PCA with variance
# Run clustering on the t-SNE output
# Check feature importance against the 50 others

# Feature importance
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris as load_data
import matplotlib.pyplot as plt
from scikitplot import classifier_factory

X, y = load_data(return_X_y=True)
print(X.shape, y.shape)

rf = classifier_factory(RandomForestClassifier(random_state=1))
rf.fit(X, y)
rf.plot_feature_importances(
    feature_names=["feature" + str(i) for i in range(X.shape[1])])
plt.show()

# Using the more flexible functions API
from scikitplot import plotters as skplt

rf = RandomForestClassifier()
rf = rf.fit(X, y)
skplt.plot_feature_importances(
    rf, feature_names=['petal length', 'petal width',
                       'sepal length', 'sepal width'])

def getData():
    data_map = load_data()
    X = data_map['data']
    y = data_map['target']
    # Remap the 0 labels to -1 so the targets lie in {-1, +1}.
    y[np.where(y == 0)] = -1
    return scale(X), y
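# Hypothetical usage (not part of the original snippet): the remap of the
# 0 labels to -1 suggests getData() feeds a margin-based binary learner.
# A minimal sketch, assuming load_data aliases a binary scikit-learn
# loader such as load_breast_cancer; the imports below are also the ones
# getData() itself relies on.
import numpy as np
from sklearn.datasets import load_breast_cancer as load_data
from sklearn.preprocessing import scale
from sklearn.svm import LinearSVC

X, y = getData()             # features scaled, labels in {-1, +1}
clf = LinearSVC().fit(X, y)  # LinearSVC accepts {-1, +1} labels directly
print('Training accuracy: {:.3f}'.format(clf.score(X, y)))
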
import numpy as np
from sklearn.datasets import fetch_20newsgroups as load_data
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

dataset = load_data(
    categories=["alt.atheism", "soc.religion.christian", "talk.politics.guns"],
    shuffle=True,
)
X, y = dataset.data, dataset.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

bow = TfidfVectorizer().fit(X_train)
X_train_bow = bow.transform(X_train)
X_test_bow = bow.transform(X_test)

logreg = LogisticRegression(multi_class="auto").fit(X_train_bow, y_train)
y_test_hat = logreg.predict_proba(X_test_bow)
print(accuracy_score(y_test, np.argmax(y_test_hat, axis=1)))

import time

from sklearn.datasets import load_iris as load_data
from sklearn.kernel_approximation import RBFSampler
from sklearn.utils import shuffle

from libifbtsvm import iFBTSVM
from libifbtsvm.models.ifbtsvm import Hyperparameters

if __name__ == '__main__':

    dataset = load_data()
    dataset.data, dataset.target = shuffle(dataset.data, dataset.target)

    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.01,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=500,
        phi=1e-9,
        kernel=RBFSampler(gamma=1, n_components=20),
        forget_score=5,
    )

    # Initialisation iFBTSVM
    ifbtsvm = iFBTSVM(parameters=params, n_jobs=1)
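    # Hypothetical continuation (not part of the original snippet, which
    # stops after constructing the estimator): assuming iFBTSVM follows the
    # scikit-learn fit/score convention -- check the libifbtsvm docs before
    # relying on this -- training and evaluation could look as follows. The
    # otherwise unused `time` import above suggests the original also timed
    # this step.
    start = time.monotonic()
    ifbtsvm.fit(X=dataset.data, y=dataset.target)
    print('Training time: {:.3f}s'.format(time.monotonic() - start))
    accuracy = ifbtsvm.score(X=dataset.data, y=dataset.target)
    print('Training-set accuracy: {}'.format(accuracy))
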
from sklearn.datasets import load_iris as load_data
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

import MachineLearning.DecisionTree.decisionTree as decisionTree
import MachineLearning.DecisionTree.tool as tool
from MachineLearning.DecisionTree.drawTree import *

if __name__ == '__main__':
    print('load_data......')
    dataSet = load_data()
    data = dataSet.data
    target = dataSet.target
    dataframe = pd.DataFrame(data=data, dtype=np.float32)
    dataframe.insert(4, 'label', target)
    dataMat = np.mat(dataframe)

    # Split into test and train sets.
    X_train, X_test, y_train, y_test = train_test_split(
        dataMat[:, 0:-1], dataMat[:, -1], test_size=0.3, random_state=0)
    data_train = np.hstack((X_train, y_train))
    data_train = data_train.tolist()
    X_test = X_test.tolist()

    tree = decisionTree.decision_tree()
    tree_root = tree.build_tree(data_train)
    # Spelling follows the method name as defined in the decisionTree module.
    predictions = tree.predcit_samples(X_test, tree_root)
    pres = []
    for i in predictions:
        pres.append(list(i.keys()))
    y_test = y_test.tolist()
    accuracy = 0