def test_LabelBinarizer2(self):
        arr = np.array(['X', 'Y', 'Z', 'X'])
        s = pdml.ModelSeries(arr)

        lb = s.preprocessing.LabelBinarizer()
        s.fit(lb)

        binarized = s.transform(lb)
        self.assertTrue(isinstance(binarized, pdml.ModelFrame))

        expected = pd.DataFrame({0: [1, 0, 0, 1], 1: [0, 1, 0, 0], 2: [0, 0, 1, 0]})
        self.assert_frame_equal(binarized, expected)

        df = pdml.ModelFrame(datasets.load_iris())
        df.target.fit(lb)
        binarized = df.target.transform(lb)

        expected = pd.DataFrame({0: [1] * 50 + [0] * 100,
                                 1: [0] * 50 + [1] * 50 + [0] * 50,
                                 2: [0] * 100 + [1] * 50})
        self.assert_frame_equal(binarized, expected)

        df = pdml.ModelFrame(datasets.load_iris())
        df.target.fit(lb)
        df.target = df.target.transform(lb)
        self.assertEqual(df.shape, (150, 7))
        self.assert_frame_equal(df.target, expected)
Example #2
def main():
    all_targets = load_iris()['target']
    data_set = load_iris()['data']

    train_set, test_set, targets, targets_test = train_test_split(data_set, all_targets, train_size=0.9)

    targets_class = (transform_target_vars(targets, class_num=0),
                     transform_target_vars(targets, class_num=1),
                     transform_target_vars(targets, class_num=2))

    for n_trees in range(1, 150, 10):
        classifiers = (GradientBoostingClassifier(n_trees=n_trees, max_tree_depth=1, n_features=3),
                       GradientBoostingClassifier(n_trees=n_trees, max_tree_depth=1, n_features=3),
                       GradientBoostingClassifier(n_trees=n_trees, max_tree_depth=1, n_features=3))

        classifiers[0].fit(train_set, targets_class[0])
        classifiers[1].fit(train_set, targets_class[1])
        classifiers[2].fit(train_set, targets_class[2])

        predicts = (classifiers[0].predict(test_set),
                    classifiers[1].predict(test_set),
                    classifiers[2].predict(test_set))

        fin_predict = decision_function(predicts[0], predicts[1], predicts[2])

        print "Number of trees:", n_trees, ":", accuracy_score(targets_test, fin_predict)
Example #3
def setUp(self):
    self.x = datasets.load_iris().data
    self.y = datasets.load_iris().target
    # test without pretraining
    self.model = dbn([nn.layer(4, linear, dlinear),
                      nn.layer(5, tanh, dtanh),
                      nn.layer(1, linear, dlinear, bias=False)], False)
Example #4
def main():
    data_set = load_iris()['data']
    target_set = load_iris()['target']

    cartTree = CartTree(min_leaf_size=5)

    cartTree.fit(data_set, target_set)

    print(cartTree.tree)
    print(target_set)
    print(numpy.array([int(round(cartTree.predict([x]))) for x in data_set]))
Example #5
def createDataSet():
    dataSet = datasets.load_iris()
    iris_X = dataSet.data
    iris_y = dataSet.target
    np.random.seed(1)
    indices = np.random.permutation(len(iris_X))
    iris_X_train = iris_X[indices[:-10]]
    iris_y_train = iris_y[indices[:-10]]
    iris_X_test  = iris_X[indices[-10:]]
    iris_y_test  = iris_y[indices[-10:]]
    return iris_X_train, iris_y_train, iris_X_test, iris_y_test
Example #6
def test_load_iris():
    res = load_iris()
    assert_equal(res.data.shape, (150, 4))
    assert_equal(res.target.size, 150)
    assert_equal(res.target_names.size, 3)
    assert_true(res.DESCR)

    # test return_X_y option
    X_y_tuple = load_iris(return_X_y=True)
    bunch = load_iris()
    assert_true(isinstance(X_y_tuple, tuple))
    assert_array_equal(X_y_tuple[0], bunch.data)
    assert_array_equal(X_y_tuple[1], bunch.target)
Example #7
def test_sparse_k_means_init_centers():
    from sklearn.datasets import load_iris

    iris = load_iris()
    X = iris.data

    # Get a local optimum
    centers = KMeans(n_clusters=3).fit(X).cluster_centers_

    # Fit starting from a local optimum shouldn't change the solution
    np.testing.assert_allclose(
        centers,
        KMeans(n_clusters=3,
               init=centers,
               n_init=1).fit(X).cluster_centers_
    )

    # The same should be true when X is sparse
    X_sparse = sp.csr_matrix(X)
    np.testing.assert_allclose(
        centers,
        KMeans(n_clusters=3,
               init=centers,
               n_init=1).fit(X_sparse).cluster_centers_
    )
Example #8
def test_plot_partial_dependence_multiclass(pyplot):
    # Test partial dependence plot function on multi-class input.
    iris = load_iris()
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, iris.target)

    grid_resolution = 25
    plot_partial_dependence(clf, iris.data, [0, 1],
                            target=0,
                            grid_resolution=grid_resolution)
    fig = pyplot.gcf()
    axs = fig.get_axes()
    assert len(axs) == 2
    assert all(ax.has_data() for ax in axs)

    # now with symbol labels
    target = iris.target_names[iris.target]
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, target)

    grid_resolution = 25
    plot_partial_dependence(clf, iris.data, [0, 1],
                            target='setosa',
                            grid_resolution=grid_resolution)
    fig = pyplot.gcf()
    axs = fig.get_axes()
    assert len(axs) == 2
    assert all(ax.has_data() for ax in axs)
Example #9
def check_classifiers_input_shapes(name, Classifier):
    iris = load_iris()
    X, y = iris.data, iris.target
    X, y = shuffle(X, y, random_state=1)
    X = StandardScaler().fit_transform(X)
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        classifier = Classifier()
    set_fast_parameters(classifier)
    set_random_state(classifier)
    # fit
    classifier.fit(X, y)
    y_pred = classifier.predict(X)

    set_random_state(classifier)
    # Check that when a 2D y is given, a DataConversionWarning is
    # raised
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", DataConversionWarning)
        warnings.simplefilter("ignore", RuntimeWarning)
        classifier.fit(X, y[:, np.newaxis])
    msg = "expected 1 DataConversionWarning, got: %s" % (
        ", ".join([str(w_x) for w_x in w]))
    assert_equal(len(w), 1, msg)
    assert_array_equal(y_pred, classifier.predict(X))
Example #10
def test_cross_val_predict_with_method():
    iris = load_iris()
    X, y = iris.data, iris.target
    X, y = shuffle(X, y, random_state=0)
    classes = len(set(y))

    kfold = KFold(len(iris.target))

    methods = ['decision_function', 'predict_proba', 'predict_log_proba']
    for method in methods:
        est = LogisticRegression()

        predictions = cross_val_predict(est, X, y, method=method)
        assert_equal(len(predictions), len(y))

        expected_predictions = np.zeros([len(y), classes])
        func = getattr(est, method)

        # Naive loop (should be same as cross_val_predict):
        for train, test in kfold.split(X, y):
            est.fit(X[train], y[train])
            expected_predictions[test] = func(X[test])

        predictions = cross_val_predict(est, X, y, method=method,
                                        cv=kfold)
        assert_array_almost_equal(expected_predictions, predictions)
Example #11
def test_score_memmap():
    # Ensure a scalar score of memmap type is accepted
    iris = load_iris()
    X, y = iris.data, iris.target
    clf = MockClassifier()
    tf = tempfile.NamedTemporaryFile(mode='wb', delete=False)
    tf.write(b'Hello world!!!!!')
    tf.close()
    scores = np.memmap(tf.name, dtype=np.float64)
    score = np.memmap(tf.name, shape=(), mode='r', dtype=np.float64)
    try:
        cross_val_score(clf, X, y, scoring=lambda est, X, y: score)
        # non-scalar should still fail
        assert_raises(ValueError, cross_val_score, clf, X, y,
                      scoring=lambda est, X, y: scores)
    finally:
        # Best effort to release the mmap file handles before deleting the
        # backing file under Windows
        scores, score = None, None
        for _ in range(3):
            try:
                os.unlink(tf.name)
                break
            except WindowsError:
                sleep(1.)
Example #12
def load_iris_data() :

    # load the iris dataset from the sklearn module
    iris = datasets.load_iris()

    # extract the elements of the data that are used in this exercise
    return (iris.data, iris.target, iris.target_names)
Example #13
def test_check_estimator_clones():
    # check that check_estimator doesn't modify the estimator it receives
    from sklearn.datasets import load_iris
    iris = load_iris()

    for Estimator in [GaussianMixture, LinearRegression,
                      RandomForestClassifier, NMF, SGDClassifier,
                      MiniBatchKMeans]:
        with ignore_warnings(category=FutureWarning):
            # when 'est = SGDClassifier()'
            est = Estimator()
        set_checking_parameters(est)
        set_random_state(est)
        # without fitting
        old_hash = joblib.hash(est)
        check_estimator(est)
        assert_equal(old_hash, joblib.hash(est))

        with ignore_warnings(category=FutureWarning):
            # when 'est = SGDClassifier()'
            est = Estimator()
        set_checking_parameters(est)
        set_random_state(est)
        # with fitting
        est.fit(iris.data + 10, iris.target)
        old_hash = joblib.hash(est)
        check_estimator(est)
        assert_equal(old_hash, joblib.hash(est))
Example #14
def testIrisSummaries(self):
    random.seed(42)
    iris = datasets.load_iris()
    classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
    classifier.fit(iris.data, iris.target, logdir='/tmp/skflow_tests/')
    score = accuracy_score(classifier.predict(iris.data), iris.target)
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
Example #15
def testIris_proba(self):
    random.seed(42)
    iris = datasets.load_iris()
    classifier = skflow.TensorFlowClassifier(n_classes=3)
    classifier.fit(iris.data, iris.target)
    score = log_loss(iris.target, classifier.predict_proba(iris.data))
    self.assertLess(score, 0.8, "Failed with score = {0}".format(score))
Example #16
def test_feature_union():
    # basic sanity check for feature union
    iris = load_iris()
    X = iris.data
    X -= X.mean(axis=0)
    y = iris.target
    svd = TruncatedSVD(n_components=2, random_state=0)
    select = SelectKBest(k=1)
    fs = FeatureUnion([("svd", svd), ("select", select)])
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    assert_equal(X_transformed.shape, (X.shape[0], 3))

    # check if it does the expected thing
    assert_array_almost_equal(X_transformed[:, :-1], svd.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())

    # test if it also works for sparse input
    # We use a different svd object to control the random_state stream
    fs = FeatureUnion([("svd", svd), ("select", select)])
    X_sp = sparse.csr_matrix(X)
    X_sp_transformed = fs.fit_transform(X_sp, y)
    assert_array_almost_equal(X_transformed, X_sp_transformed.toarray())

    # test setting parameters
    fs.set_params(select__k=2)
    assert_equal(fs.fit_transform(X, y).shape, (X.shape[0], 4))

    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", TransfT()), ("svd", svd), ("select", select)])
    X_transformed = fs.fit_transform(X, y)
    assert_equal(X_transformed.shape, (X.shape[0], 8))
Example #17
def test_feature_union_weights():
    # test feature union with transformer weights
    iris = load_iris()
    X = iris.data
    y = iris.target
    pca = RandomizedPCA(n_components=2, random_state=0)
    select = SelectKBest(k=1)
    # test using fit followed by transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    # test using fit_transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    X_fit_transformed = fs.fit_transform(X, y)
    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", TransfT()), ("pca", pca), ("select", select)],
                      transformer_weights={"mock": 10})
    X_fit_transformed_wo_method = fs.fit_transform(X, y)
    # check against expected result

    # We use a different pca object to control the random_state stream
    assert_array_almost_equal(X_transformed[:, :-1], 10 * pca.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_array_almost_equal(X_fit_transformed[:, :-1],
                              10 * pca.fit_transform(X))
    assert_array_equal(X_fit_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_equal(X_fit_transformed_wo_method.shape, (X.shape[0], 7))
Example #18
def test_svm():
    iris = load_iris()
    # Select only the class-0 and class-1 samples for a binary problem.
    X, Y = zip(*[(x, y) for x, y in zip(iris.data, iris.target) if y in [0, 1]])
    svm = SVM(C=1.0, kernel='rbf')
    svm.fit(X, Y)

    assert_almost_equal(svm.cost, 2.4034163345438264)
Example #19
def test_pipeline_methods_preprocessing_svm():
    # Test the various methods of the pipeline (preprocessing + svm).
    iris = load_iris()
    X = iris.data
    y = iris.target
    n_samples = X.shape[0]
    n_classes = len(np.unique(y))
    scaler = StandardScaler()
    pca = RandomizedPCA(n_components=2, whiten=True)
    clf = SVC(probability=True, random_state=0)

    for preprocessing in [scaler, pca]:
        pipe = Pipeline([('preprocess', preprocessing), ('svc', clf)])
        pipe.fit(X, y)

        # check shapes of various prediction functions
        predict = pipe.predict(X)
        assert_equal(predict.shape, (n_samples,))

        proba = pipe.predict_proba(X)
        assert_equal(proba.shape, (n_samples, n_classes))

        log_proba = pipe.predict_log_proba(X)
        assert_equal(log_proba.shape, (n_samples, n_classes))

        decision_function = pipe.decision_function(X)
        assert_equal(decision_function.shape, (n_samples, n_classes))

        pipe.score(X, y)
Example #20
def test_classification_report_multiclass():
    """Test performance report"""
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with class names
    expected_report = """\
             precision    recall  f1-score   support

     setosa       0.83      0.79      0.81        24
 versicolor       0.33      0.10      0.15        31
  virginica       0.42      0.90      0.57        20

avg / total       0.51      0.53      0.47        75
"""
    report = classification_report(
        y_true, y_pred, labels=np.arange(len(iris.target_names)),
        target_names=iris.target_names)
    assert_equal(report, expected_report)

    # print classification report with label detection
    expected_report = """\
             precision    recall  f1-score   support

          0       0.83      0.79      0.81        24
          1       0.33      0.10      0.15        31
          2       0.42      0.90      0.57        20

avg / total       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
Example #21
def test_classification_report():
    """Test performance report"""
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with class names
    expected_report = """\
             precision    recall  f1-score   support

     setosa       0.82      0.92      0.87        25
 versicolor       0.56      0.17      0.26        30
  virginica       0.47      0.90      0.62        20

avg / total       0.62      0.61      0.56        75
"""
    report = classification_report(
        y_true, y_pred, labels=range(len(iris.target_names)),
        target_names=iris.target_names)
    assert_equal(report, expected_report)

    # print classification report with label detection
    expected_report = """\
             precision    recall  f1-score   support

          0       0.82      0.92      0.87        25
          1       0.56      0.17      0.26        30
          2       0.47      0.90      0.62        20

avg / total       0.62      0.61      0.56        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
Example #22
def test_classification_report_multiclass_with_digits():
    """Test performance report with added digits in floating point values"""
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with class names
    expected_report = """\
             precision    recall  f1-score   support

     setosa    0.82609   0.79167   0.80851        24
 versicolor    0.33333   0.09677   0.15000        31
  virginica    0.41860   0.90000   0.57143        20

avg / total    0.51375   0.53333   0.47310        75
"""
    report = classification_report(
        y_true, y_pred, labels=np.arange(len(iris.target_names)),
        target_names=iris.target_names, digits=5)
    assert_equal(report, expected_report)

    # print classification report with label detection
    expected_report = """\
             precision    recall  f1-score   support

          0       0.83      0.79      0.81        24
          1       0.33      0.10      0.15        31
          2       0.42      0.90      0.57        20

avg / total       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
Example #23
def main():

    # http://scikit-learn.org/stable/tutorial/basic/tutorial.html#loading-an-example-dataset
    # "A dataset is a dictionary-like object that holds all the data and some
    # metadata about the data. This data is stored in the .data member, which
    # is a n_samples, n_features array. In the case of supervised problem, one
    # or more response variables are stored in the .target member."

    # Toy datasets

    iris = datasets.load_iris()         # The iris dataset (classification)
    digits = datasets.load_digits()     # The digits dataset (classification)

    #boston = datasets.load_boston()     # The boston house-prices dataset (regression)
    #diabetes = datasets.load_diabetes() # The diabetes dataset (regression)
    #linnerud = datasets.load_linnerud() # The linnerud dataset (multivariate regression)

    print(iris.feature_names)
    print(iris.data)
    print(iris.target_names)
    print(iris.target)

    print(digits.images[0])
    print(digits.target_names)
    print(digits.target)

    plt.imshow(digits.images[0], cmap='gray', interpolation='nearest')
    plt.show()
Example #24
def test_sparse_fit_params():
    iris = load_iris()
    X, y = iris.data, iris.target
    clf = MockClassifier()
    fit_params = {'sparse_sample_weight': coo_matrix(np.eye(X.shape[0]))}
    a = cval.cross_val_score(clf, X, y, fit_params=fit_params)
    assert_array_equal(a, np.ones(3))
"""
======================
Discriminant Functions
======================
"""
import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import classification_report

from sklvq import GLVQ
from sklvq.discriminants import DiscriminantBaseClass

data, labels = load_iris(return_X_y=True)

###############################################################################
# The sklvq package contains a single discriminant function; additions are very
# welcome. Note that they must work with sklvq.objectives.GeneralizedLearningObjective,
# i.e., passing additional or different arguments is not possible.


# The discriminant function depends on the objective function, which determines
# the parameters of __call__ and gradient. See sklvq.objectives.GeneralizedLearningObjective.
class CustomRelativeDistance(DiscriminantBaseClass):
    def __call__(self, dist_same: np.ndarray,
                 dist_diff: np.ndarray) -> np.ndarray:
        # dist_same = distance to the prototype with the same label as X.
        # dist_diff = distance to the prototype with a different label than X.
        return (dist_same - dist_diff) / (dist_same + dist_diff)

    def gradient(self, dist_same: np.ndarray, dist_diff: np.ndarray,
                 winner_same: bool) -> np.ndarray:
        # (The original snippet breaks off here; the body below is the analytic
        # gradient of __call__: for mu = (a - b) / (a + b),
        # dmu/da = 2b / (a + b)**2 and dmu/db = -2a / (a + b)**2.)
        if winner_same:
            return 2 * dist_diff / (dist_same + dist_diff) ** 2
        return -2 * dist_same / (dist_same + dist_diff) ** 2
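###############################################################################
# A usage sketch, under the assumption that GLVQ accepts a custom discriminant
# class through its discriminant_type parameter (mirroring sklvq's other
# *_type arguments; this snippet does not itself confirm that API).

model = GLVQ(discriminant_type=CustomRelativeDistance)
model.fit(data, labels)
print(classification_report(labels, model.predict(data)))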
Example #26
def kmeansCluster(X, numClusters):
    # (Header reconstructed from the call in __main__ below; the original
    # snippet begins mid-function.)
    cluster = tf.contrib.factorization.KMeansClustering(
        num_clusters=numClusters,
        initial_clusters=tf.contrib.factorization.KMeansClustering.
        KMEANS_PLUS_PLUS_INIT)
    cluster.train(input_fn=get_inputs, steps=2000)
    y_pred = cluster.predict_cluster_index(input_fn=get_inputs)
    y_pred = np.asarray(list(y_pred))
    return y_pred
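get_inputs is not defined in this fragment; with the TF 1.x Estimator API it is typically a closure over the feature matrix, along these lines (a sketch, assuming X is the feature matrix passed in above):

def get_inputs():
    # Standard input_fn pattern for tf.contrib.factorization.KMeansClustering:
    # feed the whole matrix once per invocation.
    return tf.train.limit_epochs(
        tf.convert_to_tensor(X, dtype=tf.float32), num_epochs=1)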


def plotFigure(fignum, title, X, y):
    fig = plt.figure(fignum, figsize=(8, 6))
    ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
    ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y.astype(float), edgecolor='k')
    ax.w_xaxis.set_ticklabels([])
    ax.w_yaxis.set_ticklabels([])
    ax.w_zaxis.set_ticklabels([])
    ax.set_xlabel('Petal width')
    ax.set_ylabel('Sepal length')
    ax.set_zlabel('Petal length')
    ax.set_title(title)
    ax.dist = 10
    fig.show()


if __name__ == '__main__':
    # sess = tf.Session()
    X, y = loadData(datasets.load_iris())
    y_pred = kmeansCluster(X, 1)
    plotFigure(1, "3 clusters", X, y_pred)
    plotFigure(2, "Ground Truth", X, y)
Example #27
def iris():
    """Return the Iris data set feature matrix."""

    X, _ = load_iris(return_X_y=True)
    return X
Example #28
def train(output_dir='outputs', kernel='linear', penalty=1.0):
    # make sure output directory exist
    os.makedirs(output_dir, exist_ok=True)

    # Safely get the Azure ML run
    run = get_AMLRun()

    # loading the iris dataset
    iris = datasets.load_iris()

    # X -> features, y -> label
    X = iris.data
    y = iris.target
    class_names = iris.target_names

    # dividing X, y into train and test data; random seed for reproducibility
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.20, random_state=0)

    # create our model - a linear SVM classifier
    svm_model_linear = SVC(kernel=kernel, C=penalty)

    # evaluate each model in turn
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
    cv_results = cross_val_score(svm_model_linear,
                                 X_train,
                                 y_train,
                                 cv=kfold,
                                 scoring='accuracy')

    print('Cross Validation Mean: ', cv_results.mean())
    print('Cross Validation Std: ', cv_results.std())
    if run is not None:
        run.log_list('Cross Validation Accuracies', cv_results)
        run.log('Cross Validation Mean', cv_results.mean())
        run.log('Cross Validation Std', cv_results.std())

    # now training on the full dataset
    svm_model_linear.fit(X_train, y_train)
    y_pred = svm_model_linear.predict(X_test)

    # model accuracy for X_test
    accuracy = svm_model_linear.score(X_test, y_test)
    print('Accuracy of SVM classifier on test set: {:.2f}'.format(accuracy))
    if run is not None:
        run.log('Accuracy', float(accuracy))

    # Plot non-normalized confusion matrix
    title = 'Test confusion matrix'
    disp = plot_confusion_matrix(svm_model_linear,
                                 X_test,
                                 y_test,
                                 display_labels=class_names,
                                 cmap=plt.cm.Blues)
    disp.ax_.set_title(title)
    print(title)
    print(disp.confusion_matrix)

    if run is not None:
        run.log_image(title, plot=plt)
    else:
        plt.savefig(os.path.join(output_dir, 'confusion_matrix.png'))

    # Plot normalized confusion matrix
    title = 'Normalized test confusion matrix'
    disp = plot_confusion_matrix(svm_model_linear,
                                 X_test,
                                 y_test,
                                 display_labels=class_names,
                                 cmap=plt.cm.Blues,
                                 normalize='true')
    disp.ax_.set_title(title)
    print(title)
    print(disp.confusion_matrix)

    if run is not None:
        run.log_image(title, plot=plt)
    else:
        plt.savefig(os.path.join(output_dir,
                                 'confusion_matrix_normalised.png'))

    # Print classification report
    print(classification_report(y_test, y_pred))

    # files saved in the "outputs" folder are automatically uploaded into
    # Azure ML Service run history
    model_folder = os.path.join(output_dir, 'model')
    model_path = os.path.join(model_folder, 'covid-tweets-analyis.joblib')
    os.makedirs(model_folder, exist_ok=True)
    joblib.dump(svm_model_linear, model_path)
    print('Output saved to', output_dir)
Example #29
def loadData():
    iris = datasets.load_iris()
    E = np.random.uniform(0, 0.1, size=(len(iris.data), 20))
    X = np.hstack((iris.data, E))
    y = iris.target
    return X, y
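loadData pads the four iris features with 20 uniform-noise columns, the classic setup for demonstrating feature selection. A usage sketch (SelectKBest is an illustration here, not part of the original example):

from sklearn.feature_selection import SelectKBest, f_classif

X, y = loadData()
selector = SelectKBest(f_classif, k=4).fit(X, y)
print(selector.get_support().nonzero()[0])  # ideally the informative columns [0 1 2 3]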
Example #30
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
import plotly.express as px

iris = load_iris()  # returns a simple dictionary-like object with all the data
print("IRIS Dataset Size : ", iris.data.shape, iris.target.shape)
print("IRIS Flower Names : ", iris.target_names)
print("IRIS Flower Feature Names : ", iris.feature_names)

# Creating dataframe of total data
iris_df = pd.DataFrame(data=np.concatenate((iris.data, iris.target.reshape(-1, 1)), axis=1),
                       columns=(iris.feature_names + ['Flower Type']))
iris_df["Flower Name"] = [iris.target_names[int(i)] for i in iris_df["Flower Type"]]
print(iris_df.head())

chart1 = px.scatter(data_frame=iris_df,
                    x="sepal length (cm)",
                    y="petal length (cm)",
                    color="Flower Name",
                    size=[1.0] * 150,
                    title="sepal length (cm) vs petal length (cm) color-encoded by flower type")
chart1
Example #31
def generate_train_data_iris():
    iris = datasets.load_iris()
    return iris['data'][:, 2:4], iris['target']
Example #32
def __init__(self):
    self.iris = datasets.load_iris()
Example #33
    # (Fragment: the enclosing function and the variables xx, yy and a are
    # defined earlier in the original source.)
    margin = 1 / np.sqrt(np.sum(linear_svm.coef_**2))
    yy_down = yy - np.sqrt(1 + a**2) * margin
    yy_up = yy + np.sqrt(1 + a**2) * margin

    plt.plot(xx, yy, 'k-')
    plt.plot(xx, yy_down, 'k--')
    plt.plot(xx, yy_up, 'k--')


style.use('ggplot')

# create linear SVM
linear_svm = svm.SVC(kernel='linear')

# import source data
iris = load_iris()

# setup source dataframe
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['is_train'] = np.random.uniform(0, 1, len(df)) <= 0.75
df['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)
df['species_codes'] = df['species'].cat.codes

# Linear SVM: Separable  - take only two classes from the iris dataset

# create training set
X_tr = df[df['is_train']]
X_tr = pd.concat(
    [X_tr[X_tr['species_codes'] == 0], X_tr[X_tr['species_codes'] == 1]])
y_tr = X_tr['species_codes']
Example #34
from sklearn.datasets import load_iris
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import tensorflow as tf
"""
训练鸢尾花数据集
"""
# 1、 读入数据
x_data = load_iris().data
y_data = load_iris().target
# x = load_iris().data
# y_data = load_iris().target
# x_data = DataFrame(x,columns=load_iris().feature_names)
# pd.set_option("display.unicode.east_asian_width", True)  # set the display mode
# print("x_data add index:\n", x_data)
# x_data["Type"] = y_data  # add a class column
# print("x_data add index:\n", x_data)

# 2. Shuffle the data
# sklearn's built-in train/test split helpers are not used here
# x_data holds the data: features plus class labels
np.random.seed(116)  # random seed
np.random.shuffle(x_data)
np.random.seed(116)  # the same seed keeps features and labels aligned
np.random.shuffle(y_data)
tf.random.set_seed(116)

# 3. Split the dataset into training and test sets
# (manual train/test split)
# the training and test sets do not overlap
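# Presumed continuation (the snippet breaks off here); a manual split in the
# style of the later TensorFlow example in this listing, holding out 30 rows:
x_train = x_data[:-30]
y_train = y_data[:-30]
x_test = x_data[-30:]
y_test = y_data[-30:]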
Example #35
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets

if __name__ == '__main__':
    irysy = datasets.load_iris()
    X = irysy.data[:, :2]
    y = irysy.target

    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

    h = (x_max - x_min) / 100
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    clf = svm.SVC(kernel='linear', C=2.0)
    clf.fit(X, y)

    X_plot = np.c_[xx.ravel(), yy.ravel()]

    Z = clf.predict(X_plot)
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.contour(xx, yy, Z)
    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.show()
Example #36
def main():
    # Load the dataset
    iris = datasets.load_iris()
Example #37
# Slowest, but gives the best performance.

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime
import time

#1. Data
dataset = load_iris()

x_train, x_test, y_train, y_test = train_test_split(dataset.data,
                                                    dataset.target,
                                                    train_size=0.8,
                                                    random_state=33)

start = time.time()

#2. Model
model = XGBClassifier(n_jobs=8,
                      use_label_encoder=False)  # n_jobs=-1 would use all CPU cores
# use_label_encoder=False keeps the label-encoder warning from appearing

#3. Training
model.fit(x_train, y_train, eval_metric='logloss')
Example #38
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 15 14:12:15 2018

@author: chengch
"""

import numpy as np
import matplotlib.pyplot  as plt
import sklearn

from sklearn.datasets import load_iris

original_data = load_iris()
data = original_data['data']

def get_cov(data, k=1):
    # Centre each feature column.
    for i in range(len(data[1])):
        data[:, i] = data[:, i] - np.mean(data[:, i])
    # Build the covariance matrix entry by entry.
    covariance_matrix = []
    for j in range(len(data[1])):
        for m in range(len(data[1])):
            dat = np.dot(data[:, j], data[:, m]) / (len(data) - 1)
            covariance_matrix.append(dat)
    covariance_matrix = np.reshape(covariance_matrix, (len(data[1]), len(data[1])))

    # Eigendecomposition; sort eigenvalues in descending order.
    us, vs = np.linalg.eig(covariance_matrix)
    us_ind = np.argsort(-us)
    result = []
    for ind in range(k):
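        # Presumed continuation (the original snippet is cut off here): keep the
        # top-k eigenvectors and project the centred data onto them.
        result.append(vs[:, us_ind[ind]])
    return np.dot(data, np.array(result).T)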
Example #39
import sys, os
import numpy as np
sys.path.append(os.getcwd() + r'\Modules')
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from Perceptron_Class import Perceptron

iris = datasets.load_iris()  # load the iris data
X = iris["data"][:, (2, 3)]  # petal length and petal width as the two features
y = 2 * (iris["target"] == 2).astype(np.int64) - 1  # labels: class 2 (True) -> +1, otherwise (False) -> -1
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=5)

model = Perceptron()
model.fit(X_train, y_train)
model.predict(X_test)

plt.style.use('seaborn-darkgrid')  # plot training and test points in two separate axes

fig, axs = plt.subplots(1, 2,
                        figsize=(9, 4))  # a figure with a 1x2 grid of Axes
axs[0].scatter(X_train[:, 0],
               X_train[:, 1],
               c=list(y_train),
               cmap=plt.cm.seismic,
               edgecolors='none',
               s=6)
Example #40
# 200622_25
# iris, multiclass classification, complete version


from sklearn.feature_selection import SelectFromModel
import numpy as np
from xgboost import XGBClassifier, XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_boston, load_breast_cancer, load_iris
from sklearn.metrics import accuracy_score, r2_score


### Data ###
x, y = load_iris(return_X_y=True)
print(x.shape)      # (150, 4)
print(y.shape)      # (150, )

x_train, x_test, y_train, y_test = train_test_split(x, y, train_size = 0.8,
                 shuffle = True, random_state = 66)


### Model ###
model = XGBClassifier(objective='multi:softmax', n_estimators=300, learning_rate=0.1)


### Training ###
model.fit(x_train, y_train, verbose=True, eval_metric=['mlogloss','merror'],
                eval_set=[(x_train, y_train), (x_test, y_test)],
                early_stopping_rounds=20)

# Stopping. Best iteration: [0]
Example #41
from sklearn.datasets import load_iris
from sklearn import tree

# 1. Let's Create a Data Set
irisData = load_iris()
print("===IRIS DATASET===")
print(irisData)
print(type(irisData))

print()

# Array of Features :)
print("===IRIS DATA FEATURES===")
print(irisData.data)

print()

# Array of Targets
print("===IRIS DATA TARGET===")
print(irisData.target)

print()

# Array of Target Names
print("===IRIS DATA TARGET NAMES===")
print(irisData.target_names)

# 2. Let's Create the Model
model = tree.DecisionTreeClassifier()

# 3. Train the Model | Supervised Learning
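# Presumed continuation (the snippet is cut off here):
model.fit(irisData.data, irisData.target)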
Example #42
def load_data():
    iris=datasets.load_iris()
    x_train=iris.data
    y_train=iris.target
    return train_test_split(x_train, y_train, test_size=0.25, random_state=0, stratify=y_train)
Example #43
def iris_dataframe():
    iris = datasets.load_iris()
    return pd.DataFrame(iris.data[:, :2], columns=iris.feature_names[:2])
Example #44
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'Model evaluation with sklearn, again on the iris dataset'
import numpy as np
import pandas as pd
from sklearn import datasets, metrics
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the iris dataset:
scikit_iris = datasets.load_iris()
pd_iris = pd.DataFrame(
    data=np.c_[scikit_iris['data'], scikit_iris['target']],
    columns=np.append(scikit_iris['feature_names'], 'y')
)
x = pd_iris[scikit_iris['feature_names']]
y = pd_iris['y']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0)
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(x_train, y_train)
# The training and test sets come from the same data, so score the model on both:
y_predict_on_train = knn.predict(x_train)
y_predict_on_test = knn.predict(x_test)
print('Training accuracy: {:.2%}'.format(metrics.accuracy_score(y_train, y_predict_on_train)))
print('Test accuracy: {:.2%}'.format(metrics.accuracy_score(y_test, y_predict_on_test)))
Example #45
def test_iris_f_min(op, num_folds=5):
    from sklearn import datasets

    iris = datasets.load_iris()
    return test_f_min(op, iris.data, iris.target, num_folds=num_folds)
Example #46
    def test_pipeline_column_transformer(self):

        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: "cat1"
                                              if x > 0.5 else "cat2")
        X_train["vcat2"] = X_train["vB"].apply(lambda x: "cat3"
                                               if x > 0.5 else "cat4")
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1, max_iter=10, solver="lbfgs", tol=1e-3)

        numeric_transformer = Pipeline(steps=[
            ("imputer", SimpleImputer(strategy="median")),
            ("scaler", StandardScaler()),
        ])

        categorical_transformer = Pipeline(steps=[
            (
                "onehot",
                OneHotEncoder(sparse=True, handle_unknown="ignore"),
            ),
            (
                "tsvd",
                TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4),
            ),
        ])

        preprocessor = ColumnTransformer(transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer, categorical_features),
        ])

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)
        initial_type = [
            ("numfeat", FloatTensorType([None, 3])),
            ("strfeat", StringTensorType([None, 2])),
        ]

        X_train = X_train[:11]
        model_onnx = convert_sklearn(model, initial_types=initial_type,
                                     target_opset=TARGET_OPSET)

        dump_data_and_model(
            X_train, model, model_onnx,
            basename="SklearnPipelineColumnTransformerPipeliner")

        if __name__ == "__main__":
            from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer

            pydot_graph = GetPydotGraph(
                model_onnx.graph,
                name=model_onnx.graph.name,
                rankdir="TP",
                node_producer=GetOpNodeProducer("docstring"))
            pydot_graph.write_dot("graph.dot")

            import os

            os.system("dot -O -G=300 -Tpng graph.dot")
Example #47
import tensorflow as tf
from sklearn.datasets import load_iris
data = load_iris()

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.33, random_state=42)

model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation=tf.nn.relu),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10, activation=tf.nn.softmax)  # 10 output units; 3 would match the iris classes, but extra units are harmless with sparse labels
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=50)
model.evaluate(x_test, y_test)
Example #48
def get_dataset():
    iris = datasets.load_iris()
    names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
    dataset = pd.DataFrame(iris.data, columns=names)
    dataset['class'] = iris.target
    return dataset
Example #49
def setUp(self):
    self.X, self.y = load_iris(return_X_y=True)
Example #50
def test_feature_importances_single_leaf(self):
    clf = lgb.LGBMClassifier(n_estimators=100)
    data = load_iris()
    clf.fit(data.data, data.target)
    importances = clf.feature_importances_
    self.assertEqual(len(importances), 4)
Example #51
                per[component, :].T.reshape(-1, 1) @ self.W[component, :].T.reshape(1, -1)

    def save_remodelling_components(self):
        self.save_result('remodelling_components.csv', self.scaled_remodelling_components)

    def save_transformed_data(self):
        self.save_result('scores.csv', self.T)


# -----PLS testing--------------------------------------------------------------------------------------------------
if __name__ == '__main__':
    path_to_data = os.path.join(str(Path.home()), 'Deformetrica', 'deterministic_atlas_ct',
                                'output_separate_tmp10_def10_prttpe13_corrected', 'Decomposition')
    data_filename = 'Momenta_Table.csv'

    data, target = load_iris(return_X_y=True)
    data = data[0:80, 0:3]
    target = target[0:80]
    pls = PLSBinaryClassification(dataset_filename=data_filename, dataset_path=path_to_data, X=data, y=target)
    pls.decompose_with_pls(method='da')

    plsr = PLSRegression(3, scale=False)
    x_plsr, y_plsr = plsr.fit_transform(pls.X_centered, pls.y)

    plt.scatter(plsr.x_scores_[pls.y == 1, 0], plsr.x_scores_[pls.y == 1, 1], c='red', marker='d')
    plt.scatter(plsr.x_scores_[pls.y == -1, 0], plsr.x_scores_[pls.y == -1, 1], c='blue', marker='x')
    x = np.linspace(-2, 2, 100)

    print('W:\n {}'.format(pls.W))
    print('xw:\n {}'.format(plsr.x_weights_))
    print('T:\n {}'.format(pls.T))
Example #52
def setUpClass(self):
    # runs once per test class
    iris_data = load_iris()
    self.X = iris_data['data']
    self.y = iris_data['target']
Example #53
import numpy as np
from IPython.display import display

from sklearn import datasets
iris = datasets.load_iris()

x = iris['data'][:, [2, 3]]
X = np.c_[np.ones((len(x), 1)), x]
theta = np.array([0, 0, 0]).reshape(-1, 1)
y = (iris['target'] == 2).reshape(-1, 1).astype(int)

from scipy.special import expit


# equals expit, but this is the form given in Hastie (p. 120)
def fct(z):
    return np.exp(z) / (np.exp(z) + 1)


for i in range(0, 100):
    z = X.dot(theta)
    a = fct(z)
    grad = X.T.dot(y - a)
    D = np.diag((-a * (1 - a)).ravel())
    H = X.T.dot(D).dot(X)
    theta = theta - np.linalg.inv(H).dot(grad)

display(theta)

import matplotlib.pyplot as plt
import seaborn
Example #54
import tensorflow as tf
from sklearn import datasets
from matplotlib import pyplot as plt
import numpy as np

# load the data
x_data = datasets.load_iris().data
y_data = datasets.load_iris().target

'''
Shuffle the data.
Using the same seed for both shuffles produces the same random sequence,
keeping the features and labels aligned.
'''
np.random.seed(116)
np.random.shuffle(x_data)
np.random.seed(116)
np.random.shuffle(y_data)
tf.random.set_seed(116)

# split the data into training and test sets: first 120 rows for training, last 30 for testing
x_train = x_data[:-30]
y_train = y_data[:-30]

x_test = x_data[-30:]
y_test = y_data[-30:]

# cast the data so the x sets share a single dtype
x_train = tf.cast(x_train,tf.float32)
x_test = tf.cast(x_test,tf.float32)

'''