def test_LabelBinarizer2(self):
    arr = np.array(['X', 'Y', 'Z', 'X'])
    s = pdml.ModelSeries(arr)

    lb = s.preprocessing.LabelBinarizer()
    s.fit(lb)
    binarized = s.transform(lb)
    self.assertTrue(isinstance(binarized, pdml.ModelFrame))

    expected = pd.DataFrame({0: [1, 0, 0, 1],
                             1: [0, 1, 0, 0],
                             2: [0, 0, 1, 0]})
    self.assert_frame_equal(binarized, expected)

    df = pdml.ModelFrame(datasets.load_iris())
    df.target.fit(lb)
    binarized = df.target.transform(lb)

    expected = pd.DataFrame({0: [1] * 50 + [0] * 100,
                             1: [0] * 50 + [1] * 50 + [0] * 50,
                             2: [0] * 100 + [1] * 50})
    self.assert_frame_equal(binarized, expected)

    df = pdml.ModelFrame(datasets.load_iris())
    df.target.fit(lb)
    df.target = df.target.transform(lb)
    self.assertEqual(df.shape, (150, 7))
    self.assert_frame_equal(df.target, expected)
def main():
    all_targets = load_iris()['target']
    data_set = load_iris()['data']
    train_set, test_set, targets, targets_test = train_test_split(
        data_set, all_targets, train_size=0.9)
    # One-vs-rest: binarize the targets once per class.
    targets_class = (transform_target_vars(targets, class_num=0),
                     transform_target_vars(targets, class_num=1),
                     transform_target_vars(targets, class_num=2))
    for n_trees in range(1, 150, 10):
        # One binary boosted classifier per class (a custom implementation;
        # n_trees/max_tree_depth/n_features are not sklearn's parameter names).
        classifiers = (GradientBoostingClassifier(n_trees=n_trees, max_tree_depth=1, n_features=3),
                       GradientBoostingClassifier(n_trees=n_trees, max_tree_depth=1, n_features=3),
                       GradientBoostingClassifier(n_trees=n_trees, max_tree_depth=1, n_features=3))
        classifiers[0].fit(train_set, targets_class[0])
        classifiers[1].fit(train_set, targets_class[1])
        classifiers[2].fit(train_set, targets_class[2])
        predicts = (classifiers[0].predict(test_set),
                    classifiers[1].predict(test_set),
                    classifiers[2].predict(test_set))
        fin_predict = decision_function(predicts[0], predicts[1], predicts[2])
        print("Number of trees:", n_trees, ":",
              accuracy_score(targets_test, fin_predict))
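# The snippet above relies on two helpers that are not shown. A minimal sketch
# of what they plausibly do, assuming one-vs-rest training with {-1, +1}
# binary targets and a highest-score vote; both bodies are assumptions, only
# the names come from the snippet:
import numpy as np

def transform_target_vars(targets, class_num):
    # Binarize a multi-class target vector for one one-vs-rest learner.
    return np.where(np.asarray(targets) == class_num, 1, -1)

def decision_function(pred0, pred1, pred2):
    # Pick, per sample, the class whose binary classifier scored highest.
    return np.argmax(np.vstack([pred0, pred1, pred2]), axis=0)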
def setUp(self):
    self.x = datasets.load_iris().data
    self.y = datasets.load_iris().target
    # test without pretraining
    self.model = dbn([nn.layer(4, linear, dlinear),
                      nn.layer(5, tanh, dtanh),
                      nn.layer(1, linear, dlinear, bias=False)],
                     False)
def main():
    data_set = load_iris()['data']
    target_set = load_iris()['target']
    cartTree = CartTree(min_leaf_size=5)
    cartTree.fit(data_set, target_set)
    print(cartTree.tree)
    print(target_set)
    print(numpy.array([int(round(cartTree.predict([x]))) for x in data_set]))
def createDataSet():
    dataSet = datasets.load_iris()
    iris_X = dataSet.data
    iris_y = dataSet.target
    np.random.seed(1)
    indices = np.random.permutation(len(iris_X))
    # Hold out the last 10 shuffled samples as a test set.
    iris_X_train = iris_X[indices[:-10]]
    iris_y_train = iris_y[indices[:-10]]
    iris_X_test = iris_X[indices[-10:]]
    iris_y_test = iris_y[indices[-10:]]
    return iris_X_train, iris_y_train, iris_X_test, iris_y_test
def test_load_iris():
    res = load_iris()
    assert_equal(res.data.shape, (150, 4))
    assert_equal(res.target.size, 150)
    assert_equal(res.target_names.size, 3)
    assert_true(res.DESCR)

    # test return_X_y option
    X_y_tuple = load_iris(return_X_y=True)
    bunch = load_iris()
    assert_true(isinstance(X_y_tuple, tuple))
    assert_array_equal(X_y_tuple[0], bunch.data)
    assert_array_equal(X_y_tuple[1], bunch.target)
def test_sparse_k_means_init_centers():
    from sklearn.datasets import load_iris

    iris = load_iris()
    X = iris.data

    # Get a local optimum
    centers = KMeans(n_clusters=3).fit(X).cluster_centers_

    # Fit starting from a local optimum shouldn't change the solution
    np.testing.assert_allclose(
        centers,
        KMeans(n_clusters=3, init=centers, n_init=1).fit(X).cluster_centers_
    )

    # The same should be true when X is sparse
    X_sparse = sp.csr_matrix(X)
    np.testing.assert_allclose(
        centers,
        KMeans(n_clusters=3, init=centers, n_init=1).fit(X_sparse).cluster_centers_
    )
def test_plot_partial_dependence_multiclass(pyplot):
    # Test partial dependence plot function on multi-class input.
    iris = load_iris()
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, iris.target)

    grid_resolution = 25
    plot_partial_dependence(clf, iris.data, [0, 1],
                            target=0, grid_resolution=grid_resolution)
    fig = pyplot.gcf()
    axs = fig.get_axes()
    assert len(axs) == 2
    assert all(ax.has_data for ax in axs)

    # now with symbol labels
    target = iris.target_names[iris.target]
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, target)

    grid_resolution = 25
    plot_partial_dependence(clf, iris.data, [0, 1],
                            target='setosa', grid_resolution=grid_resolution)
    fig = pyplot.gcf()
    axs = fig.get_axes()
    assert len(axs) == 2
    assert all(ax.has_data for ax in axs)
def check_classifiers_input_shapes(name, Classifier):
    iris = load_iris()
    X, y = iris.data, iris.target
    X, y = shuffle(X, y, random_state=1)
    X = StandardScaler().fit_transform(X)
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        classifier = Classifier()
    set_fast_parameters(classifier)
    set_random_state(classifier)
    # fit
    classifier.fit(X, y)
    y_pred = classifier.predict(X)

    set_random_state(classifier)
    # Check that when a 2D y is given, a DataConversionWarning is
    # raised
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", DataConversionWarning)
        warnings.simplefilter("ignore", RuntimeWarning)
        classifier.fit(X, y[:, np.newaxis])
    msg = "expected 1 DataConversionWarning, got: %s" % (
        ", ".join([str(w_x) for w_x in w]))
    assert_equal(len(w), 1, msg)
    assert_array_equal(y_pred, classifier.predict(X))
def test_cross_val_predict_with_method():
    iris = load_iris()
    X, y = iris.data, iris.target
    X, y = shuffle(X, y, random_state=0)
    classes = len(set(y))

    kfold = KFold(len(iris.target))

    methods = ['decision_function', 'predict_proba', 'predict_log_proba']
    for method in methods:
        est = LogisticRegression()

        predictions = cross_val_predict(est, X, y, method=method)
        assert_equal(len(predictions), len(y))

        expected_predictions = np.zeros([len(y), classes])
        func = getattr(est, method)

        # Naive loop (should be same as cross_val_predict):
        for train, test in kfold.split(X, y):
            est.fit(X[train], y[train])
            expected_predictions[test] = func(X[test])

        predictions = cross_val_predict(est, X, y, method=method, cv=kfold)
        assert_array_almost_equal(expected_predictions, predictions)
def test_score_memmap():
    # Ensure a scalar score of memmap type is accepted
    iris = load_iris()
    X, y = iris.data, iris.target
    clf = MockClassifier()
    tf = tempfile.NamedTemporaryFile(mode='wb', delete=False)
    tf.write(b'Hello world!!!!!')
    tf.close()
    scores = np.memmap(tf.name, dtype=np.float64)
    score = np.memmap(tf.name, shape=(), mode='r', dtype=np.float64)
    try:
        cross_val_score(clf, X, y, scoring=lambda est, X, y: score)
        # non-scalar should still fail
        assert_raises(ValueError, cross_val_score, clf, X, y,
                      scoring=lambda est, X, y: scores)
    finally:
        # Best effort to release the mmap file handles before deleting the
        # backing file under Windows
        scores, score = None, None
        for _ in range(3):
            try:
                os.unlink(tf.name)
                break
            except WindowsError:
                sleep(1.)
def load_iris_data():
    # load the iris dataset from the sklearn module
    iris = datasets.load_iris()
    # extract the elements of the data that are used in this exercise
    return (iris.data, iris.target, iris.target_names)
def test_check_estimator_clones():
    # check that check_estimator doesn't modify the estimator it receives
    from sklearn.datasets import load_iris
    iris = load_iris()

    for Estimator in [GaussianMixture, LinearRegression,
                      RandomForestClassifier, NMF, SGDClassifier,
                      MiniBatchKMeans]:
        with ignore_warnings(category=FutureWarning):
            # when 'est = SGDClassifier()'
            est = Estimator()
        set_checking_parameters(est)
        set_random_state(est)
        # without fitting
        old_hash = joblib.hash(est)
        check_estimator(est)
        assert_equal(old_hash, joblib.hash(est))

        with ignore_warnings(category=FutureWarning):
            # when 'est = SGDClassifier()'
            est = Estimator()
        set_checking_parameters(est)
        set_random_state(est)
        # with fitting
        est.fit(iris.data + 10, iris.target)
        old_hash = joblib.hash(est)
        check_estimator(est)
        assert_equal(old_hash, joblib.hash(est))
def testIrisSummaries(self):
    random.seed(42)
    iris = datasets.load_iris()
    classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
    classifier.fit(iris.data, iris.target, logdir='/tmp/skflow_tests/')
    score = accuracy_score(classifier.predict(iris.data), iris.target)
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def testIris_proba(self):
    random.seed(42)
    iris = datasets.load_iris()
    classifier = skflow.TensorFlowClassifier(n_classes=3)
    classifier.fit(iris.data, iris.target)
    score = log_loss(iris.target, classifier.predict_proba(iris.data))
    self.assertLess(score, 0.8, "Failed with score = {0}".format(score))
def test_feature_union():
    # basic sanity check for feature union
    iris = load_iris()
    X = iris.data
    X -= X.mean(axis=0)
    y = iris.target
    svd = TruncatedSVD(n_components=2, random_state=0)
    select = SelectKBest(k=1)
    fs = FeatureUnion([("svd", svd), ("select", select)])
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    assert_equal(X_transformed.shape, (X.shape[0], 3))

    # check if it does the expected thing
    assert_array_almost_equal(X_transformed[:, :-1], svd.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())

    # test if it also works for sparse input
    # We use a different svd object to control the random_state stream
    fs = FeatureUnion([("svd", svd), ("select", select)])
    X_sp = sparse.csr_matrix(X)
    X_sp_transformed = fs.fit_transform(X_sp, y)
    assert_array_almost_equal(X_transformed, X_sp_transformed.toarray())

    # test setting parameters
    fs.set_params(select__k=2)
    assert_equal(fs.fit_transform(X, y).shape, (X.shape[0], 4))

    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", TransfT()), ("svd", svd), ("select", select)])
    X_transformed = fs.fit_transform(X, y)
    assert_equal(X_transformed.shape, (X.shape[0], 8))
def test_feature_union_weights():
    # test feature union with transformer weights
    iris = load_iris()
    X = iris.data
    y = iris.target
    pca = RandomizedPCA(n_components=2, random_state=0)
    select = SelectKBest(k=1)
    # test using fit followed by transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    # test using fit_transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    X_fit_transformed = fs.fit_transform(X, y)
    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", TransfT()), ("pca", pca), ("select", select)],
                      transformer_weights={"mock": 10})
    X_fit_transformed_wo_method = fs.fit_transform(X, y)
    # check against expected result

    # We use a different pca object to control the random_state stream
    assert_array_almost_equal(X_transformed[:, :-1],
                              10 * pca.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_array_almost_equal(X_fit_transformed[:, :-1],
                              10 * pca.fit_transform(X))
    assert_array_equal(X_fit_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_equal(X_fit_transformed_wo_method.shape, (X.shape[0], 7))
def test_svm():
    iris = load_iris()
    # Select only the class-0 and class-1 samples (binary problem).
    X, Y = zip(*[(x, y) for x, y in zip(iris.data, iris.target) if y in [0, 1]])
    svm = SVM(C=1.0, kernel='rbf')
    svm.fit(X, Y)
    assert_almost_equal(svm.cost, 2.4034163345438264)
def test_pipeline_methods_preprocessing_svm():
    # Test the various methods of the pipeline (preprocessing + svm).
    iris = load_iris()
    X = iris.data
    y = iris.target
    n_samples = X.shape[0]
    n_classes = len(np.unique(y))
    scaler = StandardScaler()
    pca = RandomizedPCA(n_components=2, whiten=True)
    clf = SVC(probability=True, random_state=0)

    for preprocessing in [scaler, pca]:
        pipe = Pipeline([('preprocess', preprocessing), ('svc', clf)])
        pipe.fit(X, y)

        # check shapes of various prediction functions
        predict = pipe.predict(X)
        assert_equal(predict.shape, (n_samples,))

        proba = pipe.predict_proba(X)
        assert_equal(proba.shape, (n_samples, n_classes))

        log_proba = pipe.predict_log_proba(X)
        assert_equal(log_proba.shape, (n_samples, n_classes))

        decision_function = pipe.decision_function(X)
        assert_equal(decision_function.shape, (n_samples, n_classes))

        pipe.score(X, y)
def test_classification_report_multiclass():
    """Test performance report"""
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with class names
    expected_report = """\
             precision    recall  f1-score   support

     setosa       0.83      0.79      0.81        24
 versicolor       0.33      0.10      0.15        31
  virginica       0.42      0.90      0.57        20

avg / total       0.51      0.53      0.47        75
"""
    report = classification_report(
        y_true, y_pred, labels=np.arange(len(iris.target_names)),
        target_names=iris.target_names)
    assert_equal(report, expected_report)

    # print classification report with label detection
    expected_report = """\
             precision    recall  f1-score   support

          0       0.83      0.79      0.81        24
          1       0.33      0.10      0.15        31
          2       0.42      0.90      0.57        20

avg / total       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
def test_classification_report():
    """Test performance report"""
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with class names
    expected_report = """\
             precision    recall  f1-score   support

     setosa       0.82      0.92      0.87        25
 versicolor       0.56      0.17      0.26        30
  virginica       0.47      0.90      0.62        20

avg / total       0.62      0.61      0.56        75
"""
    report = classification_report(
        y_true, y_pred, labels=range(len(iris.target_names)),
        target_names=iris.target_names)
    assert_equal(report, expected_report)

    # print classification report with label detection
    expected_report = """\
             precision    recall  f1-score   support

          0       0.82      0.92      0.87        25
          1       0.56      0.17      0.26        30
          2       0.47      0.90      0.62        20

avg / total       0.62      0.61      0.56        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
def test_classification_report_multiclass_with_digits():
    """Test performance report with added digits in floating point values"""
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with class names
    expected_report = """\
             precision    recall  f1-score   support

     setosa    0.82609   0.79167   0.80851        24
 versicolor    0.33333   0.09677   0.15000        31
  virginica    0.41860   0.90000   0.57143        20

avg / total    0.51375   0.53333   0.47310        75
"""
    report = classification_report(
        y_true, y_pred, labels=np.arange(len(iris.target_names)),
        target_names=iris.target_names, digits=5)
    assert_equal(report, expected_report)

    # print classification report with label detection
    expected_report = """\
             precision    recall  f1-score   support

          0       0.83      0.79      0.81        24
          1       0.33      0.10      0.15        31
          2       0.42      0.90      0.57        20

avg / total       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
def main():
    # http://scikit-learn.org/stable/tutorial/basic/tutorial.html#loading-an-example-dataset
    # "A dataset is a dictionary-like object that holds all the data and some
    # metadata about the data. This data is stored in the .data member, which
    # is a n_samples, n_features array. In the case of supervised problem, one
    # or more response variables are stored in the .target member."

    # Toy datasets
    iris = datasets.load_iris()          # The iris dataset (classification)
    digits = datasets.load_digits()      # The digits dataset (classification)
    #boston = datasets.load_boston()     # The boston house-prices dataset (regression)
    #diabetes = datasets.load_diabetes() # The diabetes dataset (regression)
    #linnerud = datasets.load_linnerud() # The linnerud dataset (multivariate regression)

    print(iris.feature_names)
    print(iris.data)
    print(iris.target_names)
    print(iris.target)

    print(digits.images[0])
    print(digits.target_names)
    print(digits.target)

    plt.imshow(digits.images[0], cmap='gray', interpolation='nearest')
    plt.show()
def test_sparse_fit_params():
    iris = load_iris()
    X, y = iris.data, iris.target
    clf = MockClassifier()
    fit_params = {'sparse_sample_weight': coo_matrix(np.eye(X.shape[0]))}
    a = cval.cross_val_score(clf, X, y, fit_params=fit_params)
    assert_array_equal(a, np.ones(3))
""" ====================== Discriminant Functions ====================== """ import numpy as np from sklearn.datasets import load_iris from sklearn.metrics import classification_report from sklvq import GLVQ from sklvq.discriminants import DiscriminantBaseClass data, labels = load_iris(return_X_y=True) ############################################################################### # The sklvq package contains a single discriminant function and additions are very welcome. Note # that they should work with the sklvq.objectives.GeneralizedLearningObjective, i.e., # passing additional or different arguments is not possible. # The discriminative function is depended on the objective function. This determines the # parameters of the call and gradient. See sklvq.objective.GeneralizedLearningObjective. class CustomRelativeDistance(DiscriminantBaseClass): def __call__(self, dist_same: np.ndarray, dist_diff: np.ndarray) -> np.ndarray: # dist_same = distance to prototype with same label as X. # dist_diff = distance to prototype with different label as X. return (dist_same - dist_diff) / (dist_same + dist_diff) def gradient(self, dist_same: np.ndarray, dist_diff: np.ndarray, winner_same: bool) -> np.ndarray:
    cluster = tf.contrib.factorization.KMeansClustering(
        num_clusters=numClusters,
        initial_clusters=tf.contrib.factorization.KMeansClustering.
        KMEANS_PLUS_PLUS_INIT)
    cluster.train(input_fn=get_inputs, steps=2000)
    y_pred = cluster.predict_cluster_index(input_fn=get_inputs)
    y_pred = np.asarray(list(y_pred))
    return y_pred


def plotFigure(fignum, title, X, y):
    fig = plt.figure(fignum, figsize=(8, 6))
    ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
    ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y.astype(np.float), edgecolor='k')
    ax.w_xaxis.set_ticklabels([])
    ax.w_yaxis.set_ticklabels([])
    ax.w_zaxis.set_ticklabels([])
    ax.set_xlabel('Petal width')
    ax.set_ylabel('Sepal length')
    ax.set_zlabel('Petal length')
    ax.set_title(title)
    ax.dist = 10
    fig.show()


if __name__ == '__main__':
    # sess = tf.Session()
    X, y = loadData(datasets.load_iris())
    y_pred = kmeansCluster(X, 1)
    plotFigure(1, "3 clusters", X, y_pred)
    plotFigure(2, "Ground Truth", X, y)
def iris():
    """Return the Iris data set feature matrix."""
    X, _ = load_iris(return_X_y=True)
    return X
def train(output_dir='outputs', kernel='linear', penalty=1.0):
    # make sure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Safely get the Azure ML run
    run = get_AMLRun()

    # loading the iris dataset
    iris = datasets.load_iris()

    # X -> features, y -> label
    X = iris.data
    y = iris.target
    class_names = iris.target_names

    # dividing X, y into train and test data. Random seed for reproducibility
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.20, random_state=0)

    # create our model - a linear SVM classifier
    svm_model_linear = SVC(kernel=kernel, C=penalty)

    # evaluate each model in turn
    kfold = StratifiedKFold(n_splits=10, random_state=1)
    cv_results = cross_val_score(svm_model_linear, X_train, y_train,
                                 cv=kfold, scoring='accuracy')
    print('Cross Validation Mean: ', cv_results.mean())
    print('Cross Validation Std: ', cv_results.std())
    if run is not None:
        run.log_list('Cross Validation Accuracies', cv_results)
        run.log('Cross Validation Mean', cv_results.mean())
        run.log('Cross Validation Std', cv_results.std())

    # now training on the full dataset
    svm_model_linear.fit(X_train, y_train)
    y_pred = svm_model_linear.predict(X_test)

    # model accuracy for X_test
    accuracy = svm_model_linear.score(X_test, y_test)
    print('Accuracy of SVM classifier on test set: {:.2f}'.format(accuracy))
    if run is not None:
        run.log('Accuracy', float(accuracy))

    # Plot non-normalized confusion matrix
    title = 'Test confusion matrix'
    disp = plot_confusion_matrix(svm_model_linear, X_test, y_test,
                                 display_labels=class_names,
                                 cmap=plt.cm.Blues)
    disp.ax_.set_title(title)
    print(title)
    print(disp.confusion_matrix)
    if run is not None:
        run.log_image(title, plot=plt)
    else:
        plt.savefig(os.path.join(output_dir, 'confusion_matrix.png'))

    # Plot normalized confusion matrix
    title = 'Normalized test confusion matrix'
    disp = plot_confusion_matrix(svm_model_linear, X_test, y_test,
                                 display_labels=class_names,
                                 cmap=plt.cm.Blues, normalize='true')
    disp.ax_.set_title(title)
    print(title)
    print(disp.confusion_matrix)
    if run is not None:
        run.log_image(title, plot=plt)
    else:
        plt.savefig(os.path.join(output_dir,
                                 'confusion_matrix_normalised.png'))

    # Print classification report
    print(classification_report(y_test, y_pred))

    # files saved in the "outputs" folder are automatically uploaded into
    # Azure ML Service run history
    model_folder = os.path.join(output_dir, 'model')
    model_path = os.path.join(model_folder, 'covid-tweets-analyis.joblib')
    os.makedirs(model_folder, exist_ok=True)
    joblib.dump(svm_model_linear, model_path)
    print('Output saved to', output_dir)
def loadData():
    iris = datasets.load_iris()
    # Append 20 uniform noise features to the four informative iris features.
    E = np.random.uniform(0, 0.1, size=(len(iris.data), 20))
    X = np.hstack((iris.data, E))
    y = iris.target
    return X, y
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
import plotly.express as px

iris = load_iris()  # returns a simple dictionary-like object with all the data

print("IRIS Dataset Size : ", iris.data.shape, iris.target.shape)
print("IRIS Flower Names : ", iris.target_names)
print("IRIS Flower Feature Names : ", iris.feature_names)

# Creating dataframe of total data
iris_df = pd.DataFrame(data=np.concatenate((iris.data, iris.target.reshape(-1, 1)), axis=1),
                       columns=(iris.feature_names + ['Flower Type']))
iris_df["Flower Name"] = [iris.target_names[int(i)] for i in iris_df["Flower Type"]]
print(iris_df.head())

chart1 = px.scatter(data_frame=iris_df,
                    x="sepal length (cm)", y="petal length (cm)",
                    color="Flower Name", size=[1.0] * 150,
                    title="sepal length (cm) vs petal length (cm) color-encoded by flower type")
chart1
def generate_train_data_iris():
    iris = datasets.load_iris()
    # Use petal length and petal width (columns 2 and 3) as the features.
    return iris['data'][:, 2:4], iris['target']
def __init__(self):
    self.iris = datasets.load_iris()
margin = 1 / np.sqrt(np.sum(linear_svm.coef_ ** 2))
yy_down = yy - np.sqrt(1 + a ** 2) * margin
yy_up = yy + np.sqrt(1 + a ** 2) * margin

plt.plot(xx, yy, 'k-')
plt.plot(xx, yy_down, 'k--')
plt.plot(xx, yy_up, 'k--')

style.use('ggplot')

# create linear SVM
linear_svm = svm.SVC(kernel='linear')

# import source data
iris = load_iris()

# setup source dataframe
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['is_train'] = np.random.uniform(0, 1, len(df)) <= 0.75
df['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)
df['species_codes'] = df['species'].cat.codes

# Linear SVM: Separable - take only two classes from the iris dataset
# create training set
X_tr = df[df['is_train'] == True]
X_tr = pd.concat(
    [X_tr[X_tr['species_codes'] == 0], X_tr[X_tr['species_codes'] == 1]])
y_tr = X_tr['species_codes']
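# The margin-plotting block at the top of this snippet uses a, xx and yy, which
# are defined elsewhere in the original script. A sketch of the likely missing
# setup (an assumption, following scikit-learn's maximum-margin separating
# hyperplane example); it would run after fitting linear_svm on two features
# and before the plotting block:
w = linear_svm.coef_[0]
a = -w[0] / w[1]  # slope of the decision boundary in feature space
xx = np.linspace(df.iloc[:, 0].min(), df.iloc[:, 0].max())
yy = a * xx - linear_svm.intercept_[0] / w[1]  # the decision boundary line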
from sklearn.datasets import load_iris
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import tensorflow as tf

"""
Train on the iris dataset
"""
# 1. Read in the data
x_data = load_iris().data
y_data = load_iris().target
# x = load_iris().data
# y_data = load_iris().target
# x_data = DataFrame(x, columns=load_iris().feature_names)
# pd.set_option("display.unicode.east_asian_width", True)  # set the display format
# print("x_data add index:\n", x_data)
# x_data["Type"] = y_data  # add a class column
# print("x_data add index:\n", x_data)

# 2. Shuffle the data
# sklearn's built-in train/test split is not used here
# x_data holds the data, including features and classes
np.random.seed(116)  # random seed
np.random.shuffle(x_data)
np.random.seed(116)  # random seed
np.random.shuffle(y_data)
tf.random.set_seed(116)

# 3. Split the dataset into a training set and a test set
# (manual train/test split)
# the training set and the test set do not overlap
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets

if __name__ == '__main__':
    irysy = datasets.load_iris()
    X = irysy.data[:, :2]
    y = irysy.target

    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    h = (x_max - x_min) / 100  # mesh step size
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # name the classifier so it does not shadow the svm module
    clf = svm.SVC(kernel='linear', C=2.0)
    clf.fit(X, y)

    X_plot = np.c_[xx.ravel(), yy.ravel()]
    Z = clf.predict(X_plot)
    Z = Z.reshape(xx.shape)

    plt.figure()
    plt.contour(xx, yy, Z)
    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.show()
def main():
    # Load the dataset
    iris = datasets.load_iris()
# Slowest, but gives the best performance.
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime
import time

# 1. Data
dataset = load_iris()
x_train, x_test, y_train, y_test = train_test_split(
    dataset.data, dataset.target, train_size=0.8, random_state=33)

start = time.time()

# 2. Model
model = XGBClassifier(n_jobs=8, use_label_encoder=False)
# n_jobs = -1 => use all CPU cores
# use_label_encoder=False suppresses the label-encoder warning

# 3. Training
model.fit(x_train, y_train, eval_metric='logloss')
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Mon Jan 15 14:12:15 2018 @author: chengch """ import numpy as np import matplotlib.pyplot as plt import sklearn from sklearn.datasets import load_iris oringal_data = load_iris() data = oringal_data['data'] def get_cov(data,k=1): for i in range(len(data[1])): data[:,i] = data[:,i] - np.mean(data[:,i]) covariance_matrix = [] for j in range(len(data[1])): for k in range(len(data[1])): dat = np.dot(data[:,j],data[:,k]) / (len(data) - 1) covariance_matrix.append(dat) covariance_matrix = np.reshape(covariance_matrix,(len(data[1]),len(data[1]))) us,vs = np.linalg.eig(covariance_matrix) us_ind = np.argsort(-us) result = [] for ind in range(k):
import sys, os
import numpy as np
sys.path.append(os.getcwd() + r'\Modules')
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from Perceptron_Class import Perceptron

iris = datasets.load_iris()  # read the iris data
X = iris["data"][:, (2, 3)]  # select petal length and petal width as the features
y = 2 * (iris["target"] == 2).astype(
    np.int64) - 1  # build the labels (is class 2: +1 (True) -> +1, is not: 0 (False) -> -1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=5)

model = Perceptron()
model.fit(X_train, y_train)
model.predict(X_test)

plt.style.use('seaborn-darkgrid')

# Plot the training points and the test points on two separate axes
fig, axs = plt.subplots(1, 2, figsize=(9, 4))  # a figure with a 1x2 grid of Axes
axs[0].scatter(X_train[:, 0], X_train[:, 1], c=list(y_train),
               cmap=plt.cm.seismic, edgecolors='none', s=6)
# 200622_25
# iris, multi-class classification, complete version

from sklearn.feature_selection import SelectFromModel
import numpy as np
from xgboost import XGBClassifier, XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_boston, load_breast_cancer, load_iris
from sklearn.metrics import accuracy_score, r2_score

### Data ###
x, y = load_iris(return_X_y=True)
print(x.shape)  # (150, 4)
print(y.shape)  # (150, )

x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.8, shuffle=True, random_state=66)

### Model ###
model = XGBClassifier(objective='multi:softmax', n_estimators=300,
                      learning_rate=0.1)

### Training ###
model.fit(x_train, y_train, verbose=True,
          eval_metric=['mlogloss', 'merror'],
          eval_set=[(x_train, y_train), (x_test, y_test)],
          early_stopping_rounds=20)
# Stopping. Best iteration: [0]
from sklearn.datasets import load_iris
from sklearn import tree

# 1. Let's Create Data Set
irisData = load_iris()
print("===IRIS DATASET===")
print(irisData)
print(type(irisData))
print()

# Array of Features :)
print("===IRIS DATA FEATURES===")
print(irisData.data)
print()

# Array of Targets
print("===IRIS DATA TARGET===")
print(irisData.target)
print()

# Array of Target Names
print("===IRIS DATA TARGET NAMES===")
print(irisData.target_names)

# 2. Let's Create Model
model = tree.DecisionTreeClassifier()

# 3. Train the Model | Supervised Learning
def load_data():
    iris = datasets.load_iris()
    x_train = iris.data
    y_train = iris.target
    return train_test_split(x_train, y_train, test_size=0.25,
                            random_state=0, stratify=y_train)
def iris_dataframe():
    iris = datasets.load_iris()
    return pd.DataFrame(iris.data[:, :2], columns=iris.feature_names[:2])
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'Model evaluation with sklearn, again on the iris dataset'

import numpy as np
import pandas as pd
from sklearn import datasets, metrics
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the iris dataset:
scikit_iris = datasets.load_iris()
pd_iris = pd.DataFrame(
    data=np.c_[scikit_iris['data'], scikit_iris['target']],
    columns=np.append(scikit_iris['feature_names'], 'y')
)

x = pd_iris[scikit_iris['feature_names']]
y = pd_iris['y']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0)

knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(x_train, y_train)

# Score the model on the training set and the test set separately:
y_predict_on_train = knn.predict(x_train)
y_predict_on_test = knn.predict(x_test)
print('Train accuracy: {:.2%}'.format(
    metrics.accuracy_score(y_train, y_predict_on_train)))
print('Test accuracy: {:.2%}'.format(
    metrics.accuracy_score(y_test, y_predict_on_test)))
def test_iris_f_min(op, num_folds=5):
    from sklearn import datasets
    iris = datasets.load_iris()
    return test_f_min(op, iris.data, iris.target, num_folds=num_folds)
def test_pipeline_column_transformer(self):
    iris = datasets.load_iris()
    X = iris.data[:, :3]
    y = iris.target
    X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
    X_train["vcat"] = X_train["vA"].apply(
        lambda x: "cat1" if x > 0.5 else "cat2")
    X_train["vcat2"] = X_train["vB"].apply(
        lambda x: "cat3" if x > 0.5 else "cat4")
    y_train = y % 2
    numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
    categorical_features = [3, 4]  # ["vcat", "vcat2"]

    classifier = LogisticRegression(
        C=0.01,
        class_weight=dict(zip([False, True], [0.2, 0.8])),
        n_jobs=1, max_iter=10, solver="lbfgs", tol=1e-3)

    numeric_transformer = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ])

    categorical_transformer = Pipeline(steps=[
        (
            "onehot",
            OneHotEncoder(sparse=True, handle_unknown="ignore"),
        ),
        (
            "tsvd",
            TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4),
        ),
    ])

    preprocessor = ColumnTransformer(transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ])

    model = Pipeline(steps=[("preprocessor", preprocessor),
                            ("classifier", classifier)])

    model.fit(X_train, y_train)
    initial_type = [
        ("numfeat", FloatTensorType([None, 3])),
        ("strfeat", StringTensorType([None, 2])),
    ]

    X_train = X_train[:11]
    model_onnx = convert_sklearn(model, initial_types=initial_type,
                                 target_opset=TARGET_OPSET)

    dump_data_and_model(
        X_train, model, model_onnx,
        basename="SklearnPipelineColumnTransformerPipeliner")

    if __name__ == "__main__":
        from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer

        pydot_graph = GetPydotGraph(
            model_onnx.graph,
            name=model_onnx.graph.name,
            rankdir="TB",
            node_producer=GetOpNodeProducer("docstring"))
        pydot_graph.write_dot("graph.dot")

        import os
        os.system("dot -O -G=300 -Tpng graph.dot")
import tensorflow as tf
from sklearn.datasets import load_iris

data = load_iris()

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.33, random_state=42)

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.2),
    # 10 output units works with sparse_categorical_crossentropy because the
    # iris labels are 0-2, but 3 units would be sufficient.
    tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=50)
model.evaluate(x_test, y_test)
def get_dataset():
    iris = datasets.load_iris()
    names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
    dataset = pd.DataFrame(iris.data, columns=names)
    dataset['class'] = iris.target
    return dataset
def setUp(self):
    self.X, self.y = load_iris(return_X_y=True)
def test_feature_importances_single_leaf(self):
    clf = lgb.LGBMClassifier(n_estimators=100)
    data = load_iris()
    clf.fit(data.data, data.target)
    importances = clf.feature_importances_
    self.assertEqual(len(importances), 4)
            per[component, :].T.reshape(-1, 1) @ self.W[component, :].T.reshape(1, -1)

    def save_remodelling_components(self):
        self.save_result('remodelling_components.csv',
                         self.scaled_remodelling_components)

    def save_transformed_data(self):
        self.save_result('scores.csv', self.T)


# -----PLS testing--------------------------------------------------------------------------------------------------
if __name__ == '__main__':
    path_to_data = os.path.join(str(Path.home()), 'Deformetrica',
                                'deterministic_atlas_ct',
                                'output_separate_tmp10_def10_prttpe13_corrected',
                                'Decomposition')
    data_filename = 'Momenta_Table.csv'

    data, target = load_iris(return_X_y=True)
    data = data[0:80, 0:3]
    target = target[0:80]

    pls = PLSBinaryClassification(dataset_filename=data_filename,
                                  dataset_path=path_to_data,
                                  X=data, y=target)
    pls.decompose_with_pls(method='da')

    plsr = PLSRegression(3, scale=False)
    x_plsr, y_plsr = plsr.fit_transform(pls.X_centered, pls.y)

    plt.scatter(plsr.x_scores_[pls.y == 1, 0], plsr.x_scores_[pls.y == 1, 1],
                c='red', marker='d')
    plt.scatter(plsr.x_scores_[pls.y == -1, 0], plsr.x_scores_[pls.y == -1, 1],
                c='blue', marker='x')
    x = np.linspace(-2, 2, 100)

    print('W:\n {}'.format(pls.W))
    print('xw:\n {}'.format(plsr.x_weights_))
    print('T:\n {}'.format(pls.T))
@classmethod
def setUpClass(cls):
    # runs once per test class
    iris_data = load_iris()
    cls.X = iris_data['data']
    cls.y = iris_data['target']
import numpy as np
from IPython.display import display
from sklearn import datasets

iris = datasets.load_iris()
x = iris['data'][:, [2, 3]]
X = np.c_[np.ones((len(x), 1)), x]  # prepend a bias column of ones
theta = np.array([0, 0, 0]).reshape(-1, 1)
y = (iris['target'] == 2).reshape(-1, 1).astype(int)

from scipy.special import expit


# = expit, but this is the form used in Hastie (p. 120)
def fct(z):
    return np.exp(z) / (np.exp(z) + 1)


# Newton-Raphson iterations for logistic regression
for i in range(0, 100):
    z = X.dot(theta)
    a = fct(z)
    grad = X.T.dot(y - a)
    D = np.diag((-a * (1 - a)).ravel())
    H = X.T.dot(D).dot(X)
    theta = theta - np.linalg.inv(H).dot(grad)

display(theta)

import matplotlib.pyplot as plt
import seaborn
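# The loop above is plain unregularized Newton-Raphson, so its result can be
# cross-checked against sklearn's LogisticRegression with the penalty
# effectively switched off. A sketch (C=1e10 approximates "no penalty";
# fit_intercept=False because X already carries a bias column):
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(C=1e10, fit_intercept=False, max_iter=1000)
lr.fit(X, y.ravel())
print(lr.coef_)  # should be close to theta.ravel()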
import tensorflow as tf
from sklearn import datasets
from matplotlib import pyplot as plt
import numpy as np

# Load the data
x_data = datasets.load_iris().data
y_data = datasets.load_iris().target

'''
Shuffle the data.
seed is the random seed; it guarantees the same random numbers on every run.
'''
np.random.seed(116)
np.random.shuffle(x_data)
np.random.seed(116)
np.random.shuffle(y_data)
tf.random.set_seed(116)

# Split into training and test sets: the first 120 samples form the
# training set, the last 30 the test set
x_train = x_data[:-30]
y_train = y_data[:-30]
x_test = x_data[-30:]
y_test = y_data[-30:]

# Cast so that all x data share the same dtype
x_train = tf.cast(x_train, tf.float32)
x_test = tf.cast(x_test, tf.float32)
'''