def test_perceptron(self):
    """Fit the perceptron on ``self.x`` / ``self.y`` and save its diagnostic plots.

    Two images are written under the ``perceptron/`` plot directory: the
    learning curve built from ``perceptron.cost`` and the decision boundary
    of the fitted classifier.
    """
    # train the perceptron model
    model = Perceptron(learning_rate=0.1, num_epochs=10)
    model.fit(self.x, self.y)

    # plot learning curve (one entry of ``model.cost`` per plotted point)
    learning_curve_spec = {
        'cost_length': len(model.cost),
        'cost': model.cost,
        'marker': 'o',
        'x_label': 'Epochs',
        'y_label': 'Number of updates',
        'title': 'Perceptron - Learning rate 0.1'
    }
    learning_curve_path = FilesystemUtils.get_test_resources_plot_file_name(
        'perceptron/Perceptron-Learning-Curve.png')
    Plotter.plot_learning_curve(learning_curve_spec, learning_curve_path)

    # plot decision boundary
    boundary_options = {
        'x_label': 'sepal length [cm]',
        'y_label': 'petal length [cm]',
        'legend': 'upper left'
    }
    boundary_path = FilesystemUtils.get_test_resources_plot_file_name(
        'perceptron/Perceptron-Decision-Boundary.png')
    Plotter.plot_decision_boundary(
        self.x,
        self.y,
        classifier=model,
        diagram_options=boundary_options,
        image_file_path=boundary_path)
def test_scikit_learn_decision_tree(self):
    """Fit a depth-limited Gini decision tree and export its plots.

    Saves the decision-boundary image produced by ``predict_and_evaluate``
    and a PNG rendering of the fitted tree itself.
    """
    # train the decision tree
    classifier = DecisionTreeClassifier(criterion='gini', max_depth=4, random_state=1)
    classifier.fit(self.x_train, self.y_train)

    boundary_path = FilesystemUtils.get_test_resources_plot_file_name(
        'decision_tree/DecisionTree-ScikitLearn-Decision-Boundary.png')
    self.predict_and_evaluate(classifier, image_file_path=boundary_path)

    # save the tree as a diagram: Graphviz dot source first, then a PNG
    dot_source = export_graphviz(
        classifier,
        out_file=None,
        feature_names=['petal length', 'petal width'],
        class_names=['Setosa', 'Versicolor', 'Virginica'],
        filled=True,
        rounded=True)
    tree_graph = graph_from_dot_data(dot_source)
    diagram_path = FilesystemUtils.get_test_resources_plot_file_name(
        'decision_tree/DecisionTree-ScikitLearn-Tree-Diagram.png')
    tree_graph.write_png(diagram_path)
def test_logistic_regresssion(self):
    """Train the batch-gradient-descent logistic regression and save its plots.

    Writes the learning curve (log10 of the recorded cost per epoch) and the
    decision boundary of the fitted model under ``logistic_regression/``.
    """
    # train the logistic regression model
    logistic_regression = LogisticRegressionBGD(learning_rate=0.05, num_epochs=1000)
    logistic_regression.fit(self.x, self.y)

    # plot learning curve
    curve = {
        'cost_length': len(logistic_regression.cost),
        'cost': np.log10(logistic_regression.cost),
        'marker': 'o',
        'x_label': 'Epochs',
        # the plotted values are log10(cost), so the axis is labelled accordingly
        # (it used to carry the perceptron's 'Number of updates' label)
        'y_label': 'log(Logistic cost)',
        'title': 'Logistic regression - Learning rate 0.05'
    }
    Plotter.plot_learning_curve(
        curve,
        FilesystemUtils.get_test_resources_plot_file_name(
            'logistic_regression/LogisticRegressionBGD-Learning-Curve.png'))

    # plot decision boundary
    diagram_options = {
        'x_label': 'sepal length [cm]',
        'y_label': 'petal length [cm]',
        'legend': 'upper left'
    }
    Plotter.plot_decision_boundary(
        self.x,
        self.y,
        classifier=logistic_regression,
        diagram_options=diagram_options,
        image_file_path=FilesystemUtils.get_test_resources_plot_file_name(
            'logistic_regression/LogisticRegressionBGD-Decision-Boundary.png'))
def test_adaline(self):
    """Train Adaline (batch gradient descent) with two learning rates and plot both.

    Saves a side-by-side learning-curve figure (log10 of the recorded cost)
    and one decision-boundary figure per model under ``adaline/``.
    """
    # train the first model with the bigger learning rate
    adaline1 = AdalineBGD(learning_rate=0.01, num_epochs=30)
    adaline1.fit(self.x, self.y)

    # train the second model with the smaller learning rate
    adaline2 = AdalineBGD(learning_rate=0.0001, num_epochs=30)
    adaline2.fit(self.x, self.y)

    # plot multiple learning curves for both trained models
    curves = [{
        'cost_length': len(adaline1.cost),
        'cost': np.log10(adaline1.cost),
        'marker': 'o',
        'x_label': 'Epochs',
        'y_label': 'log(Sum-squared-error)',
        # title must match the learning rate actually used above (0.01, not 0.1)
        'title': 'Adaline - Learning rate 0.01'
    }, {
        'cost_length': len(adaline2.cost),
        'cost': np.log10(adaline2.cost),
        'marker': 'o',
        'x_label': 'Epochs',
        'y_label': 'log(Sum-squared-error)',
        'title': 'Adaline - Learning rate 0.0001'
    }]
    Plotter.plot_multiple_learning_curves(
        curves,
        image_file_path=FilesystemUtils.get_test_resources_plot_file_name(
            'adaline/AdalineBGD-Learning-Curves.png'))

    # plot decision boundary for the divergent model (adaline 1)
    Plotter.plot_decision_boundary(
        self.x,
        self.y,
        classifier=adaline1,
        diagram_options={
            'x_label': 'sepal length [cm]',
            'y_label': 'petal length [cm]',
            'legend': 'upper left'
        },
        image_file_path=FilesystemUtils.get_test_resources_plot_file_name(
            'adaline/AdalineBGD-Decision-Boundary-Divergent.png'))

    # plot decision boundary for the convergent model (adaline 2)
    Plotter.plot_decision_boundary(
        self.x,
        self.y,
        classifier=adaline2,
        diagram_options={
            'x_label': 'sepal length [cm]',
            'y_label': 'petal length [cm]',
            'legend': 'upper left'
        },
        image_file_path=FilesystemUtils.get_test_resources_plot_file_name(
            'adaline/AdalineBGD-Decision-Boundary-Convergent.png'))
def setUp(self):
    """Load the Iris subset into ``self.x`` / ``self.y`` and save a plot of it."""
    # load subset of Iris data
    data_file = FilesystemUtils.get_resources_data_file_name('iris/iris.data')
    reader = IrisDataReader(data_file)
    self.x, self.y = reader.get_data()

    # plot the data and save it to file
    plot_file = FilesystemUtils.get_test_resources_plot_file_name(
        'adaline/Adaline-Training-Set.png')
    Plotter.plot_iris_data_set(self.x, plot_file)
def test_scikit_learn_svm_nonlinear_iris(self):
    """Fit RBF-kernel SVMs with a low and a high gamma and plot each boundary.

    Both runs share every setting except ``gamma`` and the output file name.
    """
    runs = (
        (0.2, 'svm/SVM-ScikitLearn-LowGamma-Decision-Boundary.png'),
        (100.0, 'svm/SVM-ScikitLearn-HighGamma-Decision-Boundary.png'),
    )
    for gamma, plot_name in runs:
        classifier = SVC(kernel='rbf', random_state=1, gamma=gamma, C=1.0)
        classifier.fit(self.x_train, self.y_train)
        self.predict_and_evaluate(
            classifier,
            FilesystemUtils.get_test_resources_plot_file_name(plot_name))
def test_draw_impurity_criteria_types(self):
    """Plot entropy, scaled entropy, Gini and misclassification error over p in [0, 1).

    For a visual comparison of the impurity criteria, each index is evaluated
    for the class-1 probability range [0, 1) in steps of 0.01. A scaled
    entropy (entropy / 2) is added so that the Gini impurity can be seen as an
    intermediate measure between entropy and the classification error.
    """
    x_range = np.arange(start=0.0, stop=1.0, step=0.01, dtype=float)

    # compute entropy; p == 0 is left as a gap (None) in the curve
    # (presumably because the entropy helper takes a logarithm of p)
    ent = [self.entropy(p) if p != 0 else None for p in x_range]

    # scaled entropy: test for the None placeholder explicitly so that a
    # genuine entropy of 0.0 is scaled instead of being dropped as falsy
    sc_ent = [e * 0.5 if e is not None else None for e in ent]

    # compute gini
    gn = [self.gini(p) for p in x_range]

    # compute misclassification error
    err = [self.misclassification_error(p) for p in x_range]

    Plotter.plot_impurity_criteria(
        x_range,
        entropy=ent,
        scaled_entropy=sc_ent,
        gini=gn,
        misclassification_error=err,
        image_file_path=FilesystemUtils.get_test_resources_plot_file_name(
            'decision_tree/DecisionTree-ScikitLearn-Impurity-Criteria.png'))
def test_scikit_learn_validation_curves_on_wdbc_pipeline(self):
    """Plot validation curves for the pipeline while varying logistic regression's C.

    Validation curves help diagnose over- and underfitting. They are related
    to learning curves, but instead of plotting accuracy against the sample
    size they plot it against a model parameter, here the inverse
    regularization parameter C of the logistic regression step.
    """
    # pipeline under test: scale, project onto 2 principal components, classify
    pipeline = make_pipeline(
        StandardScaler(),
        PCA(n_components=2),
        LogisticRegression(solver='lbfgs', random_state=42))

    fold_count = 10
    c_values = np.array([0.001, 0.01, 0.1, 1.0, 10.0, 100.0])
    train_scores, test_scores = validation_curve(
        estimator=pipeline,
        X=self.x_train,
        y=self.y_train,
        param_name='logisticregression__C',
        param_range=c_values,
        cv=fold_count,
        n_jobs=-1)

    output_path = FilesystemUtils.get_test_resources_plot_file_name(
        'model_performance/ModelPerformance-ScikitLearn-ValidationCurves.png'
    )
    Plotter.plot_validation_curves(
        c_values, train_scores, test_scores, image_file_path=output_path)
def test_scikit_learn_learning_curves_on_wdbc_pipeline(self):
    """Plot learning curves (accuracy vs. training-set size) for the pipeline.

    ``train_sizes=np.linspace(0.1, 1.0, 10)`` requests 10 evenly spaced
    relative training-set sizes, and ``cv`` sets the number of
    cross-validation folds used to score each size.
    """
    # pipeline under test: scale, project onto 2 principal components, classify
    pipeline = make_pipeline(
        StandardScaler(),
        PCA(n_components=2),
        LogisticRegression(solver='lbfgs', random_state=42))

    fold_count = 10
    relative_sizes = np.linspace(0.1, 1.0, 10)
    train_sizes, train_scores, test_scores = learning_curve(
        estimator=pipeline,
        X=self.x_train,
        y=self.y_train,
        train_sizes=relative_sizes,
        cv=fold_count,
        n_jobs=-1)

    # finally, plot the diagram using a helper function
    output_path = FilesystemUtils.get_test_resources_plot_file_name(
        'model_performance/ModelPerformance-ScikitLearn-LearningCurves.png'
    )
    Plotter.plot_performance_curves(
        train_sizes, train_scores, test_scores, image_file_path=output_path)
def test_scikit_learn_confusion_matrix_by_svm(self):
    """Fit a scaled SVM pipeline, print its test metrics and plot the confusion matrix."""
    # pipeline under test: standardize, then classify with an SVM
    pipeline = make_pipeline(StandardScaler(), SVC(random_state=42))
    pipeline.fit(self.x_train, self.y_train)
    y_pred = pipeline.predict(self.x_test)

    # the confusion matrix summarizes the different types of error the
    # classifier made on the test dataset
    cm = confusion_matrix(y_true=self.y_test, y_pred=y_pred)
    print('Confusion matrix')
    print(cm)

    # display metrics, each computed right before it is printed
    metric_reports = (
        ('Precision: {:.3f}', precision_score),
        ('Recall: {:.3f}', recall_score),
        ('F1: {:.3f}', f1_score),
    )
    for template, scorer in metric_reports:
        print(template.format(scorer(y_true=self.y_test, y_pred=y_pred)))

    output_path = FilesystemUtils.get_test_resources_plot_file_name(
        'model_performance/ModelPerformance-ScikitLearn-ConfusionMatrix.png'
    )
    Plotter.plot_confusion_matrix(cm, image_file_path=output_path)
def load_wine_data_set(self):
    """Load the Wine data set into ``self.x`` / ``self.y`` and keep its column names.

    Per the dataset summary
    (https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.names)
    the data holds 13 features describing the chemical properties of 178 wine
    samples; each sample belongs to one of three classes (1, 2, 3) that refer
    to grapes grown in the same region in Italy but derived from different
    wine cultivars.

    Sets ``self.x`` (all columns but the first), ``self.y`` (first column,
    the class label) and ``self.df_columns`` (all column names, class label
    included).
    """
    # load wine data set; the file carries no header row
    wine_file = FilesystemUtils.get_resources_data_file_name('wine/wine.data')
    df = pd.read_csv(wine_file, header=None)

    # create headers
    df.columns = [
        'Class label',
        'Alcohol',
        'Malic acid',
        'Ash',
        'Alcalinity of ash',
        'Magnesium',
        'Total phenols',
        'Flavanoids',
        'Nonflavanoid phenols',
        'Proanthocyanins',
        'Color intensity',
        'Hue',
        'OD280/OD315 of diluted wines',
        'Proline'
    ]

    # print sample info: first the distinct class labels ...
    print('Class labels', np.unique(df['Class label']))
    # ... then a random 10% of the rows with their class labels (sampling
    # shuffles, which matters because rows are sorted by class label)
    print(df.sample(frac=0.1).to_string())

    # separate class labels from features
    self.x = df.iloc[:, 1:].values
    self.y = df.iloc[:, 0].values

    # save column names for later usage
    self.df_columns = df.columns
def test_variable_L1_regularization_strength(self):
    """Plot how L1-regularized logistic-regression weights change with C.

    Fits one model per C in 10**-4 .. 10**5 on the standardized training
    split and plots the weights of the class at index 1 of ``coef_`` against
    the regularization parameter.
    """
    # split off a training subset, just to mimic a real training scenario;
    # the test part is not needed here
    X_train, _, Y_train, _ = train_test_split(
        self.x, self.y, test_size=0.3, random_state=0, stratify=self.y)

    # standardize data sets
    X_train_standardized = StandardScaler().fit_transform(X_train)

    weights, params = [], []
    for c in np.arange(-4., 6.):
        inverse_strength = 10. ** c
        # penalty='l1' needs a solver that supports it; the lbfgs default of
        # newer scikit-learn releases rejects it, so pin liblinear explicitly
        # (it was the default solver in older releases)
        lr = LogisticRegression(
            penalty='l1',
            C=inverse_strength,
            solver='liblinear',
            random_state=0)
        lr.fit(X_train_standardized, Y_train)
        weights.append(lr.coef_[1])
        params.append(inverse_strength)
    weights = np.array(weights)

    Plotter.plot_variable_feature_weights(
        weights,
        params,
        self.df_columns,
        image_file_path=FilesystemUtils.get_test_resources_plot_file_name(
            'regularization/Logistic-Regression-Variable-L1-Regularized-Strength.png'
        ))
def load_iris_dataset(self):
    """Load Iris from scikit-learn, split it 70/30 and standardize both parts.

    Only feature columns 2 and 3 are used. The classes are already integer
    labels where 0=Iris-Setosa, 1=Iris-Versicolor, 2=Iris-Virginica.

    Sets ``self.x_train`` / ``self.x_test`` (standardized features) and
    ``self.y_train`` / ``self.y_test`` (labels).
    """
    iris = datasets.load_iris()
    x = iris.data[:, [2, 3]]
    y = iris.target
    print('Class labels:', np.unique(y))

    # plot the data and save it to file
    Plotter.plot_iris_data_set(
        x,
        FilesystemUtils.get_test_resources_plot_file_name(
            'ScikitLearn-Iris-Training-Set.png'))

    # split data into 70% training and 30% test data, keeping class proportions
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.3, random_state=1, stratify=y)
    print('Labels counts in y:', np.bincount(y))
    print('Labels counts in y_train:', np.bincount(y_train))
    print('Labels counts in y_test:', np.bincount(y_test))

    # standardize features: estimate mean/std on the training split only and
    # apply the same transform to both splits, so test samples are scaled
    # exactly like the data the models are trained on
    sc = StandardScaler()
    sc.fit(x_train)
    self.x_train = sc.transform(x_train)
    self.x_test = sc.transform(x_test)
    self.y_train = y_train
    self.y_test = y_test
def test_feature_importance(self):
    """Rank the features with a random forest, print the ranking and plot it."""
    # split training and testing dataset (only the training part is used)
    x_train, _x_test, y_train, _y_test = train_test_split(
        self.x, self.y, test_size=0.25, random_state=42)

    # standardize features
    scaler = StandardScaler()
    x_train_std = scaler.fit_transform(x_train)

    # use random forest
    forest = RandomForestClassifier(n_estimators=500, random_state=42)
    forest.fit(x_train_std, y_train)
    importance = forest.feature_importances_

    # feature indices ordered from most to least important
    indices = np.argsort(importance)[::-1]
    # first column name is the class label, so skip it
    feature_names = self.df_columns[1:]

    for rank, feature_index in enumerate(indices, start=1):
        print(
            "%2d) %-*s %f" %
            (rank, 30, feature_names[feature_index], importance[feature_index]))

    feature_count = x_train_std.shape[1]
    Plotter.plot_feature_importance(
        feature_count,
        importance[indices],
        feature_names[indices],
        FilesystemUtils.get_test_resources_plot_file_name(
            'feature_importance/FeatureImportance.png'))
def test_adaline_with_stochastic_update(self):
    """Train Adaline (stochastic gradient descent) on standardized features and plot it.

    Saves the standardized training set, the learning curve and the decision
    boundary under ``adaline/``, then feeds a single sample through
    ``partial_fit`` to exercise the online-update path.
    """
    # standardize the first two feature columns (zero mean, unit variance)
    x_std: np.ndarray = np.copy(self.x)
    x_std[:, 0] = (self.x[:, 0] - self.x[:, 0].mean()) / self.x[:, 0].std()
    x_std[:, 1] = (self.x[:, 1] - self.x[:, 1].mean()) / self.x[:, 1].std()

    # plot the data and save it to file
    Plotter.plot_iris_data_set(
        x_std,
        FilesystemUtils.get_test_resources_plot_file_name(
            'adaline/AdalineSGD-Standardized-Training-Set.png'))

    # train adaline on standardized features with a small number of epochs
    adaline = AdalineSGD(learning_rate=0.01, num_epochs=15)
    adaline.fit(x_std, self.y)

    # plot learning curve
    curve = {
        'cost_length': len(adaline.cost),
        'cost': adaline.cost,
        'marker': 'o',
        'x_label': 'Epochs',
        # the cost is plotted as recorded (no log10 applied), so the axis
        # must not claim a logarithmic scale
        'y_label': 'Cost',
        'title': 'Adaline - Learning rate 0.01'
    }
    Plotter.plot_learning_curve(
        curve,
        FilesystemUtils.get_test_resources_plot_file_name(
            'adaline/AdalineSGD-Learning-Curve-Standardized-Features.png'))

    # plot decision boundary
    Plotter.plot_decision_boundary(
        x_std,
        self.y,
        classifier=adaline,
        diagram_options={
            'x_label': 'sepal length [cm]',
            'y_label': 'petal length [cm]',
            'legend': 'upper left'
        },
        image_file_path=FilesystemUtils.get_test_resources_plot_file_name(
            'adaline/AdalineSGD-Decision-Boundary-Standardized-Features.png'
        ))

    # update the already-trained model with one more sample
    adaline.partial_fit(x_std[0, :], self.y[0])
def test_scikit_learn_perceptron(self):
    """Train scikit-learn's Perceptron and plot its decision boundary.

    Most algorithms in scikit-learn already support multiclass
    classification via the One-versus-Rest (OvR) method.
    """
    # ``n_iter`` was removed from scikit-learn's Perceptron; ``max_iter`` is
    # its replacement. ``tol=None`` switches the early-stopping criterion
    # off so that all 40 epochs run, as ``n_iter=40`` used to request.
    perceptron = Perceptron(max_iter=40, tol=None, eta0=0.1, random_state=1)
    perceptron.fit(self.x_train, self.y_train)

    self.predict_and_evaluate(
        perceptron,
        image_file_path=FilesystemUtils.get_test_resources_plot_file_name(
            'perceptron/Perceptron-ScikitLearn-Decision-Boundary.png'))
def test_scikit_learn_knn(self):
    """Train a 5-nearest-neighbours classifier and plot its decision boundary."""
    # Minkowski metric with p=2
    classifier = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
    classifier.fit(self.x_train, self.y_train)

    boundary_path = FilesystemUtils.get_test_resources_plot_file_name(
        'knn/KNN-ScikitLearn-Decision-Boundary.png')
    self.predict_and_evaluate(classifier, image_file_path=boundary_path)
def test_scikit_learn_svm(self):
    """Train a linear-kernel SVM and plot its decision boundary.

    Most algorithms in scikit-learn already support multiclass
    classification via the One-versus-Rest (OvR) method.
    """
    classifier = SVC(kernel='linear', C=1.0, random_state=1)
    classifier.fit(self.x_train, self.y_train)

    boundary_path = FilesystemUtils.get_test_resources_plot_file_name(
        'svm/SVM-ScikitLearn-Decision-Boundary.png')
    self.predict_and_evaluate(classifier, boundary_path)
def test_scikit_learn_random_forest(self):
    """Train a 25-tree Gini random forest and plot its decision boundary."""
    classifier = RandomForestClassifier(
        criterion='gini',
        n_estimators=25,
        random_state=1,
        n_jobs=2)
    classifier.fit(self.x_train, self.y_train)

    boundary_path = FilesystemUtils.get_test_resources_plot_file_name(
        'decision_tree/RandomForest-ScikitLearn-Decision-Boundary.png')
    self.predict_and_evaluate(classifier, image_file_path=boundary_path)
def test_scikit_learn_perceptron_by_SGDClassifier(self):
    """Train a perceptron through ``SGDClassifier`` and plot its decision boundary.

    Sometimes datasets are too large to fit into computer memory, so
    scikit-learn also offers alternative implementations via the
    SGDClassifier class, which supports online learning through its
    ``partial_fit`` method. The concept behind SGDClassifier is similar to
    the stochastic gradient algorithm.
    """
    # fix the seed, like every other estimator in these tests, so the saved
    # decision-boundary plot is reproducible from run to run
    perceptron = SGDClassifier(loss='perceptron', random_state=1)
    perceptron.fit(self.x_train, self.y_train)

    self.predict_and_evaluate(
        perceptron,
        FilesystemUtils.get_test_resources_plot_file_name(
            'perceptron/Perceptron-ScikitLearn-Classifier-Decision-Boundary.png'))
def load_svm_nonlinear_data_set(self):
    """Generate a seeded XOR-style data set and store it as the training set.

    Draws 200 two-dimensional standard-normal samples and labels each one 1
    when exactly one of its coordinates is positive, -1 otherwise. The result
    is plotted and kept in ``self.x_train`` / ``self.y_train``.
    """
    # fixed seed so the generated samples are the same on every run
    np.random.seed(1)
    samples = np.random.randn(200, 2)

    first_positive = samples[:, 0] > 0
    second_positive = samples[:, 1] > 0
    labels = np.where(np.logical_xor(first_positive, second_positive), 1, -1)

    # plot the data and save it to file
    plot_file = FilesystemUtils.get_test_resources_plot_file_name(
        'svm/SVM-ScikitLearn-NonLinear-Training-Set.png')
    Plotter.plot_svm_nonlinear_data_set(samples, labels, plot_file)

    self.x_train = samples
    self.y_train = labels
def test_scikit_learn_svm_nonlinear(self):
    """Fit an RBF-kernel SVM on the training set and plot its decision boundary.

    The gamma parameter, set to 0.10 here, can be understood as a cut-off
    parameter for the Gaussian sphere; larger values lead to a tighter and
    bumpier decision boundary.
    """
    classifier = SVC(kernel='rbf', random_state=1, gamma=0.10, C=10.0)
    classifier.fit(self.x_train, self.y_train)

    plot_options = {
        'x_label': 'feature 1',
        'y_label': 'feature 2',
        'legend': 'best'
    }
    boundary_path = FilesystemUtils.get_test_resources_plot_file_name(
        'svm/SVM-ScikitLearn-NonLinear-Decision-Boundary.png')
    Plotter.plot_decision_boundary(
        self.x_train,
        self.y_train,
        classifier,
        plot_options,
        image_file_path=boundary_path)
def test_elbow_method(self):
    """Plot k-means distortion against the number of clusters (elbow method).

    To quantify the quality of clustering we use an intrinsic metric, the
    within-cluster Sum of Squared Errors (SSE), also called cluster inertia
    or distortion. scikit-learn exposes it as the ``inertia_`` attribute of
    a fitted KMeans model, so it does not have to be computed explicitly.

    As k increases the distortion decreases, because samples end up closer
    to the centroids they are assigned to. The elbow method estimates a good
    k by looking for the point where the curve bends.
    """
    distortions = []
    k_range = range(1, 11)
    for k in k_range:
        km = KMeans(
            # number of clusters
            n_clusters=k,
            # initialization method
            init='k-means++',
            # number of different experiments run
            n_init=10,
            # maximum number of iterations
            max_iter=300,
            # minimum within-cluster change: below it, training is stopped
            tol=1e-04,
            # initialization seed
            random_state=42)
        km.fit(self.x)
        distortions.append(km.inertia_)

    # one row per k: (number of clusters, distortion)
    data = np.column_stack((k_range, distortions))
    Plotter.plot_data(
        data,
        x_label='Number of clusters',
        # axis label spelling corrected (was 'Distorsion')
        y_label='Distortion',
        title='Elbow method for optimal number of clusters',
        image_file_path=FilesystemUtils.get_test_resources_plot_file_name(
            'clustering/ElbowMethodForKMeansClustering.png'))
def test_scikit_learn_roc_auc_on_wdbc_pipeline(self):
    """Plot one ROC curve (with its AUC) per cross-validation fold of the pipeline.

    The pipeline is re-fitted on each fold's training part and scored on the
    fold's held-out part, using only two feature columns of the training set.
    """
    # create a pipeline to test
    wdbc_pipeline = make_pipeline(
        StandardScaler(),
        PCA(n_components=2),
        LogisticRegression(solver='lbfgs', penalty='l2', C=100.0, random_state=42)
    )

    # stratified 3-fold cross-validation on the training set.
    # No random_state: the folds are not shuffled, so a seed has no effect,
    # and newer scikit-learn releases reject a seed without shuffle=True.
    cv = StratifiedKFold(n_splits=3).split(self.x_train, self.y_train)

    false_positive_rates = []
    true_positive_rates = []
    roc_auc_values = []

    # use just two features (columns 4 and 14) to get a more interesting diagram
    x_train_reduced = self.x_train[:, [4, 14]]

    # iterate over the folds to compute the related ROC curve
    for train, test in cv:
        # probabilities predicted for the held-out part of the current fold
        probs = wdbc_pipeline.fit(
            x_train_reduced[train],
            self.y_train[train]).predict_proba(x_train_reduced[test])

        # compute ROC curve arrays (the thresholds are not needed)
        fpr, tpr, _ = roc_curve(self.y_train[test], probs[:, 1], pos_label=1)

        # compute AUC (Area Under Curve)
        roc_auc = auc(fpr, tpr)

        false_positive_rates.append(fpr)
        true_positive_rates.append(tpr)
        roc_auc_values.append(roc_auc)

    # display results
    image_file_path = FilesystemUtils.get_test_resources_plot_file_name(
        'model_performance/ModelPerformance-ScikitLearn-ROC_AUC.png'
    )
    # NOTE(review): the per-fold fpr/tpr arrays may differ in length; recent
    # NumPy releases refuse to build an array from ragged input without
    # dtype=object - confirm against what Plotter.plot_roc_auc expects
    Plotter.plot_roc_auc(
        np.asarray(false_positive_rates),
        np.asarray(true_positive_rates),
        np.asarray(roc_auc_values),
        image_file_path=image_file_path
    )
def test_sequential_feature_selection(self):
    """Run sequential feature selection with a KNN estimator and report the outcome.

    Plots the score recorded for each feature subset and prints the column
    names of every subset, to show how the selection progressed.
    """
    estimator = KNeighborsClassifier(n_neighbors=5)

    # standardize features
    x_std = StandardScaler().fit_transform(self.x)

    # selecting features
    selector = SequentialFeatureSelection(estimator, selected_features_number=1)
    selector.fit(x_std, self.y)

    plot_file = FilesystemUtils.get_test_resources_plot_file_name(
        'sequential_feature_selection/AccuracyByFeatureNumber.png')
    Plotter.plot_accuracy_by_feature_number(
        selector.subsets_, selector.scores_, plot_file)

    # for each subset of features, print its column names
    print('Progressive selection explained')
    for feature_subset in selector.subsets_:
        column_positions = list(feature_subset)
        # skip the first column name, which is the class label
        print(self.df_columns[1:][column_positions])
def plot_predictions(self, predictions: np.matrix, centroids: np.matrix, title: str, diagram_file_name: str):
    """Scatter-plot the samples of ``self.x`` grouped by predicted cluster, plus centroids.

    :param predictions: predicted cluster index per row of ``self.x``; rows
        labelled 0, 1 and 2 are drawn as three separate series
    :param centroids: centroid coordinates, drawn as red stars
    :param title: title of the diagram
    :param diagram_file_name: file name of the image, created under
        ``clustering/``
    """
    # (color, legend label) of each cluster, in cluster-index order
    cluster_styles = (
        ('lightgreen', 'cluster 1'),
        ('orange', 'cluster 2'),
        ('lightblue', 'cluster 3'),
    )
    cluster_series = [
        {
            'x': self.x[predictions == cluster_index, :],
            'color': color,
            'marker': 's',
            'marker_size': 50,
            'edge_color': 'black',
            'label': label
        }
        for cluster_index, (color, label) in enumerate(cluster_styles)
    ]

    centroid_series = {
        'x': centroids,
        'color': 'red',
        'marker': '*',
        'marker_size': 250,
        'edge_color': 'black',
        'label': 'centroids'
    }

    output_path = FilesystemUtils.get_test_resources_plot_file_name(
        'clustering/' + diagram_file_name)
    Plotter.plot_multiple_scattered_data(
        cluster_series,
        centroid_series,
        title,
        image_file_path=output_path)
def setUp(self):
    """Prepare a standardized two-class Iris subset in ``self.x`` / ``self.y``.

    Uses feature columns 2 and 3 of scikit-learn's Iris data and keeps only
    the samples labelled 0 or 1.
    """
    # load subset of Iris data
    iris = datasets.load_iris()
    features = iris.data[:, [2, 3]]
    labels = iris.target

    # consider only 0 and 1 labels
    is_class_0_or_1 = (labels == 0) | (labels == 1)
    binary_features = features[is_class_0_or_1]
    binary_labels = labels[is_class_0_or_1]

    # standardize features
    self.x = StandardScaler().fit_transform(binary_features)
    self.y = binary_labels
    print('Class labels:', np.unique(self.y))

    # plot the data and save it to file
    plot_file = FilesystemUtils.get_test_resources_plot_file_name(
        'logistic_regression/LogisticRegressionBGD-Training-Set.png')
    Plotter.plot_iris_data_set(self.x, plot_file)
def test_plot_sample_clustered_data(self):
    """Save a scatter plot of the sample data held in ``self.x``."""
    output_path = FilesystemUtils.get_test_resources_plot_file_name(
        'clustering/SampleClusteredData.png')
    Plotter.plot_scattered_data(
        self.x,
        title='Sample clustered data',
        image_file_path=output_path)