コード例 #1
0
def feature_correlation_pearson(path="images/feature_correlation_pearson.png"):
    """
    Fit a FeatureCorrelation visualizer on the diabetes dataset and render it.

    The visualizer is created without an explicit ``method``, so it uses its
    default (the function name indicates Pearson correlation).

    Parameters
    ----------
    path : str
        Forwarded to the visualizer as ``outpath``.
    """
    diabetes = datasets.load_diabetes()
    features = diabetes['data']
    target = diabetes['target']
    labels = np.array(diabetes['feature_names'])

    viz = FeatureCorrelation(labels=labels)
    viz.fit(features, target)
    viz.poof(outpath=path, clear_figure=True)
コード例 #2
0
    def test_feature_correlation_integrated_mutual_info_regression(self):
        """
        Integration test for the 'mutual_info-regression' method.

        Fits the visualizer on the fixture data with a fixed ``random_state``,
        renders it, and checks the result with ``assert_images_similar``.
        """
        visualizer = FeatureCorrelation(method='mutual_info-regression')
        visualizer.fit(self.X, self.y, random_state=23456)
        visualizer.poof()

        self.assert_images_similar(visualizer)
コード例 #3
0
    def test_feature_correlation_integrated_mutual_info_regression(self):
        """
        Integration test for the 'mutual_info-regression' method.

        Fits the visualizer on the fixture data with a fixed ``random_state``,
        renders it, and checks the result with ``assert_images_similar``.
        """
        visualizer = FeatureCorrelation(method='mutual_info-regression')
        visualizer.fit(self.X, self.y, random_state=23456)
        visualizer.poof()

        self.assert_images_similar(visualizer)
コード例 #4
0
def feature_correlation_pearson(
        path="images/feature_correlation_pearson.png"):
    """
    Fit a FeatureCorrelation visualizer on the diabetes dataset and render it.

    The visualizer is created without an explicit ``method``, so it uses its
    default (the function name indicates Pearson correlation).

    Parameters
    ----------
    path : str
        Forwarded to the visualizer as ``outpath``.
    """
    diabetes = datasets.load_diabetes()
    features = diabetes['data']
    target = diabetes['target']
    labels = np.array(diabetes['feature_names'])

    viz = FeatureCorrelation(labels=labels)
    viz.fit(features, target)
    viz.poof(outpath=path, clear_figure=True)
コード例 #5
0
    def test_feature_correlation_integrated_pearson(self):
        """
        Integration test for FeatureCorrelation with its default settings
        (the Pearson correlation coefficient, per the test name).

        Fits the visualizer on the fixture data, renders it, and checks the
        result with ``assert_images_similar``.
        """
        visualizer = FeatureCorrelation()
        visualizer.fit(self.X, self.y)
        visualizer.poof()

        self.assert_images_similar(visualizer)
コード例 #6
0
    def test_feature_correlation_integrated_pearson(self):
        """
        Integration test for FeatureCorrelation with its default settings
        (the Pearson correlation coefficient, per the test name).

        Fits the visualizer on the fixture data, renders it, and checks the
        result with ``assert_images_similar``.
        """
        visualizer = FeatureCorrelation()
        visualizer.fit(self.X, self.y)
        visualizer.poof()

        self.assert_images_similar(visualizer)
コード例 #7
0
def mutual_info_classification(classes, feature_names, X, y):
    """
    Plot the mutual information between features and a classification target.

    Parameters
    ----------
    classes : any
        Unused; kept so existing callers keep working.
    feature_names : list
        Passed to the visualizer as ``feature_names``.
    X : array-like or DataFrame
        Feature data handed to ``fit``.
    y : array-like
        Target values handed to ``fit``.
    """
    from yellowbrick.target import FeatureCorrelation

    visualizer = FeatureCorrelation(method='mutual_info-classification',
                                    feature_names=feature_names,
                                    sort=True)
    # A fixed seed keeps the plotted scores the same from run to run.
    visualizer.fit(X, y, random_state=0)
    visualizer.poof()
コード例 #8
0
def pearson_correlation(classes, fetures, X, Y):
    """
    Plot the correlation between each feature and the target.

    The visualizer is created without an explicit ``method``, so it uses its
    default (the function name indicates Pearson correlation).

    Parameters
    ----------
    classes : any
        Unused; kept so existing callers keep working.
    fetures : sequence
        Feature labels, passed to the visualizer as ``labels``. The spelling
        of the parameter name is kept for backward compatibility.
    X : array-like
        Feature data handed to ``fit``.
    Y : array-like
        Target values handed to ``fit``.
    """
    from yellowbrick.target import FeatureCorrelation

    visualizer = FeatureCorrelation(labels=fetures)
    visualizer.fit(X, Y)
    visualizer.poof()
コード例 #9
0
    def report(self, pipeline: AbstractPipeline):
        """
        Fit a FeatureCorrelation visualizer on the pipeline's training data
        and render it to a file under the 'crcdal' package cache directory.

        The output directory is ``cache/<cache path>/<self.sub_folder>/`` and
        is created if it does not exist; the file name is prefixed with
        ``pipeline.dataset_tag``.
        """
        cache_key = get_cache_path()
        out_dir = pkg_resources.resource_filename(
            'crcdal', 'cache/' + cache_key + '/' + self.sub_folder + '/')
        pkg_resources.ensure_directory(out_dir)

        column_labels = list(pipeline.train.columns())
        viz = FeatureCorrelation(labels=column_labels)
        viz.fit(pipeline.train, pipeline.train_y)

        # NOTE(review): the output is a rendered figure but the name ends in
        # '.csv' -- confirm this extension is intended.
        target_file = (out_dir + pipeline.dataset_tag +
                       '_model_feature_correlation_report.csv')
        viz.poof(outpath=target_file)
コード例 #10
0
def feature_correlation_mutual_info_classification(
        path="images/feature_correlation_mutual_info_classification.png"):
    """
    Render mutual-information scores for a subset of the wine features.

    The wine data is wrapped in a DataFrame whose columns are the dataset's
    feature names, and only the listed columns are requested from the
    visualizer through ``feature_names``.

    Parameters
    ----------
    path : str
        Forwarded to the visualizer as ``outpath``.
    """
    wine = datasets.load_wine()
    features, target = wine['data'], wine['target']
    columns = np.array(wine['feature_names'])
    frame = pd.DataFrame(features, columns=columns)

    selected = ['alcohol', 'ash', 'hue', 'proline', 'total_phenols']

    viz = FeatureCorrelation(method='mutual_info-classification',
                             feature_names=selected)
    viz.fit(frame, target, random_state=0)
    viz.poof(outpath=path, clear_figure=True)
コード例 #11
0
    def test_feature_correlation_integrated_mutual_info_classification(self):
        """
        Integration test for the 'mutual_info-classification' method on the
        wine dataset.

        Fits the visualizer with a fixed ``random_state``, renders it, and
        checks the result with ``assert_images_similar``.
        """
        wine = datasets.load_wine()
        features = wine['data']
        target = wine['target']

        visualizer = FeatureCorrelation(method='mutual_info-classification')
        visualizer.fit(features, target, random_state=12345)
        visualizer.poof()

        self.assert_images_similar(visualizer)
コード例 #12
0
    def test_feature_correlation_integrated_mutual_info_classification(self):
        """
        Integration test for the 'mutual_info-classification' method on the
        wine dataset.

        Fits the visualizer with a fixed ``random_state``, renders it, and
        checks the result with ``assert_images_similar``.
        """
        wine = datasets.load_wine()
        features = wine['data']
        target = wine['target']

        visualizer = FeatureCorrelation(method='mutual_info-classification')
        visualizer.fit(features, target, random_state=12345)
        visualizer.poof()

        self.assert_images_similar(visualizer)
コード例 #13
0
def mutual_info_regress(classes, feature_names, X, y):
    """
    Plot the mutual information between features and a regression target.

    Parameters
    ----------
    classes : any
        Unused; kept so existing callers keep working.
    feature_names : sequence
        Feature labels, passed to the visualizer as ``labels``. Its length
        also sizes the ``discrete_features`` mask, so it must contain at
        least two entries (otherwise setting index 1 raises ``IndexError``).
    X : array-like
        Feature data handed to ``fit``.
    y : array-like
        Target values handed to ``fit``.
    """
    from yellowbrick.target import FeatureCorrelation

    # Only the feature at index 1 is marked discrete.
    # NOTE(review): the index is hard-coded -- confirm it fits the caller's data.
    discrete_features = [False] * len(feature_names)
    discrete_features[1] = True

    visualizer = FeatureCorrelation(method='mutual_info-regression',
                                    labels=feature_names)
    # A fixed seed keeps the plotted scores the same from run to run.
    visualizer.fit(X, y, discrete_features=discrete_features, random_state=0)
    visualizer.poof()
コード例 #14
0
def feature_correlation_mutual_info_regression(
        path="images/feature_correlation_mutual_info_regression.png"):
    """
    Render sorted mutual-information scores for the diabetes dataset.

    The feature at index 1 is flagged as discrete; all others are treated as
    continuous.

    Parameters
    ----------
    path : str
        Forwarded to the visualizer as ``outpath``.
    """
    diabetes = datasets.load_diabetes()
    features = diabetes['data']
    target = diabetes['target']
    labels = np.array(diabetes['feature_names'])

    # Boolean mask with one entry per feature; only index 1 is discrete.
    is_discrete = [False] * len(labels)
    is_discrete[1] = True

    viz = FeatureCorrelation(method='mutual_info-regression',
                             labels=labels, sort=True)
    viz.fit(features, target, discrete_features=is_discrete, random_state=0)
    viz.poof(outpath=path, clear_figure=True)
コード例 #15
0
def feature_correlation_mutual_info_classification(
        path="images/feature_correlation_mutual_info_classification.png"):
    """
    Render mutual-information scores for a subset of the wine features.

    The wine data is wrapped in a DataFrame whose columns are the dataset's
    feature names, and only the listed columns are requested from the
    visualizer through ``feature_names``.

    Parameters
    ----------
    path : str
        Forwarded to the visualizer as ``outpath``.
    """
    wine = datasets.load_wine()
    features, target = wine['data'], wine['target']
    columns = np.array(wine['feature_names'])
    frame = pd.DataFrame(features, columns=columns)

    selected = ['alcohol', 'ash', 'hue', 'proline', 'total_phenols']

    viz = FeatureCorrelation(method='mutual_info-classification',
                             feature_names=selected)
    viz.fit(frame, target, random_state=0)
    viz.poof(outpath=path, clear_figure=True)
コード例 #16
0
def feature_correlation_mutual_info_regression(
        path="images/feature_correlation_mutual_info_regression.png"):
    """
    Render sorted mutual-information scores for the diabetes dataset.

    The feature at index 1 is flagged as discrete; all others are treated as
    continuous.

    Parameters
    ----------
    path : str
        Forwarded to the visualizer as ``outpath``.
    """
    diabetes = datasets.load_diabetes()
    features = diabetes['data']
    target = diabetes['target']
    labels = np.array(diabetes['feature_names'])

    # Boolean mask with one entry per feature; only index 1 is discrete.
    is_discrete = [False] * len(labels)
    is_discrete[1] = True

    viz = FeatureCorrelation(method='mutual_info-regression',
                             labels=labels,
                             sort=True)
    viz.fit(features, target, discrete_features=is_discrete, random_state=0)
    viz.poof(outpath=path, clear_figure=True)
コード例 #17
0
                                                cv=cv)
# Keep the estimator from the cross-validation fold with the best test score.
clf = scores['estimator'][np.argmax(scores['test_score'])]
print(np.max(scores['test_score']))

# %%
# Compute SHAP values for the selected estimator on Xv.
explainer = shap.TreeExplainer(clf)
shap_values = explainer.shap_values(Xv)

# %%
shap.summary_plot(shap_values, Xv, plot_type="bar")

# %%
# Keep only the features whose mean absolute SHAP value exceeds 0.55.
feat = feature_names[feat][np.mean(abs(shap_values), axis=0) > 0.55]
print(feat)
X = X[feat]

# %%
visualizer = FeatureCorrelation(method='mutual_info-classification')
# The mutual-information estimate is randomized, so without a seed the scores
# (and the threshold-based selection below) can change between runs. Fixing
# random_state makes this step reproducible.
visualizer.fit(X, y, random_state=0)
visualizer.poof()

# %%
# Keep the features whose mutual-information score exceeds 0.04.
feat = visualizer.features_[visualizer.scores_ > 0.04]
X = X[feat]

# %%
# Final 10 features recorded from an earlier, unseeded run (the seeded
# selection may differ -- re-verify):
# [263, 268, 287, 288, 300, 302, 307, 308, 313, 315]
print(feat)
コード例 #18
0
 def draw_feature_correlation(self):
     """
     Fit and render a sorted 'mutual_info-classification' FeatureCorrelation
     plot for this object's training data, labelled with
     ``self.get_feature_labels()``.
     """
     feature_labels = self.get_feature_labels()
     viz = FeatureCorrelation(method='mutual_info-classification',
                              labels=feature_labels,
                              sort=True)
     viz.fit(self.training_data, self.training_labels)
     viz.poof()