Beispiel #1
0
    def handle(self, *args, **kwargs):
        """Debugging/management command: render a target plot for one
        hard-coded job and feature, save it to disk, and print per-grid-point
        {value, label, count} summaries.

        NOTE(review): the job id (59) and feature name ('Age_1') are
        hard-coded — this looks like a one-off analysis script.
        """
        # Load the stored job and its trained model.
        TARGET_MODEL = 59
        job = Job.objects.filter(pk=TARGET_MODEL)[0]
        # Model is loaded but not used below; kept as a sanity check that the
        # model file is readable — TODO confirm this is intentional.
        model = joblib.load(job.predictive_model.model_path)[0]

        # Load the encoded event logs for this job.
        training_df, test_df = get_encoded_logs(job)
        training_df['label'] = training_df['label'].astype(bool).astype(int)
        features = list(
            training_df.drop(['trace_id', 'label'], 1).columns.values)
        feature = 'Age_1'

        # NOTE(review): grid_type=None / percentile_range='percentile' look
        # swapped (pdpbox expects grid_type='percentile'/'equal' and
        # percentile_range=None or a tuple). Kept as-is to preserve current
        # behavior — confirm against pdpbox's _get_grids signature.
        feature_grids, percentile_info = _get_grids(
            feature_values=training_df[feature].values,
            num_grid_points=10,
            grid_type=None,
            percentile_range='percentile',
            grid_range=None)

        # One custom grid point per integer step across the grid range.
        custom_grids = list(
            range(int(feature_grids.min()), int(feature_grids.max() - 1)))
        print(features)

        fig, axes, summary_df = info_plots.target_plot(
            df=training_df,
            feature=feature,
            feature_name='feature value',
            cust_grid_points=custom_grids,
            target='label',
            show_percentile=False)
        fig.savefig('ice_plot_train_1_3_CType.png')

        # Index of the first row whose encoded feature value equals each grid
        # point; used after decoding to look up human-readable values.
        encoded_values = list(training_df[feature].values)
        indexs = [
            encoded_values.index(x)
            for x in range(int(feature_grids.min()),
                           int(feature_grids.max() - 1))
        ]

        # Decode the dataframe in place back to human-readable values.
        encoder = retrieve_proper_encoder(job)
        encoder.decode(training_df, job.encoding)
        values = training_df[feature].values

        print(summary_df)
        lst = []
        if job.encoding.value_encoding != ValueEncodings.BOOLEAN.value:
            for x in range(len(indexs) - 1):
                lst.append({
                    'value': values[indexs[x]],
                    'label': summary_df['label'][x],
                    'count': summary_df['count'][x],
                })
        else:
            # Boolean encodings use the plot's display column directly.
            for x in range(summary_df.shape[0]):
                lst.append({
                    'value': summary_df['display_column'][x],
                    'label': summary_df['label'][x],
                    'count': summary_df['count'][x],
                })
        print(lst)
Beispiel #2
0
    def test_grids_grid_range_equal(self, titanic_data):
        """Equal-spaced grids restricted to grid_range=(0, 100)."""
        feature_grids, percentile_info = _get_grids(
            feature_values=titanic_data['Fare'].values,
            num_grid_points=10,
            grid_type='equal',
            percentile_range=None,
            grid_range=(0, 100))

        # Equal grids carry no percentile annotations.
        assert percentile_info == []
        assert_array_almost_equal(
            feature_grids[[0, 4, 7]],
            np.array([0.0, 44.44444444, 77.77777777]),
            decimal=8)
Beispiel #3
0
    def test_grids_grid_range_percentile(self, titanic_data):
        """Percentile grids restricted to grid_range=(0, 100)."""
        feature_grids, percentile_info = _get_grids(
            feature_values=titanic_data['Fare'].values,
            num_grid_points=10,
            grid_type='percentile',
            percentile_range=None,
            grid_range=(0, 100))

        sample = [0, 4, 7]
        assert_array_almost_equal(feature_grids[sample],
                                  np.array([0.0, 13.0, 35.11111111]),
                                  decimal=8)
        assert_array_equal(percentile_info[sample],
                           np.array(['(0.0)', '(44.44)', '(77.78)']))
Beispiel #4
0
    def test_grids_percentile_range(self, titanic_data):
        """Grids computed over the 5th-95th percentile window."""
        feature_grids, percentile_info = _get_grids(
            feature_values=titanic_data['Fare'].values,
            num_grid_points=10,
            grid_type='percentile',
            percentile_range=(5, 95),
            grid_range=None)

        checked = [0, 4, 7]
        assert_array_equal(feature_grids[checked],
                           np.array([7.225, 13.0, 31.0]))
        assert_array_equal(percentile_info[checked],
                           np.array(['(5.0)', '(45.0)', '(75.0)']))
Beispiel #5
0
    def test_grids_15(self, titanic_data):
        """Percentile grids with num_grid_points raised to 15."""
        feature_grids, percentile_info = _get_grids(
            feature_values=titanic_data['Fare'].values,
            num_grid_points=15,
            grid_type='percentile',
            percentile_range=None,
            grid_range=None)

        probe = [0, 4, 7, 11]
        assert_array_equal(feature_grids[probe],
                           np.array([0.0, 8.05, 14.4542, 37.0042]))
        assert_array_equal(
            percentile_info[probe],
            np.array(['(0.0)', '(28.57)', '(50.0)', '(78.57)']))
Beispiel #6
0
def explain(ice_exp: Explanation, training_df, test_df, explanation_target,
            prefix_target):
    """Summarize a target plot for ``explanation_target`` as a list of
    {'value', 'label', 'count'} dicts, one per grid point."""
    job = ice_exp.job
    training_df = training_df.drop(['trace_id'], 1)

    # Boolean encodings get their labels shifted to {1, 2} for plotting.
    if job.encoding.value_encoding == ValueEncodings.BOOLEAN.value:
        training_df['label'] = training_df['label'].astype(bool).astype(
            int) + 1

    # NOTE(review): grid_type=None / percentile_range='percentile' look
    # swapped relative to pdpbox's expected arguments — kept as-is to
    # preserve behavior; confirm against _get_grids' signature.
    feature_grids, percentile_info = _get_grids(
        feature_values=training_df[explanation_target].values,
        num_grid_points=10,
        grid_type=None,
        percentile_range='percentile',
        grid_range=None)

    # One integer grid point per step across the grid range.
    grid_lo = int(feature_grids.min())
    grid_hi = int(feature_grids.max() - 1)
    custom_grids = list(range(grid_lo, grid_hi))

    fig, axes, summary_df = info_plots.target_plot(
        df=training_df,
        feature=explanation_target,
        feature_name='feature value',
        cust_grid_points=custom_grids,
        target='label',
        show_percentile=False)

    # First row index whose encoded feature value matches each grid point;
    # resolved before decoding so the lookup is on encoded values.
    encoded_values = list(training_df[explanation_target].values)
    indexs = []
    for grid_point in range(grid_lo, grid_hi):
        indexs.append(encoded_values.index(grid_point))

    # Decode in place so we can report human-readable feature values.
    encoder = retrieve_proper_encoder(job)
    encoder.decode(training_df, job.encoding)
    values = training_df[explanation_target].values

    lst = []
    if job.encoding.value_encoding != ValueEncodings.BOOLEAN.value:
        for row in range(len(indexs) - 1):
            lst.append({
                'value': values[indexs[row]],
                'label': summary_df['label'][row],
                'count': int(summary_df['count'][row]),
            })
    else:
        # Boolean encodings use the plot's display column directly.
        for row in range(summary_df.shape[0]):
            lst.append({
                'value': summary_df['display_column'][row],
                'label': summary_df['label'][row],
                'count': int(summary_df['count'][row]),
            })
    return lst
Beispiel #7
0
    def test_grids_grid_type(self, titanic_data):
        """Equal-spaced grids over the full Fare range."""
        feature_grids, percentile_info = _get_grids(
            feature_values=titanic_data["Fare"].values,
            num_grid_points=10,
            grid_type="equal",
            percentile_range=None,
            grid_range=None,
        )

        # Equal grids produce no percentile annotations.
        assert percentile_info == []
        assert_array_almost_equal(
            feature_grids[[0, 4, 7]],
            np.array([0.0, 227.70186666, 398.47826666]),
            decimal=8,
        )
Beispiel #8
0
    def test_grids_default(self, titanic_data):
        """Default settings: 10 percentile grid points, no range limits."""
        feature_grids, percentile_info = _get_grids(
            feature_values=titanic_data["Fare"].values,
            num_grid_points=10,
            grid_type="percentile",
            percentile_range=None,
            grid_range=None,
        )

        spot = [0, 4, 7]
        assert_array_almost_equal(
            feature_grids[spot],
            np.array([0.0, 13.0, 35.11111111]),
            decimal=8,
        )
        assert_array_equal(
            percentile_info[spot],
            np.array(["(0.0)", "(44.44)", "(77.78)"]),
        )