def handle(self, *args, **kwargs):
    """Debug command: build a pdpbox target plot for one feature of job 59.

    Loads the encoded logs for a hard-coded job, draws an ICE-style target
    plot for the feature ``Age_1``, saves it to disk, then prints a list of
    ``{'value', 'label', 'count'}`` dicts per grid bucket.
    """
    # get model
    TARGET_MODEL = 59
    job = Job.objects.filter(pk=TARGET_MODEL)[0]
    # loaded for its file-read side effect; the model object itself is unused here
    model = joblib.load(job.predictive_model.model_path)[0]

    # load data
    training_df, test_df = get_encoded_logs(job)
    # normalize label to {0, 1}
    training_df['label'] = training_df['label'].astype(bool).astype(int)

    # keyword axis: the positional ``axis`` argument to ``drop`` was
    # deprecated and removed in pandas 2.0
    features = list(
        training_df.drop(['trace_id', 'label'], axis=1).columns.values)
    feature = 'Age_1'

    # NOTE(review): grid_type/percentile_range look swapped relative to the
    # pdpbox ``_get_grids`` signature (grid_type is normally 'percentile' or
    # 'equal', percentile_range a tuple or None — see the tests in this
    # file); kept as-is to preserve current behavior — confirm intent.
    feature_grids, percentile_info = _get_grids(
        feature_values=training_df[feature].values, num_grid_points=10,
        grid_type=None, percentile_range='percentile', grid_range=None)

    # integer grid covering [min, max - 1) of the computed grid values
    grid_lo = int(feature_grids.min())
    grid_hi = int(feature_grids.max() - 1)
    custom_grids = list(range(grid_lo, grid_hi))
    print(features)

    fig, axes, summary_df = info_plots.target_plot(
        df=training_df, feature=feature, feature_name='feature value',
        cust_grid_points=custom_grids, target='label',
        show_percentile=False)
    fig.savefig('ice_plot_train_1_3_CType.png')

    # first-occurrence row index for each grid value, captured while the
    # frame is still encoded so the indices match the encoded values
    encoded_values = list(training_df[feature].values)
    indexs = [encoded_values.index(x) for x in range(grid_lo, grid_hi)]

    encoder = retrieve_proper_encoder(job)
    encoder.decode(training_df, job.encoding)
    values = training_df[feature].values

    lst = []
    print(summary_df)
    if job.encoding.value_encoding != ValueEncodings.BOOLEAN.value:
        # NOTE(review): ``len(indexs) - 1`` skips the final bucket — looks
        # like an off-by-one, left unchanged to preserve behavior.
        for x in range(len(indexs) - 1):
            lst.append({
                'value': values[indexs[x]],
                'label': summary_df['label'][x],
                'count': summary_df['count'][x],
            })
    else:
        for x in range(summary_df.shape[0]):
            lst.append({
                'value': summary_df['display_column'][x],
                'label': summary_df['label'][x],
                'count': summary_df['count'][x],
            })
    print(lst)
def test_grids_grid_range_equal(self, titanic_data):
    # grid_range=(0, 100), grid_type='equal'
    grids, pct_info = _get_grids(
        feature_values=titanic_data['Fare'].values,
        num_grid_points=10,
        grid_type='equal',
        percentile_range=None,
        grid_range=(0, 100))
    # equal spacing over (0, 100): spot-check three grid points
    expected = np.array([0.0, 44.44444444, 77.77777777])
    assert_array_almost_equal(grids[[0, 4, 7]], expected, decimal=8)
    # equal grids carry no percentile annotations
    assert pct_info == []
def test_grids_grid_range_percentile(self, titanic_data):
    # grid_range=(0, 100), grid_type='percentile'
    grids, pct_info = _get_grids(
        feature_values=titanic_data['Fare'].values,
        num_grid_points=10,
        grid_type='percentile',
        percentile_range=None,
        grid_range=(0, 100))
    # spot-check three grid points and their percentile labels
    probe = [0, 4, 7]
    assert_array_almost_equal(
        grids[probe], np.array([0.0, 13.0, 35.11111111]), decimal=8)
    assert_array_equal(
        pct_info[probe], np.array(['(0.0)', '(44.44)', '(77.78)']))
def test_grids_percentile_range(self, titanic_data):
    # percentile_range=(5, 95)
    grids, pct_info = _get_grids(
        feature_values=titanic_data['Fare'].values,
        num_grid_points=10,
        grid_type='percentile',
        percentile_range=(5, 95),
        grid_range=None)
    # clipping to the 5th-95th percentile shifts both grids and labels
    probe = [0, 4, 7]
    assert_array_equal(grids[probe], np.array([7.225, 13.0, 31.0]))
    assert_array_equal(
        pct_info[probe], np.array(['(5.0)', '(45.0)', '(75.0)']))
def test_grids_15(self, titanic_data):
    # num_grid_points=15
    grids, pct_info = _get_grids(
        feature_values=titanic_data['Fare'].values,
        num_grid_points=15,
        grid_type='percentile',
        percentile_range=None,
        grid_range=None)
    # spot-check four of the fifteen grid points
    probe = [0, 4, 7, 11]
    assert_array_equal(
        grids[probe], np.array([0.0, 8.05, 14.4542, 37.0042]))
    assert_array_equal(
        pct_info[probe],
        np.array(['(0.0)', '(28.57)', '(50.0)', '(78.57)']))
def explain(ice_exp: Explanation, training_df, test_df, explanation_target,
            prefix_target):
    """Build an ICE-style per-bucket summary for ``explanation_target``.

    Draws a pdpbox target plot over an integer grid of the feature's value
    range and returns a list of ``{'value', 'label', 'count'}`` dicts, one
    per grid bucket (decoded feature values for non-boolean encodings,
    pdpbox display labels for boolean ones).
    """
    job = ice_exp.job
    # keyword axis: the positional ``axis`` argument to ``drop`` was
    # deprecated and removed in pandas 2.0
    training_df = training_df.drop(['trace_id'], axis=1)
    if job.encoding.value_encoding == ValueEncodings.BOOLEAN.value:
        # shift boolean labels to {1, 2}
        training_df['label'] = training_df['label'].astype(bool).astype(
            int) + 1

    # NOTE(review): grid_type/percentile_range look swapped relative to the
    # pdpbox ``_get_grids`` signature (grid_type is normally 'percentile' or
    # 'equal', percentile_range a tuple or None); kept as-is to preserve
    # current behavior — confirm intent.
    feature_grids, percentile_info = _get_grids(
        feature_values=training_df[explanation_target].values,
        num_grid_points=10, grid_type=None, percentile_range='percentile',
        grid_range=None)

    # integer grid covering [min, max - 1) of the computed grid values;
    # computed once and reused for both the plot and the index lookup
    grid_lo = int(feature_grids.min())
    grid_hi = int(feature_grids.max() - 1)
    custom_grids = list(range(grid_lo, grid_hi))

    fig, axes, summary_df = info_plots.target_plot(
        df=training_df, feature=explanation_target,
        feature_name='feature value', cust_grid_points=custom_grids,
        target='label', show_percentile=False)

    # first-occurrence row index for each grid value, captured while the
    # frame is still encoded so the indices match the encoded values
    encoded_values = list(training_df[explanation_target].values)
    indexs = [encoded_values.index(x) for x in range(grid_lo, grid_hi)]

    encoder = retrieve_proper_encoder(job)
    encoder.decode(training_df, job.encoding)
    values = training_df[explanation_target].values

    lst = []
    if job.encoding.value_encoding != ValueEncodings.BOOLEAN.value:
        # NOTE(review): ``len(indexs) - 1`` skips the final bucket — looks
        # like an off-by-one, left unchanged to preserve behavior.
        for x in range(len(indexs) - 1):
            lst.append({
                'value': values[indexs[x]],
                'label': summary_df['label'][x],
                'count': int(summary_df['count'][x]),
            })
    else:
        for x in range(summary_df.shape[0]):
            lst.append({
                'value': summary_df['display_column'][x],
                'label': summary_df['label'][x],
                'count': int(summary_df['count'][x]),
            })
    return lst
def test_grids_grid_type(self, titanic_data):
    # grid_type='equal'
    grids, pct_info = _get_grids(
        feature_values=titanic_data["Fare"].values,
        num_grid_points=10,
        grid_type="equal",
        percentile_range=None,
        grid_range=None,
    )
    # equal spacing over the full Fare range: spot-check three points
    expected = np.array([0.0, 227.70186666, 398.47826666])
    assert_array_almost_equal(grids[[0, 4, 7]], expected, decimal=8)
    # equal grids carry no percentile annotations
    assert pct_info == []
def test_grids_default(self, titanic_data):
    # default setting
    grids, pct_info = _get_grids(
        feature_values=titanic_data["Fare"].values,
        num_grid_points=10,
        grid_type="percentile",
        percentile_range=None,
        grid_range=None,
    )
    # spot-check three grid points and their percentile labels
    probe = [0, 4, 7]
    assert_array_almost_equal(
        grids[probe], np.array([0.0, 13.0, 35.11111111]), decimal=8)
    assert_array_equal(
        pct_info[probe], np.array(["(0.0)", "(44.44)", "(77.78)"]))