# Collect each member's positive-class probabilities, then average them into
# a single ensemble prediction.
ensemble_predictions.append(clf.predict_proba(test_examples)[:, 1])
ensemble_predictions = np.mean(ensemble_predictions, axis=0)

save_fname = f'Ensemble_{time}_{target}_{drop_opt}.pkl'
ds_fname = join(config.ML_RESULTS_PATH,
                'verification_results_' + save_fname.replace('.pkl', '.nc'))

# Reuse the same bootstrap resamples for every model evaluated on a given
# time/target pair so the verification scores are directly comparable.
if f'{time}_{target}' not in n_iter_dict:
    bootstrap_indices = [
        np.random.choice(len(test_targets), size=len(test_targets))
        for _ in range(n_iter)
    ]
    n_iter_dict[f'{time}_{target}'] = bootstrap_indices
bootstrap_indices = n_iter_dict[f'{time}_{target}']

score_ds = compute_multiple_metrics(
    test_targets,
    ensemble_predictions,
    bootstrap_indices,
    metrics,
    metric_names,
    metric_dimensions,
)
score_ds.to_netcdf(ds_fname)

send_email('Ensemble model evaluations are done running!')
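# compute_multiple_metrics is defined elsewhere in the repo; the sketch below
# is only a guess at its behaviour, assuming targets/predictions are NumPy
# arrays, each entry of `metrics` is a callable (y_true, y_prob) -> scalar,
# `metric_names` gives the output variable names, and `metric_dimensions`
# names the bootstrap dimension for each variable.
import numpy as np
import xarray as xr

def compute_multiple_metrics(targets, predictions, bootstrap_indices,
                             metrics, metric_names, metric_dimensions):
    """Score every bootstrap resample with each metric and stack the results."""
    data_vars = {}
    for metric, name, dim in zip(metrics, metric_names, metric_dimensions):
        scores = [metric(targets[idx], predictions[idx])
                  for idx in bootstrap_indices]
        data_vars[name] = (dim, np.asarray(scores))
    return xr.Dataset(data_vars)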
X_subset, y_subset = get_independent_samples(X, y, info)

explainer = InterpretToolkit(estimators=estimators,
                             estimator_names=model_names,
                             X=X_subset.copy(),
                             y=y_subset.copy())

# Use a small random sample of the full dataset as the SHAP background set.
background_dataset = shap.sample(X, 100)

results = explainer.local_contributions(method='shap',
                                         background_dataset=background_dataset,
                                         performance_based=True,
                                         n_samples=n_samples)
explainer.save(fname=save_fname, data=results)

# Report how long the SHAP computation took.
duration = datetime.datetime.now() - start_time
seconds = duration.total_seconds()
hours = int(seconds // 3600)
minutes = int((seconds % 3600) // 60)
seconds = seconds % 60

message = f"""
SHAP feature contribution values for {target} {time} are done!
Started at {start_time.strftime("%I:%M %p")},
Duration : {hours:d} hours : {minutes:d} minutes : {seconds:.2f} seconds
"""
send_email(message)

send_email('SHAP value computations are done running!')
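# send_email is a small project helper whose implementation is not shown here;
# a minimal sketch using the standard library, assuming a local SMTP server
# and placeholder addresses (all three are assumptions, not the project's
# actual values).
import smtplib
from email.message import EmailMessage

def send_email(body, subject='ML pipeline status'):
    msg = EmailMessage()
    msg['Subject'] = subject
    msg['From'] = 'pipeline@example.com'   # placeholder sender
    msg['To'] = 'user@example.com'         # placeholder recipient
    msg.set_content(body)
    with smtplib.SMTP('localhost') as server:
        server.send_message(msg)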
# print(f'Computing for {ds_fname}...')
save_fname = (f'{model_name}_{time}_{target}_{resample_method}_'
              f'{normalize_method}_{imputer_method}{drop_opt}'
              f'{feature_selection_method}.pkl')
try:
    clf = joblib.load(join(config.ML_MODEL_SAVE_PATH, save_fname))['model']
except (FileNotFoundError, KeyError):
    print(f'{save_fname} DOES NOT EXIST!')
    continue

# Reuse the same bootstrap resamples for every model evaluated on a given
# time/target pair so the verification scores are directly comparable.
if f'{time}_{target}' not in n_iter_dict:
    bootstrap_indices = [
        np.random.choice(len(test_targets), size=len(test_targets))
        for _ in range(n_iter)
    ]
    n_iter_dict[f'{time}_{target}'] = bootstrap_indices
bootstrap_indices = n_iter_dict[f'{time}_{target}']

predictions = clf.predict_proba(test_examples)[:, 1]
score_ds = compute_multiple_metrics(
    test_targets,
    predictions,
    bootstrap_indices,
    metrics,
    metric_names,
    metric_dimensions,
)
score_ds.to_netcdf(ds_fname)

send_email('Model evaluations are done running!')
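# A brief sketch of how the saved verification files could be summarized
# afterwards, assuming each metric variable carries a single bootstrap
# dimension; the quantile choices here are illustrative, not the project's.
import xarray as xr

score_ds = xr.open_dataset(ds_fname)
for name, da in score_ds.data_vars.items():
    lo, hi = da.quantile([0.025, 0.975]).values
    print(f'{name}: mean={float(da.mean()):.3f}, 95% CI=[{lo:.3f}, {hi:.3f}]')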