        ensemble_predictions.append(clf.predict_proba(test_examples)[:, 1])

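    # Average the positive-class probabilities across the ensemble members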
    ensemble_predictions = np.mean(ensemble_predictions, axis=0)

    save_fname = f'Ensemble_{time}_{target}_{drop_opt}.pkl'
    ds_fname = join(
        config.ML_RESULTS_PATH,
        'verification_results_' + save_fname.replace('.pkl', '.nc'))

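    # Build the bootstrap resampling indices once per (time, target) pair and cache them
    # so every model is evaluated on the same resamples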
    if f'{time}_{target}' not in n_iter_dict:
        bootstrap_indices = [
            np.random.choice(len(test_targets), size=len(test_targets))
            for i in range(n_iter)
        ]
        n_iter_dict[f'{time}_{target}'] = bootstrap_indices

    bootstrap_indices = n_iter_dict[f'{time}_{target}']

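    # Compute the bootstrapped verification metrics and write them out as netCDF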
    score_ds = compute_multiple_metrics(
        test_targets,
        ensemble_predictions,
        bootstrap_indices,
        metrics,
        metric_names,
        metric_dimensions,
    )

    score_ds.to_netcdf(ds_fname)

send_email('Ensemble model evaluations are done running!')
Example #2
    X_subset, y_subset = get_independent_samples(X, y, info)
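    # Wrap the fitted estimators and the independent sample in an InterpretToolkit explainer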
    explainer = InterpretToolkit(estimators=estimators,
                                 estimator_names=model_names,
                                 X=X_subset.copy(),
                                 y=y_subset.copy())
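    # Randomly subsample 100 rows of X to serve as the SHAP background dataset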
    background_dataset = shap.sample(X, 100)

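    # Compute local SHAP contributions for a performance-based subset of n_samples examples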
    results = explainer.local_contributions(
        method='shap',
        background_dataset=background_dataset,
        performance_based=True,
        n_samples=n_samples)

    explainer.save(fname=save_fname, data=results)

    duration = datetime.datetime.now() - start_time
    seconds = duration.total_seconds()
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    seconds = seconds % 60

    message = f"""
               SHAP feature contribution values for {target} {time} are done!
               Started at {start_time.strftime("%I:%M %p")},
               Duration: {hours} hours, {minutes} minutes, {seconds:.1f} seconds
              """
    send_email(message)

message = 'SHAP value computations are done running!'
send_email(message)
Example #3
    # print(f'Computing for {ds_fname}...')

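    # Load the saved model for this configuration; skip this iteration if it was never trained and saved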
    try:
        save_fname = f'{model_name}_{time}_{target}_{resample_method}_{normalize_method}_{imputer_method}{drop_opt}{feature_selection_method}.pkl'
        clf = joblib.load(join(config.ML_MODEL_SAVE_PATH, save_fname))['model']
    except (FileNotFoundError, KeyError):
        print(f'{save_fname} DOES NOT EXIST!')
        continue

    # Output path for the verification metrics
    ds_fname = join(
        config.ML_RESULTS_PATH,
        'verification_results_' + save_fname.replace('.pkl', '.nc'))

    if f'{time}_{target}' not in n_iter_dict:
        bootstrap_indices = [
            np.random.choice(len(test_targets), size=len(test_targets))
            for i in range(n_iter)
        ]
        n_iter_dict[f'{time}_{target}'] = bootstrap_indices

    bootstrap_indices = n_iter_dict[f'{time}_{target}']
    predictions = clf.predict_proba(test_examples)[:, 1]
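    # Score this model's probabilities with the bootstrapped verification metrics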
    score_ds = compute_multiple_metrics(
        test_targets,
        predictions,
        bootstrap_indices,
        metrics,
        metric_names,
        metric_dimensions,
    )

    score_ds.to_netcdf(ds_fname)

send_email('Model evaluations are done running!')