}

iterator = itertools.product(time_set, target_set)
n_iter_dict = {}

for combo in iterator:
    time, target = combo

    # LOAD DATA
    parameters = {
        'time': time,
        'target': target,
        'drop_opt': drop_opt,
        'model_name': None
    }
    test_examples, test_targets, info = _load_test_data(**parameters,
                                                        return_info=True)

    print('\n')
    print('-' * 50)
    print('Evaluating on the testing dataset....')
    print(f'Valid Time  : {time}')
    print(f'Drop Option : {drop_opt}')
    print(f'Target      : {target}')
    print('-' * 50)

    if independent_samples:
        # Subset to runs initialized at the selected init_times and to every
        # sixth forecast time index.
        test_times = info['Run Time'].values
        test_fti = info["FCST_TIME_IDX"].values.astype(int)
        idx = np.where(info['Run Time'].isin(init_times).values
                       & (test_fti % 6 == 0))[0]
        test_examples = test_examples.iloc[idx]
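        # Hypothetical continuation (not part of the original snippet): keep the
        # targets and metadata consistent with the subsampled examples.
        test_targets = np.asarray(test_targets)[idx]
        info = info.iloc[idx]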
n_samples = 5

save_fname = join(
    path,
    f'shap_values_performance_{model_names[0]}_{target}_{time}{drop_opt}.pkl')
estimators = load_models(time, target, drop_opt, model_names)
explainer = InterpretToolkit(estimators=estimators,
                             estimator_names=model_names,
                             X=X.copy(),
                             y=np.copy(y))

predictions = estimators[0].predict_proba(X)[:, 1]
print(np.max(predictions))

X_test, y_test, _ = _load_test_data(return_info=True, **parameters)
_predictions = estimators[0].predict_proba(X_test)[:, 1]
print(np.sort(_predictions)[::-1])

performance_dict = get_indices_based_on_performance(
    estimator=estimators[0],
    X=X,
    y=y,
    n_samples=n_samples,
    estimator_output='probability',
)

print('\n')
for key in ['Best Hits', 'Worst False Alarms']:
    idxs = performance_dict[key]
    print(predictions[idxs])
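# Hypothetical continuation (not part of the original example): compute SHAP
# values for the best/worst performing cases found above and pickle them to
# save_fname. The use of the standalone shap package here is an assumption.
import pickle
import shap

subset_idx = np.concatenate([performance_dict['Best Hits'],
                             performance_dict['Worst False Alarms']])
background = shap.sample(X, 100)  # small background set for KernelExplainer
shap_explainer = shap.KernelExplainer(estimators[0].predict_proba, background)
shap_values = shap_explainer.shap_values(X.iloc[subset_idx])

with open(save_fname, 'wb') as f:
    pickle.dump({'shap_values': shap_values, 'indices': subset_idx}, f)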
combos = itertools.product(time_set, target_set, direction_set)

############################################
for combo in combos:
    start_time = datetime.datetime.now()
    time, target, direction = combo
    parameters = {
        'time': time,
        'target': target,
        'drop_opt': drop_opt,
    }
    if data_mode == 'training':
        X, y = _load_train_data(**parameters)
    else:
        X, y = _load_test_data(**parameters)

    # Rank at most the top 10 predictors.
    n_vars = min(10, len(X.columns))

    # Load the models
    estimators = load_models(time, target, drop_opt, model_names)
    explainer = InterpretToolkit(estimators=estimators,
                                 estimator_names=model_names,
                                 X=X,
                                 y=y)

    # Compute the importance
    results = explainer.permutation_importance(n_vars=n_vars,
                                               evaluation_fn=metric,
                                               subsample=subsample,
                                               n_jobs=n_jobs)
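    # Hypothetical follow-up (not part of the original example): persist the
    # permutation-importance results for this combo and report the run time.
    # The filename pattern is an assumption.
    import joblib
    out_fname = f'perm_imp_{target}_{time}_{direction}{drop_opt}.pkl'
    joblib.dump(results, out_fname)
    print(f'Elapsed: {datetime.datetime.now() - start_time}')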
Example #4
    parameters = {
        'time': time,
        'model_name': model_name,
        'target': target,
        'resample': resample,
        'normalize': normalize,
        'imputer': imputer_method,
        'drop_opt': drop_opt
    }

    print('First load of the data...')
    if data_mode == 'training':
        examples, target_values = _load_train_data(**parameters)
    else:
        examples, target_values = _load_test_data(**parameters)

    # feature_names = list(examples.columns)
    # feature_names.remove('Run Date')

    calibrated_pipeline = _load_model(**parameters)['model']
    # model = calibrated_pipeline.calibrated_classifiers_[0].base_estimator
    # examples_transformed, targets_transformed = just_transforms(model, examples, target_values)

    # if resample == 'under':
    #     model_names = [model_name + '_under']
    # else:
    #     model_names = [model_name]

    # if calibrate != '':
    #     models = [calibrated_pipeline]
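    # Hypothetical continuation (not part of the original example): score the
    # calibrated pipeline on the loaded examples; roc_auc_score is an assumption.
    from sklearn.metrics import roc_auc_score
    probs = calibrated_pipeline.predict_proba(examples)[:, 1]
    print(f'{model_name} AUC: {roc_auc_score(target_values, probs):.3f}')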
Example #5
n_iter_dict = {}

for combo in iterator:
    time, model_name, pair = combo
    target_var, resample_method, normalize_method = pair

    print(model_name, target_var)
    parameters = {
        'time': time,
        'target': target_var,
        'drop_opt': drop_opt,
    }

    print('First load of the data...')
    test_examples, test_targets = _load_test_data(**parameters)

    print(test_examples.columns)
    print(len(test_examples.columns))
    if 'Log' in model_name:
        normalize_method = 'standard'

    save_fname = f'{model_name}_{time}_{target_var}_{resample_method}_{normalize_method}_{imputer_method}{drop_opt}.pkl'

    print(f'Loading {save_fname}...')

    clf = joblib.load(join(config.ML_MODEL_SAVE_PATH, save_fname))

    try:
        predictions = clf.predict_proba(test_examples)[:, 1]
    except:
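        # Hypothetical handler (the original example is truncated here): some
        # pipelines only accept NumPy arrays rather than DataFrames.
        predictions = clf.predict_proba(test_examples.values)[:, 1]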
Example #6
import matplotlib.pyplot as plt

from wofs.util import config
from wofs_ml.common.load_results import _load_train_data, _load_test_data, _load_model, just_transforms

parameters = {
    'time': 'first_hour',
    'target': 'tornado',
    'resample': 'under',
    'normalize': 'standard',
    'imputer': 'simple',
    'drop_opt': '',
    'model_name': None
}

print('First load of the data...')
train_examples, _ = _load_train_data(**parameters)
test_examples, _ = _load_test_data(**parameters)

predictor = ['lcl_ml_ens_mean_spatial_mean']

train_examples = train_examples[predictor].values
test_examples = test_examples[predictor].values

fig = plt.figure(figsize=(6, 6), dpi=300)
plt.hist(train_examples,
         bins='auto',
         color='lightgreen',
         alpha=0.8,
         rwidth=0.85,
         log=True)
plt.hist(test_examples,
         bins='auto',