def _get_data(subchunk, config):
    """Load and transform the covariates for one subchunk.

    Parameters
    ----------
    subchunk
        Index of the partition to load. It is used arithmetically
        (``subchunk + 1``) for the log message, so it is expected to be an
        integer.
    config
        Run configuration. This function reads ``mask``, ``feature_sets``,
        ``final_transform``, ``clustering``, ``algorithm`` and
        ``multirandomforest`` from it.

    Returns
    -------
    tuple
        ``(x, features_names)``. ``x`` is the transformed covariate array
        after ``_mask_rows`` has been applied, or a fully masked boolean
        placeholder when the partition is entirely masked.
        ``features_names`` is whatever ``geoio.feature_names(config)``
        returns.
    """
    features_names = geoio.feature_names(config)

    # NOTE: This returns an *untransformed* x,
    # which is ok as we just need dummies here
    if config.mask:
        mask_x = _mask(subchunk, config)
        # Every rank contributes its mask, so the "entirely masked" decision
        # below is taken on the gathered data and is the same on all ranks.
        all_mask_x = np.ma.vstack(mpiops.comm.allgather(mask_x))
        if all_mask_x.shape[0] == np.sum(all_mask_x.mask):
            # Builtin ``bool`` instead of ``np.bool``: that alias was
            # deprecated in NumPy 1.20 and removed in 1.24.
            x = np.ma.zeros((mask_x.shape[0], len(features_names)),
                            dtype=bool)
            x.mask = True
            log.info('Partition {} covariates are not loaded as '
                     'the partition is entirely masked.'.format(subchunk + 1))
            return x, features_names

    transform_sets = [k.transform_set for k in config.feature_sets]
    extracted_chunk_sets = geoio.image_subchunks(subchunk, config)
    log.info("Applying feature transforms")
    # Only the first element of the transform result is needed here.
    x = features.transform_features(extracted_chunk_sets,
                                    transform_sets,
                                    config.final_transform,
                                    config)[0]

    # only check/correct float32 conversion for Ensemble models
    # (short-circuit keeps the model from being instantiated when clustering)
    if not config.clustering and (
            isinstance(modelmaps[config.algorithm](), BaseEnsemble)
            or config.multirandomforest):
        x = _fix_for_corrupt_data(x, features_names)

    return _mask_rows(x, subchunk, config), features_names
def permutation_importance(model, x_all, targets_all, config):
    """Compute permutation importance and write one CSV per scoring metric.

    For each of a fixed set of regression scorers, a prefit
    ``PermutationImportance`` is fitted on ``(x_all, y)`` through
    ``apply_multiple_masked``. The resulting weights are converted to a
    data frame with ``eli5.explain_weights_df`` and saved as
    ``<config.output_dir>/<config.name>_permutation_importance_<score>.csv``.

    Models exposing ``predict_proba`` are treated as classifiers and are
    skipped; no files are written for them.

    Parameters
    ----------
    model
        Fitted estimator (used with ``cv='prefit'``, ``refit=False``).
    x_all
        Covariates passed as the first element of ``data``.
    targets_all
        Object whose ``observations`` attribute supplies the target values.
    config
        Run configuration; ``algorithm``, ``output_dir`` and ``name`` are
        read here, and it is passed on to ``geoio.feature_names``.

    Raises
    ------
    AttributeError
        If ``config.algorithm`` is not a key of ``transformed_modelmaps``.
    """
    _logger.info("Computing permutation importance!!")
    if config.algorithm not in transformed_modelmaps:
        raise AttributeError("Only the following can be used for permutation "
                             "importance {}".format(
                                 list(transformed_modelmaps.keys())))

    y = targets_all.observations

    if hasattr(model, 'predict_proba'):
        # Only regression scorers are evaluated below; make the skip visible
        # instead of returning silently.
        _logger.warning("Permutation importance is only computed for "
                        "regression models; skipping.")
        return

    # Loop-invariant values: look them up once rather than once per scorer.
    feature_names = geoio.feature_names(config)
    output_dir = Path(config.output_dir)
    scorers = ('explained_variance', 'r2',
               'neg_mean_absolute_error', 'neg_mean_squared_error')

    for score in scorers:
        pi_cv = apply_multiple_masked(
            PermutationImportance(model, scoring=score,
                                  cv='prefit', n_iter=10,
                                  refit=False).fit,
            data=(x_all, y)
        )
        df_picv = eli5.explain_weights_df(
            pi_cv, feature_names=feature_names, top=100)
        csv = output_dir.joinpath(
            config.name + "_permutation_importance_{}.csv".format(
                score)).as_posix()
        df_picv.to_csv(csv, index=False)
def _get_data(subchunk, config):
    """Load and transform the covariates for one subchunk.

    Parameters
    ----------
    subchunk
        Index of the partition to load. It is used arithmetically
        (``subchunk + 1``) for the log message, so it is expected to be an
        integer.
    config
        Run configuration. This function reads ``mask``, ``feature_sets``
        and ``final_transform`` from it.

    Returns
    -------
    tuple
        ``(x, features_names)``. ``x`` is the transformed covariate array
        after ``_mask_rows`` has been applied, or a fully masked boolean
        placeholder when the partition is entirely masked.
        ``features_names`` is whatever ``geoio.feature_names(config)``
        returns.
    """
    features_names = geoio.feature_names(config)

    if config.mask:
        mask_x = _mask(subchunk, config)
        # Every rank contributes its mask, so the "entirely masked" decision
        # below is taken on the gathered data and is the same on all ranks.
        all_mask_x = np.ma.vstack(mpiops.comm.allgather(mask_x))
        if all_mask_x.shape[0] == np.sum(all_mask_x.mask):
            # Builtin ``bool`` instead of ``np.bool``: that alias was
            # deprecated in NumPy 1.20 and removed in 1.24.
            x = np.ma.zeros((mask_x.shape[0], len(features_names)),
                            dtype=bool)
            x.mask = True
            log.info('Partition {} covariates are not loaded as '
                     'the partition is entirely masked.'.format(subchunk + 1))
            return x, features_names

    extracted_chunk_sets = geoio.image_subchunks(subchunk, config)
    transform_sets = [k.transform_set for k in config.feature_sets]
    log.info("Applying feature transforms")
    # Only the first element of the transform result is needed here.
    x = features.transform_features(extracted_chunk_sets,
                                    transform_sets,
                                    config.final_transform,
                                    config)[0]
    return _mask_rows(x, subchunk, config), features_names