Code example #1
 def _score_with_pred_proba(self,
                            y,
                            y_internal,
                            y_pred_proba_internal,
                            metric,
                            sample_weight=None,
                            weight_evaluation=None):
     metric = get_metric(metric, self.problem_type, 'leaderboard_metric')
     if weight_evaluation is None:
         weight_evaluation = self.weight_evaluation
     if metric.needs_pred:
         if self.problem_type == BINARY:
             # Use 1 and 0, otherwise f1 can crash due to unknown pos_label.
             y_pred = get_pred_from_proba(y_pred_proba_internal, problem_type=self.problem_type)
             y_tmp = y_internal
         else:
             y_pred = self.label_cleaner.inverse_transform_proba(y_pred_proba_internal, as_pred=True)
             y_tmp = y
     elif metric.needs_quantile:
         y_pred = self.label_cleaner.inverse_transform_proba(y_pred_proba_internal, as_pred=True)
         y_tmp = y
     else:
         y_pred = self.label_cleaner.inverse_transform_proba(y_pred_proba_internal, as_pred=False)
         y_tmp = y_internal
     return compute_weighted_metric(y_tmp, y_pred, metric, weights=sample_weight, weight_evaluation=weight_evaluation, quantile_levels=self.quantile_levels)
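
The helper above resolves an arbitrary metric name into a Scorer via get_metric and then scores either hard predictions or class probabilities, depending on metric.needs_pred. Below is a minimal standalone sketch of that pattern, assuming autogluon.core is installed and that a resolved Scorer can be called directly as scorer(y_true, y_pred); the snippet itself routes scoring through compute_weighted_metric instead.

import numpy as np
from autogluon.core.metrics import get_metric

y_true = np.array([0, 1, 1, 0, 1])
y_pred_proba = np.array([0.1, 0.8, 0.6, 0.3, 0.9])  # P(class == 1)

# roc_auc is scored on probabilities; accuracy needs hard 0/1 predictions,
# mirroring the needs_pred branch in the method above.
auc = get_metric('roc_auc', 'binary', 'leaderboard_metric')
acc = get_metric('accuracy', 'binary', 'leaderboard_metric')

print(auc(y_true, y_pred_proba))
print(acc(y_true, (y_pred_proba > 0.5).astype(int)))
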
Code example #2
File: basic_v1.py Project: ssheff/autogluon
    def evaluate(self, valid_data, metrics):
        """ Report the predictive performance evaluated for a given dataset.
            
            Parameters
            ----------
            valid_data : str or :class:`TabularDataset` or `pandas.DataFrame`
                This Dataset must also contain the label-column with the same column-name as specified during `fit()`.
                If str is passed, `valid_data` will be loaded using the str value as the file path.
            metrics : List[str]
                A list of names of metrics to report.
 
            Returns
            -------
            Dict mapping metric -> score calculated over the given dataset.
        """
        if isinstance(metrics, str):
            metrics = [metrics]
        assert self.net is not None
        if not isinstance(valid_data, TabularDataset):
            valid_data = TabularDataset(
                valid_data,
                columns=self._feature_columns + self._label_columns,
                column_properties=self._column_properties)
        ground_truth = np.array(valid_data.table[self._label_columns[0]].apply(
            self._column_properties[self._label_columns[0]].transform))
        if self._problem_types[0] == _C.CLASSIFICATION:
            predictions = self.predict_proba(valid_data)
        else:
            predictions = self.predict(valid_data)
        metric_scores = {
            metric: calculate_metric(get_metric(metric), ground_truth,
                                     predictions, self.problem_types[0])
            for metric in metrics
        }
        return metric_scores
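
evaluate() returns a dict mapping each requested metric name to its score. A sketch of the same shape in isolation, assuming autogluon.core.metrics.get_metric resolves these names into callable scorers; note that AutoGluon scorers follow a higher-is-better convention (see code example #9 below), so error metrics come back negated.

import numpy as np
from autogluon.core.metrics import get_metric

y_true = np.array([3.1, 0.5, 2.2, 1.8])
y_pred = np.array([2.9, 0.7, 2.0, 2.1])

# Dict mapping metric -> score, mirroring the documented return value.
metric_scores = {name: get_metric(name, 'regression')(y_true, y_pred)
                 for name in ['mean_absolute_error', 'r2']}
print(metric_scores)  # mean_absolute_error is reported negated (higher is better)
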
Code example #3
    def __init__(self, path_context: str, label: str, feature_generator: PipelineFeatureGenerator, ignored_columns: list = None, label_count_threshold=10,
                 problem_type=None, eval_metric=None, positive_class=None, cache_data=True, is_trainer_present=False, random_state=0):
        self.path, self.model_context, self.save_path = self.create_contexts(path_context)
        self.label = label
        self.ignored_columns = ignored_columns
        if self.ignored_columns is None:
            self.ignored_columns = []
        self.threshold = label_count_threshold
        self.problem_type = problem_type
        self.eval_metric = get_metric(eval_metric, self.problem_type, 'eval_metric')
        self.cache_data = cache_data
        if not self.cache_data:
            logger.log(30, 'Warning: `cache_data=False` will disable or limit advanced functionality after training such as feature importance calculations. It is recommended to set `cache_data=True` unless you explicitly wish to not have the data saved to disk.')
        self.is_trainer_present = is_trainer_present
        if random_state is None:
            random_state = random.randint(0, 1000000)
        self.random_state = random_state
        self.cleaner = None
        self.label_cleaner: LabelCleaner = None
        self.feature_generator: PipelineFeatureGenerator = feature_generator

        self.trainer: AbstractTrainer = None
        self.trainer_type = None
        self.trainer_path = None
        self.reset_paths = False

        self._positive_class = positive_class

        try:
            from ..version import __version__
            self.version = __version__
        except:
            self.version = None
Code example #4
    def __init__(self,
                 path_context: str,
                 label: str,
                 feature_generator: PipelineFeatureGenerator,
                 ignored_columns: list = None,
                 label_count_threshold=10,
                 problem_type=None,
                 eval_metric=None,
                 positive_class=None,
                 cache_data=True,
                 is_trainer_present=False,
                 random_state=0,
                 sample_weight=None,
                 weight_evaluation=False):
        self.path, self.model_context, self.save_path = self.create_contexts(
            path_context)
        self.label = label
        self.ignored_columns = ignored_columns
        if self.ignored_columns is None:
            self.ignored_columns = []
        self.threshold = label_count_threshold
        self.problem_type = problem_type
        self.eval_metric = get_metric(eval_metric, self.problem_type,
                                      'eval_metric')
        self.cache_data = cache_data
        if not self.cache_data:
            logger.log(
                30,
                'Warning: `cache_data=False` will disable or limit advanced functionality after training such as feature importance calculations. It is recommended to set `cache_data=True` unless you explicitly wish to not have the data saved to disk.'
            )
        self.is_trainer_present = is_trainer_present
        if random_state is None:
            random_state = random.randint(0, 1000000)
        self.random_state = random_state
        self.cleaner = None
        self.label_cleaner: LabelCleaner = None
        self.feature_generator: PipelineFeatureGenerator = feature_generator

        self.trainer: AbstractTrainer = None
        self.trainer_type = None
        self.trainer_path = None
        self.reset_paths = False

        self._positive_class = positive_class
        self.sample_weight = sample_weight
        self.weight_evaluation = weight_evaluation
        if sample_weight is not None and not isinstance(sample_weight, str):
            raise ValueError(
                "sample_weight must be a string indicating the name of column that contains sample weights. If you have a vector of sample weights, first add these as an extra column to your data."
            )
        if weight_evaluation and sample_weight is None:
            raise ValueError(
                "Must specify sample_weight column if you specify weight_evaluation=True"
            )
        try:
            from ..version import __version__
            self.version = __version__
        except:
            self.version = None
Code example #5
def estimate_importance(dataset, model_name):
    if os.path.exists(
            os.path.join('feature_importance', dataset, model_name,
                         'importance.csv')):
        print(f'Found {dataset}, {model_name}')
        return
    model_remote_path = stat_df.loc[model_name, dataset]
    postfix = '/test_score.json'

    remote_dir_name = model_remote_path[:-len(postfix)]

    def downloadDirectoryFroms3(bucketName, remoteDirectoryName,
                                local_dir_path):
        s3_resource = boto3.resource('s3')
        bucket = s3_resource.Bucket(bucketName)
        for obj in bucket.objects.filter(Prefix=remoteDirectoryName):
            print(obj.key)
            download_path = os.path.join(local_dir_path, obj.key)
            if not os.path.exists(os.path.dirname(download_path)):
                os.makedirs(os.path.dirname(download_path), exist_ok=True)
            bucket.download_file(obj.key, download_path)

    local_dir_name = os.path.join(download_path, remote_dir_name)
    if os.path.exists(local_dir_name):
        pass
    else:
        downloadDirectoryFroms3('automl-mm-bench', remote_dir_name,
                                download_path)
    test_dataset = dataset_registry.create(dataset, 'test')
    if model_name == MULTIMODAL_TEXT_MODEL_NAME:
        predictor = MultiModalTextModel.load(
            os.path.join(local_dir_name, 'saved_model'))
    elif model_name == TABULAR_MODEL_NAME:
        predictor = TabularPredictor.load(os.path.join(local_dir_name))
    elif model_name == STACK_ENSEMBLE_MODEL_NAME:
        predictor = TabularPredictor.load(os.path.join(local_dir_name))
    else:
        raise NotImplementedError
    sample_size = min(len(test_dataset.data), 1000)
    if model_name == TABULAR_MODEL_NAME:
        importance_df = predictor.feature_importance(
            test_dataset.data[test_dataset.feature_columns +
                              test_dataset.label_columns],
            subsample_size=sample_size)
    else:
        importance_df = compute_permutation_feature_importance(
            test_dataset.data[test_dataset.feature_columns],
            test_dataset.data[test_dataset.label_columns[0]],
            predict_func=predictor.predict,
            eval_metric=get_metric(test_dataset.metric),
            subsample_size=sample_size,
            num_shuffle_sets=3)
    os.makedirs(os.path.join('feature_importance', dataset, model_name),
                exist_ok=True)
    importance_df.to_csv(
        os.path.join('feature_importance', dataset, model_name,
                     'importance.csv'))
    print(importance_df)
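
compute_permutation_feature_importance above estimates importance by shuffling one feature at a time and measuring how much the evaluation score drops. A from-scratch toy sketch of that idea, using scikit-learn's accuracy_score and a stand-in predict function; none of these names come from AutoGluon.

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

rng = np.random.default_rng(0)
X = pd.DataFrame({'signal': rng.normal(size=200), 'noise': rng.normal(size=200)})
y = (X['signal'] > 0).astype(int)

def predict(df):
    # Stand-in "model" that only ever looks at the 'signal' column.
    return (df['signal'] > 0).astype(int)

base_score = accuracy_score(y, predict(X))
for col in X.columns:
    X_shuffled = X.copy()
    X_shuffled[col] = rng.permutation(X_shuffled[col].values)
    drop = base_score - accuracy_score(y, predict(X_shuffled))
    print(col, drop)  # 'signal' should show a large drop, 'noise' roughly none
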
Code example #6
 def _get_default_stopping_metric(self):
     if self.eval_metric.name == 'roc_auc':
         stopping_metric = 'log_loss'
     else:
         stopping_metric = self.eval_metric
     stopping_metric = metrics.get_metric(stopping_metric,
                                          self.problem_type,
                                          'stopping_metric')
     return stopping_metric
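
The same rule restated with metric names instead of Scorer objects: when the eval metric is roc_auc, early stopping falls back to log_loss; otherwise the eval metric itself is used. A sketch assuming autogluon.core.metrics.get_metric; the helper name is illustrative.

from autogluon.core.metrics import get_metric

def default_stopping_metric(eval_metric_name, problem_type):
    # roc_auc is swapped for log_loss as the early-stopping signal; any other
    # eval metric doubles as the stopping metric.
    stopping = 'log_loss' if eval_metric_name == 'roc_auc' else eval_metric_name
    return get_metric(stopping, problem_type, 'stopping_metric')

print(default_stopping_metric('roc_auc', 'binary').name)   # expected: log_loss
print(default_stopping_metric('accuracy', 'binary').name)  # expected: accuracy
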
Code example #7
    def __init__(self, path_context: str, label: str, feature_generator: PipelineFeatureGenerator, ignored_columns: list = None, label_count_threshold=10,
                 problem_type=None, quantile_levels=None, eval_metric=None, positive_class=None, cache_data=True, is_trainer_present=False, random_state=0, sample_weight=None, weight_evaluation=False):
        self.path, self.model_context, self.save_path = self.create_contexts(path_context)
        self.label = label
        self.ignored_columns = ignored_columns
        if self.ignored_columns is None:
            self.ignored_columns = []
        self.threshold = label_count_threshold
        self.problem_type = problem_type
        self.eval_metric = get_metric(eval_metric, self.problem_type, 'eval_metric')

        if self.problem_type == QUANTILE and quantile_levels is None:
            raise ValueError("if `problem_type='quantile'`, `quantile_levels` has to be specified")
        if isinstance(quantile_levels, float):
            quantile_levels = [quantile_levels]
        if isinstance(quantile_levels, Iterable):
            for quantile in quantile_levels:
                if quantile <= 0.0 or quantile >= 1.0:
                    raise ValueError("quantile values have to be non-negative and less than 1.0 (0.0 < q < 1.0). "
                                     "For example, 0.95 quantile = 95 percentile")
            quantile_levels = np.sort(np.array(quantile_levels))
        self.quantile_levels = quantile_levels

        self.cache_data = cache_data
        if not self.cache_data:
            logger.log(30, 'Warning: `cache_data=False` will disable or limit advanced functionality after training such as feature importance calculations. It is recommended to set `cache_data=True` unless you explicitly wish to not have the data saved to disk.')
        self.is_trainer_present = is_trainer_present
        if random_state is None:
            random_state = random.randint(0, 1000000)
        self.random_state = random_state
        self.cleaner = None
        self.label_cleaner: LabelCleaner = None
        self.feature_generator: PipelineFeatureGenerator = feature_generator

        self.trainer: AbstractTrainer = None
        self.trainer_type = None
        self.trainer_path = None
        self.reset_paths = False

        self._pre_X_rows = None
        self._post_X_rows = None
        self._positive_class = positive_class
        self.sample_weight = sample_weight
        self.weight_evaluation = weight_evaluation
        if sample_weight is not None and not isinstance(sample_weight, str):
            raise ValueError("sample_weight must be a string indicating the name of column that contains sample weights. If you have a vector of sample weights, first add these as an extra column to your data.")
        if weight_evaluation and sample_weight is None:
            raise ValueError("Must specify sample_weight column if you specify weight_evaluation=True")
        try:
            from ..version import __version__
            self.version = __version__
        except:
            self.version = None
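
The quantile_levels handling in this constructor, isolated into a small helper: accept a single float or an iterable, enforce the open interval (0, 1), and return the levels sorted. The helper name is illustrative, not part of AutoGluon.

from typing import Iterable, Union

import numpy as np

def normalize_quantile_levels(quantile_levels: Union[float, Iterable]) -> np.ndarray:
    if isinstance(quantile_levels, float):
        quantile_levels = [quantile_levels]
    for q in quantile_levels:
        if q <= 0.0 or q >= 1.0:
            raise ValueError('quantile levels must satisfy 0.0 < q < 1.0')
    return np.sort(np.array(quantile_levels))

print(normalize_quantile_levels([0.9, 0.1, 0.5]))  # [0.1 0.5 0.9]
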
Code example #8
 def _score_with_pred(self,
                      y,
                      y_internal,
                      y_pred_internal,
                      metric,
                      sample_weight=None):
     metric = get_metric(metric, self.problem_type, 'leaderboard_metric')
     if self.problem_type == BINARY:
         # Use 1 and 0, otherwise f1 can crash due to unknown pos_label.
         y_pred = y_pred_internal
         y_tmp = y_internal
     else:
         y_pred = self.label_cleaner.inverse_transform(y_pred_internal)
         y_tmp = y
     return compute_weighted_metric(
         y_tmp,
         y_pred,
         metric,
         weights=sample_weight,
         weight_evaluation=self.weight_evaluation,
         quantile_levels=self.quantile_levels)
Code example #9
    def evaluate_predictions(self, y_true, y_pred, sample_weight=None, silent=False, auxiliary_metrics=True, detailed_report=False):
        """ Evaluate predictions. Does not support sample weights since this method reports a variety of metrics.
            Args:
                silent (bool): Should we print which metric is being used as well as performance.
                auxiliary_metrics (bool): Should we compute other (problem_type specific) metrics in addition to the default metric?
                detailed_report (bool): Should we computed more-detailed versions of the auxiliary_metrics? (requires auxiliary_metrics=True).

            Returns single performance-value if auxiliary_metrics=False.
            Otherwise returns dict where keys = metrics, values = performance along each metric.
        """

        is_proba = False
        assert isinstance(y_true, (np.ndarray, pd.Series))
        assert isinstance(y_pred, (np.ndarray, pd.Series, pd.DataFrame))
        self._validate_class_labels(y_true)
        if isinstance(y_pred, np.ndarray):
            if self.problem_type == QUANTILE:
                y_pred = pd.DataFrame(data=y_pred, columns=self.quantile_levels)
            elif len(y_pred.shape) > 1:
                y_pred = pd.DataFrame(data=y_pred, columns=self.class_labels)

        if isinstance(y_pred, pd.DataFrame):
            is_proba = True
        elif not self.eval_metric.needs_pred:
            raise AssertionError(f'`evaluate_predictions` requires y_pred_proba input '
                                 f'when evaluating "{self.eval_metric.name}"... Please generate valid input via `predictor.predict_proba(data)`.\n'
                                 f'This may have occurred if you passed in predict input instead of predict_proba input, '
                                 f'or if you specified `as_multiclass=False` to `predictor.predict_proba(data, as_multiclass=False)`, '
                                 f'which is not supported by `evaluate_predictions`.')
        if is_proba:
            y_pred_proba = y_pred
            y_pred = get_pred_from_proba_df(y_pred_proba, problem_type=self.problem_type)
            if self.problem_type == BINARY:
                # roc_auc crashes if this isn't done
                y_pred_proba = y_pred_proba[self.positive_class]
        else:
            y_pred_proba = None
            y_pred = pd.Series(y_pred)
        if y_pred_proba is not None:
            y_pred_proba_internal = self.label_cleaner.transform_proba(y_pred_proba, as_pandas=True)
        else:
            y_pred_proba_internal = None
        y_true_internal = self.label_cleaner.transform(y_true)  # Get labels in numeric order
        y_true_internal = y_true_internal.fillna(-1)
        y_pred_internal = self.label_cleaner.transform(y_pred)  # Get labels in numeric order

        # Compute auxiliary metrics:
        auxiliary_metrics_lst = [self.eval_metric]
        performance_dict = {}

        if auxiliary_metrics:
            if self.problem_type == REGRESSION:  # Adding regression metrics
                auxiliary_metrics_lst += [
                    'root_mean_squared_error',
                    'mean_squared_error',
                    'mean_absolute_error',
                    'r2',
                    'pearsonr',
                    'median_absolute_error',
                ]
            if self.problem_type in [BINARY, MULTICLASS]:  # Adding classification metrics
                auxiliary_metrics_lst += [
                    'accuracy',
                    'balanced_accuracy',
                    # 'log_loss',  # Don't include as it probably adds more confusion to novice users (can be infinite)
                    'mcc',
                ]
            if self.problem_type == BINARY:  # binary-specific metrics
                auxiliary_metrics_lst += [
                    'roc_auc',
                    'f1',
                    'precision',
                    'recall',
                ]

        scoring_args = dict(
            y=y_true,
            y_internal=y_true_internal,
            weight_evaluation=False,
        )

        if sample_weight is not None:
            scoring_args['sample_weight'] = sample_weight
            scoring_args['weight_evaluation'] = True

        for aux_metric in auxiliary_metrics_lst:
            if isinstance(aux_metric, str):
                aux_metric = get_metric(metric=aux_metric, problem_type=self.problem_type, metric_type='aux_metric')
            if not aux_metric.needs_pred and y_pred_proba_internal is None:
                logger.log(15, f'Skipping {aux_metric.name} because no prediction probabilities are available to score.')
                continue

            if aux_metric.name not in performance_dict:
                if y_pred_proba_internal is not None:
                    score = self._score_with_pred_proba(
                        y_pred_proba_internal=y_pred_proba_internal,
                        metric=aux_metric,
                        **scoring_args
                    )
                else:
                    score = self._score_with_pred(
                        y_pred_internal=y_pred_internal,
                        metric=aux_metric,
                        **scoring_args
                    )
                performance_dict[aux_metric.name] = score

        if self.eval_metric.name in performance_dict:
            score_eval = performance_dict[self.eval_metric.name]
            score_eval_flipped = self.eval_metric.convert_score_to_sklearn_val(score_eval)  # flip negative once again back to positive (so higher is no longer necessarily better)
            if score_eval_flipped != score_eval:
                flipped = True
            else:
                flipped = False
            if not silent:
                logger.log(20, f"Evaluation: {self.eval_metric.name} on test data: {score_eval}")
                if flipped:
                    logger.log(20, f"\tNote: Scores are always higher_is_better. This metric score can be multiplied by -1 to get the metric value.")

        if not silent:
            logger.log(20, "Evaluations on test data:")
            logger.log(20, json.dumps(performance_dict, indent=4))

        if detailed_report and (self.problem_type != REGRESSION):
            # Construct confusion matrix
            try:
                performance_dict['confusion_matrix'] = confusion_matrix(y_true, y_pred, labels=self.label_cleaner.ordered_class_labels, output_format='pandas_dataframe')
            except ValueError:
                pass
            # One final set of metrics to report
            cl_metric = lambda y_true, y_pred: classification_report(y_true, y_pred, output_dict=True)
            metric_name = 'classification_report'
            if metric_name not in performance_dict:
                try:  # only compute auxiliary metrics which do not error (y_pred = class-probabilities may cause some metrics to error)
                    performance_dict[metric_name] = cl_metric(y_true, y_pred)
                except ValueError:
                    pass
                if not silent and metric_name in performance_dict:
                    logger.log(20, "Detailed (per-class) classification report:")
                    logger.log(20, json.dumps(performance_dict[metric_name], indent=4))
        return performance_dict
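
The convert_score_to_sklearn_val note near the end of this method is worth seeing concretely: AutoGluon scorers always report higher-is-better values, so loss and error metrics come out negated. A tiny check, assuming autogluon.core.metrics scorers are callable as in the sketches above.

import numpy as np
from autogluon.core.metrics import get_metric

y_true = np.array([1.0, 2.0, 3.0])
y_pred = np.array([1.5, 2.0, 2.5])

mae = get_metric('mean_absolute_error', 'regression', 'aux_metric')
print(mae(y_true, y_pred))  # roughly -0.333: the raw MAE multiplied by -1
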
Code example #10
    def score_debug(self, X: DataFrame, y=None, extra_info=False, compute_oracle=False, extra_metrics=None, silent=False):
        leaderboard_df = self.leaderboard(extra_info=extra_info, silent=silent)
        if y is None:
            X, y = self.extract_label(X)
        if extra_metrics is None:
            extra_metrics = []
        self._validate_class_labels(y)
        w = None
        if self.weight_evaluation:
            X, w = extract_column(X, self.sample_weight)

        X = self.transform_features(X)
        y_internal = self.label_cleaner.transform(y)
        y_internal = y_internal.fillna(-1)

        trainer = self.load_trainer()
        scores = {}
        all_trained_models = trainer.get_model_names()
        all_trained_models_can_infer = trainer.get_model_names(can_infer=True)
        all_trained_models_original = all_trained_models.copy()
        model_pred_proba_dict, pred_time_test_marginal = trainer.get_model_pred_proba_dict(X=X, models=all_trained_models_can_infer, fit=False, record_pred_time=True)

        if compute_oracle:
            pred_probas = list(model_pred_proba_dict.values())
            ensemble_selection = EnsembleSelection(ensemble_size=100, problem_type=trainer.problem_type, metric=self.eval_metric, quantile_levels=self.quantile_levels)
            ensemble_selection.fit(predictions=pred_probas, labels=y_internal, identifiers=None, sample_weight=w)  # TODO: Only fit non-nan

            oracle_weights = ensemble_selection.weights_
            oracle_pred_time_start = time.time()
            oracle_pred_proba_norm = [pred * weight for pred, weight in zip(pred_probas, oracle_weights)]
            oracle_pred_proba_ensemble = np.sum(oracle_pred_proba_norm, axis=0)
            oracle_pred_time = time.time() - oracle_pred_time_start
            model_pred_proba_dict['OracleEnsemble'] = oracle_pred_proba_ensemble
            pred_time_test_marginal['OracleEnsemble'] = oracle_pred_time
            all_trained_models.append('OracleEnsemble')

        scoring_args = dict(
            y=y,
            y_internal=y_internal,
            sample_weight=w
        )

        extra_scores = {}
        for model_name, y_pred_proba_internal in model_pred_proba_dict.items():
            scores[model_name] = self._score_with_pred_proba(
                y_pred_proba_internal=y_pred_proba_internal,
                metric=self.eval_metric,
                **scoring_args
            )
            for metric in extra_metrics:
                metric = get_metric(metric, self.problem_type, 'leaderboard_metric')
                if metric.name not in extra_scores:
                    extra_scores[metric.name] = {}
                extra_scores[metric.name][model_name] = self._score_with_pred_proba(
                    y_pred_proba_internal=y_pred_proba_internal,
                    metric=metric,
                    **scoring_args
                )

        if extra_scores:
            series = []
            for metric in extra_scores:
                series.append(pd.Series(extra_scores[metric], name=metric))
            df_extra_scores = pd.concat(series, axis=1)
            extra_metrics_names = list(df_extra_scores.columns)
            df_extra_scores['model'] = df_extra_scores.index
            df_extra_scores = df_extra_scores.reset_index(drop=True)
        else:
            df_extra_scores = None
            extra_metrics_names = None

        pred_time_test = {}
        # TODO: Add support for calculating pred_time_test_full for oracle_ensemble, need to copy graph from trainer and add oracle_ensemble to it with proper edges.
        for model in model_pred_proba_dict.keys():
            if model in all_trained_models_original:
                base_model_set = trainer.get_minimum_model_set(model)
                if len(base_model_set) == 1:
                    pred_time_test[model] = pred_time_test_marginal[base_model_set[0]]
                else:
                    pred_time_test_full_num = 0
                    for base_model in base_model_set:
                        pred_time_test_full_num += pred_time_test_marginal[base_model]
                    pred_time_test[model] = pred_time_test_full_num
            else:
                pred_time_test[model] = None

        scored_models = list(scores.keys())
        for model in all_trained_models:
            if model not in scored_models:
                scores[model] = None
                pred_time_test[model] = None
                pred_time_test_marginal[model] = None

        logger.debug('Model scores:')
        logger.debug(str(scores))
        model_names_final = list(scores.keys())
        df = pd.DataFrame(
            data={
                'model': model_names_final,
                'score_test': list(scores.values()),
                'pred_time_test': [pred_time_test[model] for model in model_names_final],
                'pred_time_test_marginal': [pred_time_test_marginal[model] for model in model_names_final],
            }
        )
        if df_extra_scores is not None:
            df = pd.merge(df, df_extra_scores, on='model', how='left')

        df_merged = pd.merge(df, leaderboard_df, on='model', how='left')
        df_merged = df_merged.sort_values(by=['score_test', 'pred_time_test', 'score_val', 'pred_time_val', 'model'], ascending=[False, True, False, True, False]).reset_index(drop=True)
        df_columns_lst = df_merged.columns.tolist()
        explicit_order = [
            'model',
            'score_test',
        ]
        if extra_metrics_names is not None:
            explicit_order += extra_metrics_names
        explicit_order += [
            'score_val',
            'pred_time_test',
            'pred_time_val',
            'fit_time',
            'pred_time_test_marginal',
            'pred_time_val_marginal',
            'fit_time_marginal',
            'stack_level',
            'can_infer',
            'fit_order',
        ]
        df_columns_other = [column for column in df_columns_lst if column not in explicit_order]
        df_columns_new = explicit_order + df_columns_other
        df_merged = df_merged[df_columns_new]

        return df_merged
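
The oracle-ensemble step above combines per-model probabilities with the weights learned by EnsembleSelection. The combination itself is just a weighted sum; a toy sketch with hypothetical weights:

import numpy as np

pred_probas = [np.array([[0.7, 0.3], [0.2, 0.8]]),   # model A
               np.array([[0.6, 0.4], [0.4, 0.6]])]   # model B
oracle_weights = [0.75, 0.25]  # stand-in for EnsembleSelection.weights_

oracle_pred_proba = np.sum(
    [pred * weight for pred, weight in zip(pred_probas, oracle_weights)], axis=0)
print(oracle_pred_proba)  # row-wise convex combination of the two models
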
Code example #11
    def __init__(self,
                 path: str,
                 name: str,
                 problem_type: str,
                 eval_metric: Union[str, metrics.Scorer] = None,
                 num_classes=None,
                 stopping_metric=None,
                 model=None,
                 hyperparameters=None,
                 features=None,
                 feature_metadata: FeatureMetadata = None,
                 debug=0,
                 **kwargs):
        """ Creates a new model.
            Args:
                path (str): directory where to store all outputs.
                name (str): name of subdirectory inside path where model will be saved.
                problem_type (str): type of problem this model will handle. Valid options: ['binary', 'multiclass', 'regression'].
                eval_metric (str or autogluon.core.metrics.Scorer): objective function the model intends to optimize. If None, will be inferred based on problem_type.
                hyperparameters (dict): various hyperparameters that will be used by model (can be search spaces instead of fixed values).
                feature_metadata (autogluon.tabular.features.feature_metadata.FeatureMetadata): contains feature type information that can be used to identify special features such as text ngrams and datetime as well as which features are numerical vs categorical
        """
        self.name = name  # TODO: v0.1 Consider setting to self._name and having self.name be a property so self.name can't be set outside of self.rename()
        self.path_root = path
        self.path_suffix = self.name + os.path.sep  # TODO: Make into function to avoid having to reassign on load?
        self.path = self.create_contexts(
            self.path_root + self.path_suffix
        )  # TODO: Make this path a function for consistency.
        self.num_classes = num_classes
        self.model = model
        self.problem_type = problem_type
        if eval_metric is not None:
            self.eval_metric = metrics.get_metric(
                eval_metric, self.problem_type, 'eval_metric'
            )  # Note: we require higher values = better performance
        else:
            self.eval_metric = infer_eval_metric(
                problem_type=self.problem_type)
            logger.log(
                20,
                f"Model {self.name}'s eval_metric inferred to be '{self.eval_metric.name}' because problem_type='{self.problem_type}' and eval_metric was not specified during init."
            )

        if self.eval_metric.name in OBJECTIVES_TO_NORMALIZE:
            self.normalize_pred_probas = True
            logger.debug(
                f"{self.name} predicted probabilities will be transformed to never =0 since eval_metric='{self.eval_metric.name}'"
            )
        else:
            self.normalize_pred_probas = False

        if feature_metadata is not None:
            feature_metadata = copy.deepcopy(feature_metadata)
        self.feature_metadata = feature_metadata  # TODO: Should this be passed to a model on creation? Should it live in a Dataset object and passed during fit? Currently it is being updated prior to fit by trainer
        self.features = features
        self.debug = debug

        self.fit_time = None  # Time taken to fit in seconds (Training data)
        self.predict_time = None  # Time taken to predict in seconds (Validation data)
        self.val_score = None  # Score with eval_metric (Validation data)

        self.params = {}
        self.params_aux = {}

        self._set_default_auxiliary_params()
        if hyperparameters is not None:
            hyperparameters = hyperparameters.copy()
            if AG_ARGS_FIT in hyperparameters:
                ag_args_fit = hyperparameters.pop(AG_ARGS_FIT)
                self.params_aux.update(ag_args_fit)

        if stopping_metric is None:
            self.stopping_metric = self.params_aux.get(
                'stopping_metric', self._get_default_stopping_metric())
        else:
            if 'stopping_metric' in self.params_aux:
                raise AssertionError(
                    'stopping_metric was specified in both hyperparameters AG_args_fit and model init. Please specify only once.'
                )
            self.stopping_metric = stopping_metric
        self.stopping_metric = metrics.get_metric(self.stopping_metric,
                                                  self.problem_type,
                                                  'stopping_metric')

        self._set_default_params()
        self.nondefault_params = []
        if hyperparameters is not None:
            self.params.update(hyperparameters)
            self.nondefault_params = list(hyperparameters.keys(
            ))[:]  # These are hyperparameters that user has specified.
        self.params_trained = dict()
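
When eval_metric is None, the constructor above infers a default from problem_type before resolving it with metrics.get_metric. A sketch of that fallback; the mapping below is illustrative and not AutoGluon's actual infer_eval_metric implementation.

from autogluon.core.metrics import get_metric

# Illustrative defaults only; the real choice is made by infer_eval_metric.
_DEFAULT_EVAL_METRIC = {
    'binary': 'accuracy',
    'multiclass': 'accuracy',
    'regression': 'root_mean_squared_error',
}

def resolve_eval_metric(eval_metric, problem_type):
    if eval_metric is None:
        eval_metric = _DEFAULT_EVAL_METRIC[problem_type]
    return get_metric(eval_metric, problem_type, 'eval_metric')

print(resolve_eval_metric(None, 'regression').name)  # root_mean_squared_error
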
Code example #12
File: basic_v1.py Project: ssheff/autogluon
def train_function(args,
                   reporter,
                   train_df_path,
                   tuning_df_path,
                   time_limits,
                   time_start,
                   base_config,
                   problem_types,
                   column_properties,
                   label_columns,
                   label_shapes,
                   log_metrics,
                   stopping_metric,
                   console_log,
                   ignore_warning=False):
    if time_limits is not None:
        start_train_tick = time.time()
        time_left = time_limits - (start_train_tick - time_start)
        if time_left <= 0:
            reporter.terminate()
            return
    import os
    # Get the log metric scorers
    if isinstance(log_metrics, str):
        log_metrics = [log_metrics]
    # Load the training and tuning data from the parquet file
    train_data = pd.read_parquet(train_df_path)
    tuning_data = pd.read_parquet(tuning_df_path)
    log_metric_scorers = [get_metric(ele) for ele in log_metrics]
    stopping_metric_scorer = get_metric(stopping_metric)
    greater_is_better = stopping_metric_scorer.greater_is_better
    os.environ['MKL_NUM_THREADS'] = '1'
    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['MKL_DYNAMIC'] = 'FALSE'
    if ignore_warning:
        import warnings
        warnings.filterwarnings("ignore")
    search_space = args['search_space']
    cfg = base_config.clone()
    specified_values = []
    for key in search_space:
        specified_values.append(key)
        specified_values.append(search_space[key])
    cfg.merge_from_list(specified_values)
    exp_dir = cfg.misc.exp_dir
    if reporter is not None:
        # When the reporter is not None,
        # we create the saved directory based on the task_id + time
        task_id = args.task_id
        exp_dir = os.path.join(exp_dir, 'task{}'.format(task_id))
        os.makedirs(exp_dir, exist_ok=True)
        cfg.defrost()
        cfg.misc.exp_dir = exp_dir
        cfg.freeze()
    logger = logging.getLogger()
    logging_config(folder=exp_dir,
                   name='training',
                   logger=logger,
                   console=console_log)
    logger.info(cfg)
    # Load backbone model
    backbone_model_cls, backbone_cfg, tokenizer, backbone_params_path, _ \
        = get_backbone(cfg.model.backbone.name)
    with open(os.path.join(exp_dir, 'cfg.yml'), 'w') as f:
        f.write(str(cfg))
    text_backbone = backbone_model_cls.from_cfg(backbone_cfg)
    # Build Preprocessor + Preprocess the training dataset + Inference problem type
    # TODO Move preprocessor + Dataloader to outer loop to better cache the dataloader
    preprocessor = TabularBasicBERTPreprocessor(
        tokenizer=tokenizer,
        column_properties=column_properties,
        label_columns=label_columns,
        max_length=cfg.model.preprocess.max_length,
        merge_text=cfg.model.preprocess.merge_text)
    logger.info('Process training set...')
    processed_train = preprocessor.process_train(train_data)
    logger.info('Done!')
    logger.info('Process dev set...')
    processed_dev = preprocessor.process_test(tuning_data)
    logger.info('Done!')
    label = label_columns[0]
    # Get the ground-truth dev labels
    gt_dev_labels = np.array(tuning_data[label].apply(
        column_properties[label].transform))
    ctx_l = get_mxnet_available_ctx()
    base_batch_size = cfg.optimization.per_device_batch_size
    num_accumulated = int(
        np.ceil(cfg.optimization.batch_size / base_batch_size))
    inference_base_batch_size = base_batch_size * cfg.optimization.val_batch_size_mult
    train_dataloader = DataLoader(
        processed_train,
        batch_size=base_batch_size,
        shuffle=True,
        batchify_fn=preprocessor.batchify(is_test=False))
    dev_dataloader = DataLoader(
        processed_dev,
        batch_size=inference_base_batch_size,
        shuffle=False,
        batchify_fn=preprocessor.batchify(is_test=True))
    net = BERTForTabularBasicV1(
        text_backbone=text_backbone,
        feature_field_info=preprocessor.feature_field_info(),
        label_shape=label_shapes[0],
        cfg=cfg.model.network)
    net.initialize_with_pretrained_backbone(backbone_params_path, ctx=ctx_l)
    net.hybridize()
    num_total_params, num_total_fixed_params = count_parameters(
        net.collect_params())
    logger.info('#Total Params/Fixed Params={}/{}'.format(
        num_total_params, num_total_fixed_params))
    # Initialize the optimizer
    updates_per_epoch = int(
        len(train_dataloader) / (num_accumulated * len(ctx_l)))
    optimizer, optimizer_params, max_update \
        = get_optimizer(cfg.optimization,
                        updates_per_epoch=updates_per_epoch)
    valid_interval = math.ceil(cfg.optimization.valid_frequency *
                               updates_per_epoch)
    train_log_interval = math.ceil(cfg.optimization.log_frequency *
                                   updates_per_epoch)
    trainer = mx.gluon.Trainer(net.collect_params(),
                               optimizer,
                               optimizer_params,
                               update_on_kvstore=False)
    if 0 < cfg.optimization.layerwise_lr_decay < 1:
        apply_layerwise_decay(net.text_backbone,
                              cfg.optimization.layerwise_lr_decay,
                              backbone_name=cfg.model.backbone.name)
    # Do not apply weight decay to all the LayerNorm and bias
    for _, v in net.collect_params('.*beta|.*gamma|.*bias').items():
        v.wd_mult = 0.0
    params = [p for p in net.collect_params().values() if p.grad_req != 'null']

    # Set grad_req if gradient accumulation is required
    if num_accumulated > 1:
        logger.info('Using gradient accumulation.'
                    ' Global batch size = {}'.format(
                        cfg.optimization.batch_size))
        for p in params:
            p.grad_req = 'add'
        net.collect_params().zero_grad()
    train_loop_dataloader = grouper(repeat(train_dataloader), len(ctx_l))
    log_loss_l = [mx.np.array(0.0, dtype=np.float32, ctx=ctx) for ctx in ctx_l]
    log_num_samples_l = [0 for _ in ctx_l]
    logging_start_tick = time.time()
    best_performance_score = None
    mx.npx.waitall()
    no_better_rounds = 0
    report_idx = 0
    start_tick = time.time()
    if time_limits is not None:
        time_limits -= start_tick - time_start
        if time_limits <= 0:
            reporter.terminate()
            return
    best_report_items = None
    for update_idx in tqdm.tqdm(range(max_update), disable=None):
        num_samples_per_update_l = [0 for _ in ctx_l]
        for accum_idx in range(num_accumulated):
            sample_l = next(train_loop_dataloader)
            loss_l = []
            num_samples_l = [0 for _ in ctx_l]
            for i, (sample, ctx) in enumerate(zip(sample_l, ctx_l)):
                feature_batch, label_batch = sample
                feature_batch = move_to_ctx(feature_batch, ctx)
                label_batch = move_to_ctx(label_batch, ctx)
                with mx.autograd.record():
                    pred = net(feature_batch)
                    if problem_types[0] == _C.CLASSIFICATION:
                        logits = mx.npx.log_softmax(pred, axis=-1)
                        loss = -mx.npx.pick(logits, label_batch[0])
                    elif problem_types[0] == _C.REGRESSION:
                        loss = mx.np.square(pred - label_batch[0])
                    loss_l.append(loss.mean() / len(ctx_l))
                    num_samples_l[i] = loss.shape[0]
                    num_samples_per_update_l[i] += loss.shape[0]
            for loss in loss_l:
                loss.backward()
            for i in range(len(ctx_l)):
                log_loss_l[i] += loss_l[i] * len(ctx_l) * num_samples_l[i]
                log_num_samples_l[i] += num_samples_per_update_l[i]
        # Begin to update
        trainer.allreduce_grads()
        num_samples_per_update = sum(num_samples_per_update_l)
        total_norm, ratio, is_finite = \
            clip_grad_global_norm(params, cfg.optimization.max_grad_norm * num_accumulated)
        total_norm = total_norm / num_accumulated
        trainer.update(num_samples_per_update)

        # Clear after update
        if num_accumulated > 1:
            net.collect_params().zero_grad()
        if (update_idx + 1) % train_log_interval == 0:
            log_loss = sum([ele.as_in_ctx(ctx_l[0])
                            for ele in log_loss_l]).asnumpy()
            log_num_samples = sum(log_num_samples_l)
            logger.info(
                '[Iter {}/{}, Epoch {}] train loss={:0.4e}, gnorm={:0.4e}, lr={:0.4e}, #samples processed={},'
                ' #sample per second={:.2f}'.format(
                    update_idx + 1, max_update,
                    int(update_idx / updates_per_epoch),
                    log_loss / log_num_samples, total_norm,
                    trainer.learning_rate, log_num_samples,
                    log_num_samples / (time.time() - logging_start_tick)))
            logging_start_tick = time.time()
            log_loss_l = [
                mx.np.array(0.0, dtype=np.float32, ctx=ctx) for ctx in ctx_l
            ]
            log_num_samples_l = [0 for _ in ctx_l]
        if (update_idx + 1) % valid_interval == 0 or (update_idx +
                                                      1) == max_update:
            valid_start_tick = time.time()
            dev_predictions = \
                _classification_regression_predict(net, dataloader=dev_dataloader,
                                                   problem_type=problem_types[0],
                                                   has_label=False)
            log_scores = [
                calculate_metric(scorer, gt_dev_labels, dev_predictions,
                                 problem_types[0])
                for scorer in log_metric_scorers
            ]
            dev_score = calculate_metric(stopping_metric_scorer, gt_dev_labels,
                                         dev_predictions, problem_types[0])
            valid_time_spent = time.time() - valid_start_tick

            if best_performance_score is None or \
                    (greater_is_better and dev_score >= best_performance_score) or \
                    (not greater_is_better and dev_score <= best_performance_score):
                find_better = True
                no_better_rounds = 0
                best_performance_score = dev_score
                net.save_parameters(os.path.join(exp_dir, 'best_model.params'))
            else:
                find_better = False
                no_better_rounds += 1
            mx.npx.waitall()
            loss_string = ', '.join([
                '{}={:0.4e}'.format(metric.name, score)
                for score, metric in zip(log_scores, log_metric_scorers)
            ])
            logger.info('[Iter {}/{}, Epoch {}] valid {}, time spent={:.3f}s,'
                        ' total_time={:.2f}min'.format(
                            update_idx + 1, max_update,
                            int(update_idx / updates_per_epoch), loss_string,
                            valid_time_spent, (time.time() - start_tick) / 60))
            report_items = [('iteration', update_idx + 1),
                            ('report_idx', report_idx + 1),
                            ('epoch', int(update_idx / updates_per_epoch))] +\
                           [(metric.name, score)
                            for score, metric in zip(log_scores, log_metric_scorers)] + \
                           [('find_better', find_better),
                            ('time_spent', int(time.time() - start_tick))]
            total_time_spent = time.time() - start_tick

            if stopping_metric_scorer._sign < 0:
                report_items.append(('reward_attr', -dev_score))
            else:
                report_items.append(('reward_attr', dev_score))
            report_items.append(('eval_metric', stopping_metric_scorer.name))
            report_items.append(('exp_dir', exp_dir))
            if find_better:
                best_report_items = report_items
            reporter(**dict(report_items))
            report_idx += 1
            if no_better_rounds >= cfg.learning.early_stopping_patience:
                logger.info('Early stopping patience reached!')
                break
            if time_limits is not None and total_time_spent > time_limits:
                break

    best_report_items_dict = dict(best_report_items)
    best_report_items_dict['report_idx'] = report_idx + 1
    reporter(**best_report_items_dict)
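
The validation loop above keeps a best_performance_score and a no_better_rounds counter, stopping once cfg.learning.early_stopping_patience rounds pass without improvement. The bookkeeping reduced to its core, as a standalone sketch rather than AutoGluon code:

def should_stop(history, patience, greater_is_better=True):
    # Stop once `patience` evaluations have passed since the best score.
    best = max(history) if greater_is_better else min(history)
    rounds_since_best = len(history) - 1 - history.index(best)
    return rounds_since_best >= patience

dev_scores = [0.71, 0.74, 0.73, 0.73, 0.72]
print(should_stop(dev_scores, patience=3))  # True: three rounds without improvement
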
Code example #13
File: basic_v1.py Project: itscarrot/autogluon
 def train(self,
           train_data,
           tuning_data,
           resource,
           time_limits=None,
           search_strategy='random',
           search_options=None,
           scheduler_options=None,
           num_trials=None,
           plot_results=False,
           console_log=True,
           ignore_warning=True):
     force_forkserver()
     start_tick = time.time()
     logging_config(folder=self._output_directory,
                    name='main',
                    console=console_log,
                    logger=self._logger)
     assert len(self._label_columns) == 1
     # TODO(sxjscience) Try to support S3
     os.makedirs(self._output_directory, exist_ok=True)
     search_space_reg = args(search_space=space.Dict(**self.search_space))
     # Scheduler and searcher for HPO
     if search_strategy.endswith('hyperband') and time_limits is None:
         time_limits = 5 * 60 * 60  # 5 hours
     if scheduler_options is None:
         scheduler_options = dict()
     stopping_metric_scorer = get_metric(self._stopping_metric)
     scheduler_options = compile_scheduler_options(
         scheduler_options=scheduler_options,
         search_strategy=search_strategy,
         search_options=search_options,
         nthreads_per_trial=resource['num_cpus'],
         ngpus_per_trial=resource['num_gpus'],
         checkpoint=None,
         num_trials=num_trials,
         time_out=scheduler_options.get('time_out'),
         resume=False,
         visualizer=scheduler_options.get('visualizer'),
         time_attr='report_idx',
         reward_attr=stopping_metric_scorer.reward_attr,
         dist_ip_addrs=scheduler_options.get('dist_ip_addrs'))
     train_fn = search_space_reg(
         functools.partial(train_function,
                           train_data=train_data,
                           time_limits=time_limits,
                           tuning_data=tuning_data,
                           base_config=self.base_config,
                           problem_types=self.problem_types,
                           column_properties=self._column_properties,
                           label_columns=self._label_columns,
                           label_shapes=self._label_shapes,
                           log_metrics=self._log_metrics,
                           stopping_metric=self._stopping_metric,
                           console_log=console_log,
                           ignore_warning=ignore_warning))
     scheduler_cls = schedulers[search_strategy.lower()]
     # Create scheduler, run HPO experiment
     scheduler = scheduler_cls(train_fn, **scheduler_options)
     scheduler.run()
     scheduler.join_jobs()
     if len(scheduler.config_history) == 0:
         raise RuntimeError(
             'No training job has been completed! '
             'There are two possibilities: '
             '1) The time_limits is too small, '
             'or 2) There are some internal errors in AutoGluon. '
             'For the first case, you can increase the time_limits or set it to '
             'None, e.g., setting "TextPrediction.fit(..., time_limits=None). To '
             'further investigate the root cause, you can also try to train with '
             '"verbosity=3", i.e., TextPrediction.fit(..., verbosity=3).')
     best_config = scheduler.get_best_config()
      self._logger.info('Results={}'.format(scheduler.searcher._results))
     self._logger.info('Best_config={}'.format(best_config))
     best_task_id = scheduler.get_best_task_id()
     best_model_saved_dir_path = os.path.join(self._output_directory,
                                              'task{}'.format(best_task_id))
     best_cfg_path = os.path.join(best_model_saved_dir_path, 'cfg.yml')
     cfg = self.base_config.clone_merge(best_cfg_path)
     self._results = dict()
     self._results.update(best_reward=scheduler.get_best_reward(),
                          best_config=scheduler.get_best_config(),
                          total_time=time.time() - start_tick,
                          metadata=scheduler.metadata,
                          training_history=scheduler.training_history,
                          config_history=scheduler.config_history,
                          reward_attr=scheduler._reward_attr,
                          config=cfg)
     if plot_results:
         plot_training_curves = os.path.join(self._output_directory,
                                             'plot_training_curves.png')
         scheduler.get_training_curves(filename=plot_training_curves,
                                       plot=plot_results,
                                       use_legend=True)
     # Consider to move this to a separate predictor
     self._config = cfg
     backbone_model_cls, backbone_cfg, tokenizer, backbone_params_path, _ \
         = get_backbone(cfg.model.backbone.name)
     text_backbone = backbone_model_cls.from_cfg(backbone_cfg)
     preprocessor = TabularBasicBERTPreprocessor(
         tokenizer=tokenizer,
         column_properties=self._column_properties,
         label_columns=self._label_columns,
         max_length=cfg.model.preprocess.max_length,
         merge_text=cfg.model.preprocess.merge_text)
     self._preprocessor = preprocessor
     net = BERTForTabularBasicV1(
         text_backbone=text_backbone,
         feature_field_info=preprocessor.feature_field_info(),
         label_shape=self._label_shapes[0],
         cfg=cfg.model.network)
     net.hybridize()
     ctx_l = get_mxnet_available_ctx()
     net.load_parameters(os.path.join(best_model_saved_dir_path,
                                      'best_model.params'),
                         ctx=ctx_l[0])
     self._net = net
     mx.npx.waitall()
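
For context, the metric names resolved by get_metric throughout these snippets usually arrive from user code through the public predictor API. A hedged, minimal end-to-end sketch; the package paths, file names, and the extra_metrics argument are assumptions about an AutoGluon >= 0.1/0.2 install, not taken from the snippets above.

from autogluon.tabular import TabularDataset, TabularPredictor

train_data = TabularDataset('train.csv')  # hypothetical file paths
test_data = TabularDataset('test.csv')

predictor = TabularPredictor(label='class', eval_metric='roc_auc')
predictor.fit(train_data, time_limit=60)

print(predictor.evaluate(test_data))  # evaluation score(s) on the held-out data
# extra_metrics feeds the score_debug/leaderboard path shown in code example #10
# (in versions that support it).
print(predictor.leaderboard(test_data, extra_metrics=['f1', 'accuracy']))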