예제 #1
0
    def predict(self, data_set: DataSet) -> Mapping[str, int]:
        """
        Predict labels for the given data set with the previously fitted model.

        The data set is first handed to the parent class's ``predict``. It is
        then scaled with the scaler stored on this instance and passed to the
        underlying learner.

        Parameters
        ----------
        data_set: DataSet
            The data for which predictions should be computed

        Returns
        -------
        Mapping[str, int]
            If majority voting is enabled, whatever ``_majority_vote`` returns
            for the scaled data set and the learner's predictions. Otherwise, a
            dict pairing each entry of the scaled data set's ``filenames`` with
            the prediction at the same position.

        Raises
        ------
        RuntimeError
            If no scaler is stored on this instance (``self._scaler`` is None)
        """
        super().predict(data_set)

        scaler = self._scaler

        if scaler is None:
            raise RuntimeError(
                "no model has been built yet. Invoke fit before predict")

        # only scaling is applied here, no upsampling - labels may not be available at prediction time
        scaled_data = data_set.scaled(scaler)
        predictions = self._learner.predict(scaled_data)

        if not self._majority_vote:
            # pair each filename with the prediction at the same position
            return dict(zip(scaled_data.filenames, predictions))

        return _majority_vote(scaled_data, predictions)
예제 #2
0
    def fit(self, data_set: DataSet):
        """
        Train the underlying learner on the given data set.

        After the parent class's ``fit`` has processed the data set, it is
        optionally upsampled, then shuffled and scaled with a newly created
        ``StandardScaler``. The scaler is stored on the instance so that
        ``predict`` can apply the same scaling.

        Parameters
        ----------
        data_set: DataSet
            The data on which the model should be trained
        """
        # parent class performs the generic parameter checks
        super().fit(data_set)

        # upsampling (if enabled) comes first, so that the shuffle below is applied to its result
        training_data = upsample(data_set) if self._upsample else data_set
        training_data = training_data.shuffled()

        # the scaler is stored on the instance before it is fitted, so predict can reuse it later
        self._scaler = scaler = StandardScaler()
        scaler.fit(training_data.features)

        # the learner is trained on the scaled data
        self._learner.fit(training_data.scaled(scaler))