def predict(self, data_set: DataSet) -> Mapping[str, int]:
    super().predict(data_set)

    if self._scaler is None:
        raise RuntimeError(
            "no model has been built yet. Invoke fit before predict")

    # no upsampling during prediction - we may not even have labels at this point

    # standardize data using coefficients computed during training
    data_set = data_set.scaled(self._scaler)

    # get predictions
    chunked_predictions = self._learner.predict(data_set)

    if self._majority_vote:
        return _majority_vote(data_set, chunked_predictions)
    else:
        return dict(zip(data_set.filenames, chunked_predictions))
def fit(self, data_set: DataSet):
    # generic parameter checks
    super().fit(data_set)

    if self._upsample:
        data_set = upsample(data_set)

    # shuffle data set after upsampling
    data_set = data_set.shuffled()

    # standardize features and remember coefficients for prediction
    self._scaler = StandardScaler()
    self._scaler.fit(data_set.features)
    data_set = data_set.scaled(self._scaler)

    # train model
    self._learner.fit(data_set)
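
# The predict method above delegates per-file aggregation to a module-level
# _majority_vote helper that is not shown in this excerpt. The sketch below is
# only an assumption of what such a helper could look like, not the actual
# implementation: it presumes that data_set.filenames is aligned element-wise
# with the chunk-level predictions (chunks of the same file repeat that file's
# name) and that ties are broken arbitrarily by Counter.most_common.
from collections import Counter
from typing import Dict, List, Mapping, Sequence


def _majority_vote(data_set: DataSet,
                   chunked_predictions: Sequence[int]) -> Mapping[str, int]:
    # group the predicted labels of all chunks belonging to the same file
    votes: Dict[str, List[int]] = {}
    for filename, label in zip(data_set.filenames, chunked_predictions):
        votes.setdefault(filename, []).append(label)

    # assign to each file the label predicted for the majority of its chunks
    return {filename: Counter(labels).most_common(1)[0][0]
            for filename, labels in votes.items()}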