def transform(self, X):
    """ transform

    Transform one hot features in the X matrix into int coded
    categorical features.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        The sample or set of samples that should be transformed.

    Returns
    -------
    numpy.ndarray
        The transformed data. It has one row block per input sample and
        ``new_width`` columns, where every group listed in
        ``self.categorical_list`` is collapsed into a single column.

    """
    n_rows, n_cols = get_dimensions(X)

    # A group of k one-hot columns becomes one column, so each group
    # shrinks the output width by k - 1.
    new_width = n_cols - sum(len(group) - 1 for group in self.categorical_list)

    # Keep the empty (0, new_width) seed so that an input without rows still
    # yields a correctly shaped array and dtype promotion matches X.dtype.
    blocks = [np.zeros((0, new_width), dtype=X.dtype)]

    # Collect the per-sample results and concatenate once; concatenating
    # inside the loop re-copies the accumulated output on every iteration.
    # NOTE(review): self._transform is expected to return a 2-D array with
    # new_width columns for each sample -- confirm against its definition.
    blocks.extend(self._transform(X[i, :], new_width) for i in range(n_rows))

    return np.concatenate(blocks, axis=0)
def run(X, y, hyperParams):
    """ run

    Test function for SAMKNN, not integrated with evaluation modules.
    Each sample is first predicted and then used to train the classifier;
    the resulting error rate is written to the log.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        The feature's matrix, coded as 64 bits.

    y: numpy.array of size n_samples
        The labels for all the samples in X coded as 8 bits.

    hyperParams: dict
        A dictionary containing the __init__ params for the SAMKNN.
        The keys read are 'nNeighbours', 'maxSize', 'knnWeights',
        'STMSizeAdaption' and 'useLTM'.

    """
    n_samples, _ = get_dimensions(X)

    init_kwargs = {
        'n_neighbors': hyperParams['nNeighbours'],
        'maxSize': hyperParams['maxSize'],
        'knnWeights': hyperParams['knnWeights'],
        'STMSizeAdaption': hyperParams['STMSizeAdaption'],
        'useLTM': hyperParams['useLTM'],
    }
    model = SAMKNN(**init_kwargs)
    logging.info('applying model on dataset')

    # Progress is reported every 1/20th of the stream (true division, so the
    # step and the printed percentage are floats).
    progress_step = n_samples / 20
    predictions = []
    for idx in range(n_samples):
        # Test-then-train: predict the sample before the model learns it.
        query = np.asarray([X[idx]])
        predictions.append(model.predict(query)[0])

        train_x = np.asarray([X[idx]])
        train_y = np.asarray([y[idx]])
        model = model.partial_fit(train_x, train_y, None)

        if idx % progress_step == 0:
            print(str((idx / progress_step) * 5) + "%")

    error_rate = 100 - 100 * accuracy_score(y, predictions)
    logging.info('error rate %.2f%%' % error_rate)
def partial_fit(self, X, y=None, classes=None):
    """ partial_fit

    Train the ensemble of decision trees held in ``self.H``.

    While the ensemble is empty, ``self.max_models`` trees are fitted, each
    on the next ``self.window_size`` consecutive rows of X and y. Once the
    ensemble holds models, the samples are only appended to the instance
    window.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        The samples to learn from.

    y: array-like of size n_samples
        The labels of the samples in X.

    classes: optional
        Not used by this method.

    Returns
    -------
    self

    """
    # TODO
    n_samples, _ = get_dimensions(X)

    if self.window is None:
        self.window = InstanceWindow(max_size=self.window_size, dtype=float)

    trained = 0

    if self.H:
        # Ensemble already exists: buffer the new samples in the window.
        for idx in range(n_samples):
            self.window.add_element(np.asarray([X[idx]]), np.asarray([[y[idx]]]))

        # NOTE(review): `trained` is always 0 on this path, so this
        # retraining loop never executes -- confirm the intended count.
        for _ in range(trained):
            self.h = DecisionTreeClassifier()
            self.h.fit(self.window.get_attributes_matrix(),
                       self.window.get_targets_matrix())
            self.H.append(self.h)  # <-- and append it to the ensemble
        return self

    # Empty ensemble: slice the pretraining set into consecutive batches
    # and fit one tree per batch.
    lo, hi = 0, self.window_size
    while trained < self.max_models:
        X_chunk, y_chunk = X[lo:hi, :], y[lo:hi]
        lo += self.window_size
        hi += self.window_size

        self.h = DecisionTreeClassifier()
        self.h.fit(X_chunk, y_chunk)
        self.H.append(self.h)  # <-- and append it to the ensemble
        trained += 1

    return self
def predict(self, X):
    """ predict

    Predict one label per sample by picking the class with the most votes.

    Parameters
    ----------
    X: array-like of shape (n_samples, n_features)
        The samples to predict.

    Returns
    -------
    list
        One predicted label per sample; 0 when no votes are available.

    """
    n_samples, _ = get_dimensions(X)
    labels = []
    for idx in range(n_samples):
        class_votes = self.get_votes_for_instance(X[idx])
        if class_votes != {}:
            labels.append(max(class_votes, key=class_votes.get))
        else:
            # Tree is empty, all classes equal, default to zero
            labels.append(0)
    return labels
def predict(self, X):
    """ predict

    Predict one label per sample from its distances to the short-term
    memory samples.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        The samples to predict.

    Returns
    -------
    list
        One value returned by ``self.predictFct`` per sample.

    """
    n_samples, n_features = get_dimensions(X)

    # Lazily create empty memories so prediction works before any training.
    if self._STMSamples is None:
        empty_shape = (0, n_features)
        self._STMSamples = np.empty(shape=empty_shape)
        self._LTMSamples = np.empty(shape=empty_shape)

    def label_for(sample):
        stm_distances = SAMKNN.get_distances(sample, self._STMSamples)
        return self.predictFct(sample, None, stm_distances)

    return [label_for(X[idx]) for idx in range(n_samples)]
def partial_fit(self, X, y, classes=None, weight=None):
    """ partial_fit

    Processes new samples one at a time.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        The samples to learn from.

    y: array-like of size n_samples
        The labels of the samples in X.

    classes: optional
        Not used by this method.

    weight: optional
        Not used by this method.

    Returns
    -------
    self

    """
    n_samples, n_features = get_dimensions(X)

    # First call: start with empty short-term and long-term memories.
    if self._STMSamples is None:
        empty_shape = (0, n_features)
        self._STMSamples = np.empty(shape=empty_shape)
        self._LTMSamples = np.empty(shape=empty_shape)

    for idx in range(n_samples):
        sample, label = X[idx, :], y[idx]
        self._partial_fit(sample, label)

    return self
def test_get_dimensions():
    """Check get_dimensions on flat and nested lists and on 1-D / 2-D arrays."""
    rows_expected = 5
    cols_expected = 5

    flat_list = [None] * cols_expected

    # (input, expected number of rows); one-dimensional inputs count as a
    # single row. The expected number of columns is the same for all cases.
    cases = [
        (flat_list, 1),
        ([flat_list] * rows_expected, rows_expected),
        (np.ndarray(cols_expected), 1),
        (np.ndarray((rows_expected, cols_expected)), rows_expected),
    ]

    for data, expected_rows in cases:
        rows, cols = get_dimensions(data)
        assert rows == expected_rows
        assert cols == cols_expected
def transform(self, X):
    """ transform

    Does the transformation process in the samples in X: every cell whose
    value is one of ``self.missing_value`` is overwritten with the
    substitute for its column.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        The sample or set of samples that should be transformed.
        X is modified in place.

    Returns
    -------
    numpy.ndarray
        The same X object, with the missing values replaced.

    """
    def is_nan(value):
        # NaN is the only value that differs from itself. Values whose
        # comparison is not a plain boolean are treated as not-NaN.
        try:
            return bool(value != value)
        except (TypeError, ValueError):
            return False

    n_rows, n_cols = get_dimensions(X)

    # A NaN cell read from an array is a fresh object that is neither
    # identical nor equal to a NaN marker, so the plain membership test can
    # never match it. Detect NaN explicitly, but only when NaN was actually
    # configured as a missing-value marker.
    nan_is_missing = any(is_nan(marker) for marker in self.missing_value)

    for i in range(n_rows):
        for j in range(n_cols):
            cell = X[i][j]
            if cell in self.missing_value or (nan_is_missing and is_nan(cell)):
                X[i][j] = self._get_substitute(j)

    return X