예제 #1
0
    def transform(self, X):
        """ transform

        Transform one hot features in the X matrix into int coded
        categorical features.

        The output has fewer columns than the input: for every group in
        ``self.categorical_list`` the width shrinks by the size of that
        group minus one. Each row of X is converted individually by
        ``self._transform`` and the converted rows are stacked in order.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The sample or set of samples that should be transformed.

        Returns
        -------
        numpy.ndarray
            The transformed data, with one block of rows per input sample
            and ``new_width`` columns. When X holds no samples the result
            is an empty array of shape (0, new_width) with X's dtype.

        """
        r, c = get_dimensions(X)

        new_width = c - sum(len(group) - 1
                            for group in self.categorical_list)

        # Start from an empty (0, new_width) block so that the result keeps
        # its shape and X's dtype takes part in the dtype promotion even when
        # there are no rows to convert.
        blocks = [np.zeros((0, new_width), dtype=X.dtype)]
        blocks.extend(self._transform(X[i, :], new_width) for i in range(r))

        # Concatenate once at the end: growing the array inside the loop
        # copies every previously converted row on each iteration.
        return np.concatenate(blocks, axis=0)
예제 #2
0
def run(X, y, hyperParams):
    """ run

    Test function for SAMKNN, not integrated with evaluation modules.

    Every sample is first predicted by the classifier and only afterwards
    used to train it. Progress is printed while the samples are processed
    and the final error rate is written to the log.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        The feature's matrix, coded as 64 bits.

    y: numpy.array of size n_samples
        The labels for all the samples in X coded as 8 bits.

    hyperParams: dict
        A dictionary containing the __init__ params for the SAMKNN. The keys
        read here are 'nNeighbours', 'maxSize', 'knnWeights',
        'STMSizeAdaption' and 'useLTM'.

    """
    n_samples, _ = get_dimensions(X)
    model = SAMKNN(n_neighbors=hyperParams['nNeighbours'],
                   maxSize=hyperParams['maxSize'],
                   knnWeights=hyperParams['knnWeights'],
                   STMSizeAdaption=hyperParams['STMSizeAdaption'],
                   useLTM=hyperParams['useLTM'])
    logging.info('applying model on dataset')

    # One twentieth of the data set, i.e. a 5% progress step.
    # NOTE(review): with true division this is a float, so the modulo test
    # below depends on exact float arithmetic and may fire irregularly when
    # n_samples is not a multiple of 20 - confirm the intended cadence.
    report_every = n_samples / 20

    outputs = []
    for idx in range(n_samples):
        # Predict before training so the model never sees the label first.
        outputs.append(model.predict(np.asarray([X[idx]]))[0])
        model = model.partial_fit(np.asarray([X[idx]]),
                                  np.asarray([y[idx]]), None)
        if (idx % report_every) == 0:
            print(str((idx / report_every) * 5) + "%")

    error_rate = 100 - 100 * accuracy_score(y, outputs)
    logging.info('error rate %.2f%%' % error_rate)
 def partial_fit(self, X, y=None, classes=None):
     """Seed the ensemble on the first call, buffer samples afterwards.

     While ``self.H`` is empty, decision trees are fitted on consecutive
     slices of ``self.window_size`` rows of X and y until
     ``self.max_models`` trees have been appended to ``self.H``. Once
     ``self.H`` holds models, each row of X is instead added, together
     with its label, to ``self.window``.

     Parameters
     ----------
     X: array supporting ``X[a:b, :]`` slicing
         The samples to learn from.

     y: sequence, optional
         The labels matching the rows of X.

     classes: optional
         Not used by this method.

     Returns
     -------
     self

     """
     # TODO
     n_rows, _ = get_dimensions(X)
     if self.window is None:
         self.window = InstanceWindow(max_size=self.window_size,
                                      dtype=float)

     trained = 0
     if self.H:
         # The ensemble already exists: only buffer the new samples.
         for idx in range(n_rows):
             self.window.add_element(np.asarray([X[idx]]),
                                     np.asarray([[y[idx]]]))
         # NOTE(review): ``trained`` is still 0 on this path, so the loop
         # below never runs and no tree is fitted on the window contents -
         # confirm how many models were meant to be refitted here.
         for _ in range(trained):
             self.h = DecisionTreeClassifier()
             self.h.fit(self.window.get_attributes_matrix(),
                        self.window.get_targets_matrix())
             self.H.append(self.h)  # <-- and append it to the ensemble
         return self

     # Pretraining: one tree per consecutive window_size-row slice.
     while trained < self.max_models:
         start = trained * self.window_size
         stop = start + self.window_size
         X_batch = X[start:stop, :]
         y_batch = y[start:stop]
         self.h = DecisionTreeClassifier()
         self.h.fit(X_batch, y_batch)
         self.H.append(self.h)  # <-- and append it to the ensemble
         trained += 1
     return self
 def predict(self, X):
     """Predict one label per row of X from the votes for that row.

     Parameters
     ----------
     X: indexable collection of samples
         The samples to classify; row ``i`` is read as ``X[i]``.

     Returns
     -------
     list
         For each row, the key with the highest value in the dict returned
         by ``self.get_votes_for_instance``, or 0 when that dict is empty.

     """
     n_rows, _ = get_dimensions(X)

     def _winner(votes):
         # No votes at all (empty tree): every class is equally likely,
         # so default to class zero.
         if votes == {}:
             return 0
         return max(votes, key=votes.get)

     return [_winner(self.get_votes_for_instance(X[idx]))
             for idx in range(n_rows)]
예제 #5
0
    def predict(self, X):
        """Predict a label for every sample in X.

        For each row, the distances to the samples stored in
        ``self._STMSamples`` are computed with ``SAMKNN.get_distances`` and
        handed to ``self.predictFct`` together with the row itself.

        Parameters
        ----------
        X: indexable collection of samples
            The samples to classify; row ``i`` is read as ``X[i]``.

        Returns
        -------
        list
            One prediction per row of X, in row order.

        """
        n_samples, n_features = get_dimensions(X)

        # First use: create empty sample stores matching the feature count.
        if self._STMSamples is None:
            self._STMSamples = np.empty(shape=(0, n_features))
            self._LTMSamples = np.empty(shape=(0, n_features))

        def _predict_row(sample):
            stm_distances = SAMKNN.get_distances(sample, self._STMSamples)
            return self.predictFct(sample, None, stm_distances)

        return [_predict_row(X[idx]) for idx in range(n_samples)]
예제 #6
0
    def partial_fit(self, X, y, classes=None, weight=None):
        """Process new samples one row at a time.

        Parameters
        ----------
        X: array supporting ``X[i, :]`` indexing
            The samples to learn from.

        y: sequence
            The labels matching the rows of X.

        classes: optional
            Not used by this method.

        weight: optional
            Not used by this method.

        Returns
        -------
        self

        """
        n_samples, n_features = get_dimensions(X)

        # First use: create empty sample stores matching the feature count.
        if self._STMSamples is None:
            self._STMSamples = np.empty(shape=(0, n_features))
            self._LTMSamples = np.empty(shape=(0, n_features))

        for idx in range(n_samples):
            sample, label = X[idx, :], y[idx]
            self._partial_fit(sample, label)

        return self
예제 #7
0
def test_get_dimensions():
    """Check get_dimensions on flat and nested lists and ndarrays.

    Flat containers are expected to be reported as a single row; nested
    (two-dimensional) containers are expected to report their own row and
    column counts.
    """
    n_rows = 5
    n_cols = 5

    flat_list = [None] * n_cols
    nested_list = [flat_list] * n_rows

    # (container, (expected rows, expected cols))
    cases = [
        (flat_list, (1, n_cols)),
        (nested_list, (n_rows, n_cols)),
        (np.ndarray(n_cols), (1, n_cols)),
        (np.ndarray((n_rows, n_cols)), (n_rows, n_cols)),
    ]

    for container, (want_rows, want_cols) in cases:
        rows, cols = get_dimensions(container)
        assert rows == want_rows
        assert cols == want_cols
예제 #8
0
    def transform(self, X):
        """ transform

        Does the transformation process in the samples in X.

        Every entry of X that is listed in ``self.missing_value`` is
        overwritten, in place, with the value returned by
        ``self._get_substitute`` for that entry's column.

        NaN never compares equal to itself, so a plain membership test
        cannot find a NaN read out of X even when NaN is listed as a
        missing value. When ``self.missing_value`` contains a NaN, NaN
        entries of X are therefore detected explicitly.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The sample or set of samples that should be transformed.

        Returns
        -------
        The same object X that was passed in, after modification.

        """
        def _is_nan(value):
            # Only NaN-like scalars are unequal to themselves; anything
            # that cannot be compared this way is simply not a NaN.
            try:
                return bool(value != value)
            except (TypeError, ValueError):
                return False

        r, c = get_dimensions(X)
        markers = self.missing_value
        # Computed once: NaN cells only count as missing when the caller
        # listed NaN among the missing value markers.
        nan_is_missing = any(_is_nan(marker) for marker in markers)

        for i in range(r):
            for j in range(c):
                value = X[i][j]
                if value in markers or (nan_is_missing and _is_nan(value)):
                    X[i][j] = self._get_substitute(j)

        return X