Ejemplo n.º 1
0
 def fit(self, X, y=None, verbose=1):
     """
     Sklearn-like fit function: train the subspace clustering model on a dataset.

     After the data sample has been initialised from ``X``, ``self.NbIter``
     training steps are run. Each step swaps one data object between the
     sample and the rest of the dataset: a randomly chosen index is moved
     from ``self.data_objects_index_not_in_sample`` into the sample, the
     oldest index of the sample (position 0) is moved out, and the newly
     sampled row is sent to the C wrapper to train the model.

     Parameters
     ----------
     X : array
         Dataset to learn from. Rows are indexed as ``X_[i, :]`` after
         validation by ``_check_X_matrix_validity``. If ``None``, nothing
         is done.
     y : sequence or array, optional
         Labels, indexed like the rows of ``X``. When provided, the label of
         each sampled row is sent along with the row.
     verbose : int or bool, optional
         When truthy, an ``iteration/NbIter`` progress counter is written to
         stdout on a single, continuously rewritten line.

     Returns
     -------
     None

     Raises
     ------
     RuntimeError
         If ``X.size`` is smaller than ``self.N``.
     """
     # print("") emits the same lone newline under Python 2 and Python 3.
     print("")
     if X is None:
         return None
     # NOTE(review): X.size counts every element (rows * columns for a 2-D
     # array), not the number of data objects -- confirm this is the intended
     # comparison against the sample size.
     if X.size < self.N:
         raise RuntimeError(
             'The dataset provided is smaller than the sample size, use instead the fit_online function'
         )
     X_ = self._check_X_matrix_validity(X)
     self._set_data_sample(X_, y)
     # Test for presence explicitly: the truth value of a multi-element
     # NumPy array is ambiguous and `if y:` raises ValueError on it.
     has_labels = y is not None
     for iteration in xrange(self.NbIter):
         # Draw the position of a data object currently outside the sample.
         random_element = np.random.randint(
             0, len(self.data_objects_index_not_in_sample))
         random_index = self.data_objects_index_not_in_sample.pop(
             random_element)
         # Evict the oldest member of the sample and swap the two indices.
         data_object_index_removed_from_sample = (
             self.data_objects_index_in_sample.pop(0))
         self.data_objects_index_in_sample.append(random_index)
         self.data_objects_index_not_in_sample.append(
             data_object_index_removed_from_sample)
         if has_labels:
             self._send_array(X_[random_index, :], y[random_index])
         else:
             self._send_array(X_[random_index, :])
         SubCMediansWrapper_c.train_model_with_SubCMedianspoint(
             self._p_subcmedians_c, self._data_object)
         self.generation += 1
         if verbose:
             # "\r" returns to the start of the line so the counter is
             # overwritten in place instead of scrolling.
             sys.stdout.write("\r" + str(iteration) + "/" +
                              str(self.NbIter))
             sys.stdout.flush()
     print("")
Ejemplo n.º 2
0
 def fit_online_mode(self, X, y=None):
     """
     Sklearn-like fit function for streams of data.

     Every data object received is sent to the C wrapper and used for one
     training step, in the order given. Unlike ``fit``, no record of which
     objects are inside or outside the sample is maintained, because data
     coming from a stream is not expected to be seen again.

     Parameters
     ----------
     X : array
         Either a single data object (1-D after validation by
         ``_check_X_matrix_validity``) or several data objects, one per row.
         If ``None``, nothing is done.
     y : optional
         For a single data object, its label, forwarded as-is. For several
         data objects, a sequence indexed like the rows of ``X``.

     Returns
     -------
     None
     """
     if X is None:
         return None
     X_ = self._check_X_matrix_validity(X)
     if len(X_.shape) == 1:
         # Single data object: y is forwarded unchanged, even when None.
         self._send_array(X_, y)
         SubCMediansWrapper_c.train_model_with_SubCMedianspoint(
             self._p_subcmedians_c, self._data_object)
         self.generation += 1
     else:
         # Test for presence explicitly: the truth value of a multi-element
         # NumPy array is ambiguous and `if y:` raises ValueError on it.
         has_labels = y is not None
         for i, x in enumerate(X_):
             if has_labels:
                 self._send_array(x, y[i])
             else:
                 self._send_array(x)
             SubCMediansWrapper_c.train_model_with_SubCMedianspoint(
                 self._p_subcmedians_c, self._data_object)
             self.generation += 1