Exemple #1
0
    def remove_categories(self, categories, ignore_absences=False):
        '''
        Non destructive category removal.

        Builds a new term-document matrix without the documents labelled
        with any of ``categories``. Terms (and, when a metadata index store
        is populated, metadata entries) whose column sums drop to zero in
        the remaining documents are removed as well.

        Parameters
        ----------
        categories : list
            list of categories to remove
        ignore_absences : bool, False by default
            if a category does not appear, don't raise an error, just move on.

        Returns
        -------
        TermDocMatrix, new object with categories removed.

        Raises
        ------
        KeyError
            if a requested category is not present and ``ignore_absences``
            is False.
        '''

        existing_categories = set(self.get_categories())
        idx_to_delete_list = []
        for category in categories:
            if category not in existing_categories:
                if not ignore_absences:
                    raise KeyError('Category %s not found' % (category))
                continue
            idx_to_delete_list.append(
                self._category_idx_store.getidx(category))

        new_category_idx_store = self._category_idx_store.batch_delete_idx(
            idx_to_delete_list)

        # Compute the document masks once; they are reused for the matrices,
        # the label vector and the final constructor call.
        doc_is_removed = np.isin(self._y, idx_to_delete_list)
        doc_is_kept = ~doc_is_removed

        # Entries of self._y line up with rows of _X / _mX, while
        # delete_columns operates on columns -- hence the transposes.
        docs_to_delete = np.nonzero(doc_is_removed)
        new_X = delete_columns(self._X.T, docs_to_delete).T
        new_mX = delete_columns(self._mX.T, docs_to_delete).T
        intermediate_y = self._y[doc_is_kept]

        # Translate old category indices into indices of the new category
        # store with an integer lookup table. -1 marks removed categories;
        # those slots are never read because their documents are gone.
        # Unlike a None-padded list, this keeps new_y an integer array and
        # also works when no documents remain.
        kept_old_idx = np.array(
            [self._category_idx_store.getidx(x)
             for x in new_category_idx_store._i2val],
            dtype=int)
        lookup_size = kept_old_idx.max() + 1 if kept_old_idx.size else 0
        old_y_to_new_y = np.full(lookup_size, -1, dtype=int)
        old_y_to_new_y[kept_old_idx] = np.arange(kept_old_idx.size)
        new_y = old_y_to_new_y[intermediate_y]

        new_metadata_idx_store = self._metadata_idx_store
        if len(self._metadata_idx_store):
            meta_idx_to_delete = np.nonzero(new_mX.sum(axis=0).A1 == 0)[0]
            new_metadata_idx_store = self._metadata_idx_store.batch_delete_idx(
                meta_idx_to_delete)
            # Drop the same columns from the matrix so that it stays aligned
            # with the shrunken index store (mirrors the term handling below).
            new_mX = delete_columns(new_mX, meta_idx_to_delete)

        term_idx_to_delete = np.nonzero(new_X.sum(axis=0).A1 == 0)[0]
        new_term_idx_store = self._term_idx_store.batch_delete_idx(
            term_idx_to_delete)
        new_X = delete_columns(new_X, term_idx_to_delete)

        term_doc_mat_to_ret = self._make_new_term_doc_matrix(
            new_X, new_mX, new_y, new_term_idx_store, new_category_idx_store,
            new_metadata_idx_store, doc_is_kept)
        return term_doc_mat_to_ret
Exemple #2
0
	def remove_terms_by_indices(self, idx_to_delete_list):
		"""Return a new term-document matrix without the given term indices.

		Parameters
		----------
		idx_to_delete_list : list
			indices passed to both the term index store's batch_delete_idx
			and to delete_columns on self._X

		Returns
		-------
		Whatever self._make_new_term_doc_matrix builds from the reduced X and
		term index store; metadata matrix, labels, category store and
		metadata store are forwarded unchanged.
		"""
		pruned_store = self._term_idx_store.batch_delete_idx(idx_to_delete_list)
		pruned_X = delete_columns(self._X, idx_to_delete_list)
		# Element-wise self-comparison of the label vector; presumably an
		# all-True "keep every document" mask -- TODO confirm against
		# _make_new_term_doc_matrix.
		keep_mask = self._y == self._y
		return self._make_new_term_doc_matrix(
			pruned_X,
			self._mX,
			self._y,
			pruned_store,
			self._category_idx_store,
			self._metadata_idx_store,
			keep_mask,
		)
 def _get_X_after_delete_terms(self, idx_to_delete_list):
     """Compute the term matrix and term index store after deleting terms.

     Neither self._X nor self._term_idx_store is reassigned here; the
     results are returned as a pair.

     Returns
     -------
     tuple : (delete_columns(self._X, idx_to_delete_list),
              self._term_idx_store.batch_delete_idx(idx_to_delete_list))
     """
     pruned_store = self._term_idx_store.batch_delete_idx(idx_to_delete_list)
     return delete_columns(self._X, idx_to_delete_list), pruned_store
 def _get_X_after_delete_terms(self, idx_to_delete_list, non_text=False):
     """Compute the matrix and index store after deleting the given indices.

     Parameters
     ----------
     idx_to_delete_list : list
         indices handed to batch_delete_idx and delete_columns
     non_text : bool, False by default
         forwarded to self._get_relevant_idx_store and
         self._get_relevant_X to choose which store/matrix pair is used
         (presumably metadata when True -- TODO confirm)

     Returns
     -------
     tuple : (reduced matrix, reduced index store)
     """
     source_store = self._get_relevant_idx_store(non_text)
     pruned_store = source_store.batch_delete_idx(idx_to_delete_list)
     source_X = self._get_relevant_X(non_text)
     return delete_columns(source_X, idx_to_delete_list), pruned_store
	def remove_terms_by_indices(self, idx_to_delete_list):
		"""Return a new term-document matrix without the given term indices.

		The reduced term index store and the reduced X are handed to
		self._term_doc_matrix_with_new_X, whose result is returned.
		"""
		pruned_store = self._term_idx_store.batch_delete_idx(idx_to_delete_list)
		pruned_X = delete_columns(self._X, idx_to_delete_list)
		rebuilt = self._term_doc_matrix_with_new_X(pruned_X, pruned_store)
		return rebuilt
 def _get_X_after_delete_terms(self, idx_to_delete_list):
     """Build the reduced term matrix and term index store.

     Calls batch_delete_idx on self._term_idx_store and delete_columns on
     self._X with the same index list, and returns both results without
     reassigning either attribute.

     Returns
     -------
     tuple : (reduced X, reduced term index store)
     """
     reduced_store = self._term_idx_store.batch_delete_idx(idx_to_delete_list)
     reduced_X = delete_columns(self._X, idx_to_delete_list)
     result = (reduced_X, reduced_store)
     return result